//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyEndpgm,
    ImmTyHigh
  };

  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    bool IsForcedVOP3;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isVSrcB16();
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isVSrcF16();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyEndpgm:
      OS << "Endpgm";
      break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E,
                                      bool ForceVOP3) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->Reg.IsForcedVOP3 = ForceVOP3;
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy parseOperandArrayWithPrefix(
    const char *Prefix,
    OperandVector &Operands,
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
  OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool trySkipId(const StringRef Id);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseExpr(int64_t &Imm);

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultOffsetU12() const;
  AMDGPUOperand::Ptr defaultOffsetS13() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType, bool skipVcc = false);

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept these literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, type);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
            "Can't encode literal as exact 64-bit floating-point operand. "
            "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}

static bool isInlineValue(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return true;
  default:
    return false;
  }
}

bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  }
  return -1;
}

static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("scc", AMDGPU::SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
.Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1680 .Case("vcc_lo", AMDGPU::VCC_LO) 1681 .Case("vcc_hi", AMDGPU::VCC_HI) 1682 .Case("exec_lo", AMDGPU::EXEC_LO) 1683 .Case("exec_hi", AMDGPU::EXEC_HI) 1684 .Case("tma_lo", AMDGPU::TMA_LO) 1685 .Case("tma_hi", AMDGPU::TMA_HI) 1686 .Case("tba_lo", AMDGPU::TBA_LO) 1687 .Case("tba_hi", AMDGPU::TBA_HI) 1688 .Default(0); 1689 } 1690 1691 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1692 SMLoc &EndLoc) { 1693 auto R = parseRegister(); 1694 if (!R) return true; 1695 assert(R->isReg()); 1696 RegNo = R->getReg(); 1697 StartLoc = R->getStartLoc(); 1698 EndLoc = R->getEndLoc(); 1699 return false; 1700 } 1701 1702 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1703 RegisterKind RegKind, unsigned Reg1, 1704 unsigned RegNum) { 1705 switch (RegKind) { 1706 case IS_SPECIAL: 1707 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1708 Reg = AMDGPU::EXEC; 1709 RegWidth = 2; 1710 return true; 1711 } 1712 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1713 Reg = AMDGPU::FLAT_SCR; 1714 RegWidth = 2; 1715 return true; 1716 } 1717 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1718 Reg = AMDGPU::XNACK_MASK; 1719 RegWidth = 2; 1720 return true; 1721 } 1722 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1723 Reg = AMDGPU::VCC; 1724 RegWidth = 2; 1725 return true; 1726 } 1727 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1728 Reg = AMDGPU::TBA; 1729 RegWidth = 2; 1730 return true; 1731 } 1732 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 1733 Reg = AMDGPU::TMA; 1734 RegWidth = 2; 1735 return true; 1736 } 1737 return false; 1738 case IS_VGPR: 1739 case IS_SGPR: 1740 case IS_TTMP: 1741 if (Reg1 != Reg + RegWidth) { 1742 return false; 1743 } 1744 RegWidth++; 1745 return true; 1746 default: 1747 llvm_unreachable("unexpected register kind"); 1748 } 1749 } 1750 1751 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1752 unsigned &RegNum, unsigned &RegWidth, 1753 unsigned *DwordRegIndex) { 1754 if (DwordRegIndex) { *DwordRegIndex = 0; } 1755 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 1756 if (getLexer().is(AsmToken::Identifier)) { 1757 StringRef RegName = Parser.getTok().getString(); 1758 if ((Reg = getSpecialRegForName(RegName))) { 1759 Parser.Lex(); 1760 RegKind = IS_SPECIAL; 1761 } else { 1762 unsigned RegNumIndex = 0; 1763 if (RegName[0] == 'v') { 1764 RegNumIndex = 1; 1765 RegKind = IS_VGPR; 1766 } else if (RegName[0] == 's') { 1767 RegNumIndex = 1; 1768 RegKind = IS_SGPR; 1769 } else if (RegName.startswith("ttmp")) { 1770 RegNumIndex = strlen("ttmp"); 1771 RegKind = IS_TTMP; 1772 } else { 1773 return false; 1774 } 1775 if (RegName.size() > RegNumIndex) { 1776 // Single 32-bit register: vXX. 1777 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 1778 return false; 1779 Parser.Lex(); 1780 RegWidth = 1; 1781 } else { 1782 // Range of registers: v[XX:YY]. ":YY" is optional. 
1783 Parser.Lex(); 1784 int64_t RegLo, RegHi; 1785 if (getLexer().isNot(AsmToken::LBrac)) 1786 return false; 1787 Parser.Lex(); 1788 1789 if (getParser().parseAbsoluteExpression(RegLo)) 1790 return false; 1791 1792 const bool isRBrace = getLexer().is(AsmToken::RBrac); 1793 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 1794 return false; 1795 Parser.Lex(); 1796 1797 if (isRBrace) { 1798 RegHi = RegLo; 1799 } else { 1800 if (getParser().parseAbsoluteExpression(RegHi)) 1801 return false; 1802 1803 if (getLexer().isNot(AsmToken::RBrac)) 1804 return false; 1805 Parser.Lex(); 1806 } 1807 RegNum = (unsigned) RegLo; 1808 RegWidth = (RegHi - RegLo) + 1; 1809 } 1810 } 1811 } else if (getLexer().is(AsmToken::LBrac)) { 1812 // List of consecutive registers: [s0,s1,s2,s3] 1813 Parser.Lex(); 1814 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 1815 return false; 1816 if (RegWidth != 1) 1817 return false; 1818 RegisterKind RegKind1; 1819 unsigned Reg1, RegNum1, RegWidth1; 1820 do { 1821 if (getLexer().is(AsmToken::Comma)) { 1822 Parser.Lex(); 1823 } else if (getLexer().is(AsmToken::RBrac)) { 1824 Parser.Lex(); 1825 break; 1826 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 1827 if (RegWidth1 != 1) { 1828 return false; 1829 } 1830 if (RegKind1 != RegKind) { 1831 return false; 1832 } 1833 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 1834 return false; 1835 } 1836 } else { 1837 return false; 1838 } 1839 } while (true); 1840 } else { 1841 return false; 1842 } 1843 switch (RegKind) { 1844 case IS_SPECIAL: 1845 RegNum = 0; 1846 RegWidth = 1; 1847 break; 1848 case IS_VGPR: 1849 case IS_SGPR: 1850 case IS_TTMP: 1851 { 1852 unsigned Size = 1; 1853 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 1854 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
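      // For example (illustrative): s[2:3] is accepted because its first
      // register index is even, while s[3:4] is rejected by the check below;
      // for widths of 4 dwords or more the first index must be a multiple of 4.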
1855 Size = std::min(RegWidth, 4u); 1856 } 1857 if (RegNum % Size != 0) 1858 return false; 1859 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 1860 RegNum = RegNum / Size; 1861 int RCID = getRegClass(RegKind, RegWidth); 1862 if (RCID == -1) 1863 return false; 1864 const MCRegisterClass RC = TRI->getRegClass(RCID); 1865 if (RegNum >= RC.getNumRegs()) 1866 return false; 1867 Reg = RC.getRegister(RegNum); 1868 break; 1869 } 1870 1871 default: 1872 llvm_unreachable("unexpected register kind"); 1873 } 1874 1875 if (!subtargetHasRegister(*TRI, Reg)) 1876 return false; 1877 return true; 1878 } 1879 1880 Optional<StringRef> 1881 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 1882 switch (RegKind) { 1883 case IS_VGPR: 1884 return StringRef(".amdgcn.next_free_vgpr"); 1885 case IS_SGPR: 1886 return StringRef(".amdgcn.next_free_sgpr"); 1887 default: 1888 return None; 1889 } 1890 } 1891 1892 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 1893 auto SymbolName = getGprCountSymbolName(RegKind); 1894 assert(SymbolName && "initializing invalid register kind"); 1895 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1896 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 1897 } 1898 1899 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 1900 unsigned DwordRegIndex, 1901 unsigned RegWidth) { 1902 // Symbols are only defined for GCN targets 1903 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 1904 return true; 1905 1906 auto SymbolName = getGprCountSymbolName(RegKind); 1907 if (!SymbolName) 1908 return true; 1909 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1910 1911 int64_t NewMax = DwordRegIndex + RegWidth - 1; 1912 int64_t OldCount; 1913 1914 if (!Sym->isVariable()) 1915 return !Error(getParser().getTok().getLoc(), 1916 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 1917 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 1918 return !Error( 1919 getParser().getTok().getLoc(), 1920 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 1921 1922 if (OldCount <= NewMax) 1923 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 1924 1925 return true; 1926 } 1927 1928 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 1929 const auto &Tok = Parser.getTok(); 1930 SMLoc StartLoc = Tok.getLoc(); 1931 SMLoc EndLoc = Tok.getEndLoc(); 1932 RegisterKind RegKind; 1933 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 1934 1935 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 1936 return nullptr; 1937 } 1938 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1939 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 1940 return nullptr; 1941 } else 1942 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 1943 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false); 1944 } 1945 1946 bool 1947 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) { 1948 if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) && 1949 (getLexer().getKind() == AsmToken::Integer || 1950 getLexer().getKind() == AsmToken::Real)) { 1951 // This is a workaround for handling operands like these: 1952 // |1.0| 1953 // |-1| 1954 // This syntax is not compatible with syntax of standard 1955 // MC expressions (due to the trailing '|'). 
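    // Sketch of the surrounding call flow (as visible in this file): the
    // leading '|' has already been consumed by parseRegOrImmWithFPInputMods(),
    // which passes AbsMod = true down to here and checks for the matching
    // trailing '|' after this expression has been parsed.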
1956 1957 SMLoc EndLoc; 1958 const MCExpr *Expr; 1959 1960 if (getParser().parsePrimaryExpr(Expr, EndLoc)) { 1961 return true; 1962 } 1963 1964 return !Expr->evaluateAsAbsolute(Val); 1965 } 1966 1967 return getParser().parseAbsoluteExpression(Val); 1968 } 1969 1970 OperandMatchResultTy 1971 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) { 1972 // TODO: add syntactic sugar for 1/(2*PI) 1973 bool Minus = false; 1974 if (getLexer().getKind() == AsmToken::Minus) { 1975 const AsmToken NextToken = getLexer().peekTok(); 1976 if (!NextToken.is(AsmToken::Integer) && 1977 !NextToken.is(AsmToken::Real)) { 1978 return MatchOperand_NoMatch; 1979 } 1980 Minus = true; 1981 Parser.Lex(); 1982 } 1983 1984 SMLoc S = Parser.getTok().getLoc(); 1985 switch(getLexer().getKind()) { 1986 case AsmToken::Integer: { 1987 int64_t IntVal; 1988 if (parseAbsoluteExpr(IntVal, AbsMod)) 1989 return MatchOperand_ParseFail; 1990 if (Minus) 1991 IntVal *= -1; 1992 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 1993 return MatchOperand_Success; 1994 } 1995 case AsmToken::Real: { 1996 int64_t IntVal; 1997 if (parseAbsoluteExpr(IntVal, AbsMod)) 1998 return MatchOperand_ParseFail; 1999 2000 APFloat F(BitsToDouble(IntVal)); 2001 if (Minus) 2002 F.changeSign(); 2003 Operands.push_back( 2004 AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S, 2005 AMDGPUOperand::ImmTyNone, true)); 2006 return MatchOperand_Success; 2007 } 2008 default: 2009 return MatchOperand_NoMatch; 2010 } 2011 } 2012 2013 OperandMatchResultTy 2014 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2015 if (auto R = parseRegister()) { 2016 assert(R->isReg()); 2017 R->Reg.IsForcedVOP3 = isForcedVOP3(); 2018 Operands.push_back(std::move(R)); 2019 return MatchOperand_Success; 2020 } 2021 return MatchOperand_NoMatch; 2022 } 2023 2024 OperandMatchResultTy 2025 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) { 2026 auto res = parseImm(Operands, AbsMod); 2027 if (res != MatchOperand_NoMatch) { 2028 return res; 2029 } 2030 2031 return parseReg(Operands); 2032 } 2033 2034 OperandMatchResultTy 2035 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2036 bool AllowImm) { 2037 bool Negate = false, Negate2 = false, Abs = false, Abs2 = false; 2038 2039 if (getLexer().getKind()== AsmToken::Minus) { 2040 const AsmToken NextToken = getLexer().peekTok(); 2041 2042 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2043 if (NextToken.is(AsmToken::Minus)) { 2044 Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier"); 2045 return MatchOperand_ParseFail; 2046 } 2047 2048 // '-' followed by an integer literal N should be interpreted as integer 2049 // negation rather than a floating-point NEG modifier applied to N. 
2050     // Besides being counter-intuitive, such use of the floating-point NEG modifier
2051     // results in a different meaning of integer literals used with VOP1/2/C
2052     // and VOP3, for example:
2053     //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2054     //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2055     // Negative fp literals should be handled likewise for uniformity.
2056     if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
2057       Parser.Lex();
2058       Negate = true;
2059     }
2060   }
2061 
2062   if (getLexer().getKind() == AsmToken::Identifier &&
2063       Parser.getTok().getString() == "neg") {
2064     if (Negate) {
2065       Error(Parser.getTok().getLoc(), "expected register or immediate");
2066       return MatchOperand_ParseFail;
2067     }
2068     Parser.Lex();
2069     Negate2 = true;
2070     if (getLexer().isNot(AsmToken::LParen)) {
2071       Error(Parser.getTok().getLoc(), "expected left paren after neg");
2072       return MatchOperand_ParseFail;
2073     }
2074     Parser.Lex();
2075   }
2076 
2077   if (getLexer().getKind() == AsmToken::Identifier &&
2078       Parser.getTok().getString() == "abs") {
2079     Parser.Lex();
2080     Abs2 = true;
2081     if (getLexer().isNot(AsmToken::LParen)) {
2082       Error(Parser.getTok().getLoc(), "expected left paren after abs");
2083       return MatchOperand_ParseFail;
2084     }
2085     Parser.Lex();
2086   }
2087 
2088   if (getLexer().getKind() == AsmToken::Pipe) {
2089     if (Abs2) {
2090       Error(Parser.getTok().getLoc(), "expected register or immediate");
2091       return MatchOperand_ParseFail;
2092     }
2093     Parser.Lex();
2094     Abs = true;
2095   }
2096 
2097   OperandMatchResultTy Res;
2098   if (AllowImm) {
2099     Res = parseRegOrImm(Operands, Abs);
2100   } else {
2101     Res = parseReg(Operands);
2102   }
2103   if (Res != MatchOperand_Success) {
2104     return Res;
2105   }
2106 
2107   AMDGPUOperand::Modifiers Mods;
2108   if (Abs) {
2109     if (getLexer().getKind() != AsmToken::Pipe) {
2110       Error(Parser.getTok().getLoc(), "expected vertical bar");
2111       return MatchOperand_ParseFail;
2112     }
2113     Parser.Lex();
2114     Mods.Abs = true;
2115   }
2116   if (Abs2) {
2117     if (getLexer().isNot(AsmToken::RParen)) {
2118       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2119       return MatchOperand_ParseFail;
2120     }
2121     Parser.Lex();
2122     Mods.Abs = true;
2123   }
2124 
2125   if (Negate) {
2126     Mods.Neg = true;
2127   } else if (Negate2) {
2128     if (getLexer().isNot(AsmToken::RParen)) {
2129       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2130       return MatchOperand_ParseFail;
2131     }
2132     Parser.Lex();
2133     Mods.Neg = true;
2134   }
2135 
2136   if (Mods.hasFPModifiers()) {
2137     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2138     Op.setModifiers(Mods);
2139   }
2140   return MatchOperand_Success;
2141 }
2142 
2143 OperandMatchResultTy
2144 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2145                                                bool AllowImm) {
2146   bool Sext = false;
2147 
2148   if (getLexer().getKind() == AsmToken::Identifier &&
2149       Parser.getTok().getString() == "sext") {
2150     Parser.Lex();
2151     Sext = true;
2152     if (getLexer().isNot(AsmToken::LParen)) {
2153       Error(Parser.getTok().getLoc(), "expected left paren after sext");
2154       return MatchOperand_ParseFail;
2155     }
2156     Parser.Lex();
2157   }
2158 
2159   OperandMatchResultTy Res;
2160   if (AllowImm) {
2161     Res = parseRegOrImm(Operands);
2162   } else {
2163     Res = parseReg(Operands);
2164   }
2165   if (Res != MatchOperand_Success) {
2166     return Res;
2167   }
2168 
2169   AMDGPUOperand::Modifiers Mods;
2170   if (Sext) {
2171     if (getLexer().isNot(AsmToken::RParen)) {
2172       Error(Parser.getTok().getLoc(), "expected closing 
parentheses"); 2173 return MatchOperand_ParseFail; 2174 } 2175 Parser.Lex(); 2176 Mods.Sext = true; 2177 } 2178 2179 if (Mods.hasIntModifiers()) { 2180 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2181 Op.setModifiers(Mods); 2182 } 2183 2184 return MatchOperand_Success; 2185 } 2186 2187 OperandMatchResultTy 2188 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2189 return parseRegOrImmWithFPInputMods(Operands, false); 2190 } 2191 2192 OperandMatchResultTy 2193 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2194 return parseRegOrImmWithIntInputMods(Operands, false); 2195 } 2196 2197 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2198 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2199 if (Reg) { 2200 Operands.push_back(std::move(Reg)); 2201 return MatchOperand_Success; 2202 } 2203 2204 const AsmToken &Tok = Parser.getTok(); 2205 if (Tok.getString() == "off") { 2206 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(), 2207 AMDGPUOperand::ImmTyOff, false)); 2208 Parser.Lex(); 2209 return MatchOperand_Success; 2210 } 2211 2212 return MatchOperand_NoMatch; 2213 } 2214 2215 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2216 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2217 2218 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2219 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2220 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2221 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2222 return Match_InvalidOperand; 2223 2224 if ((TSFlags & SIInstrFlags::VOP3) && 2225 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2226 getForcedEncodingSize() != 64) 2227 return Match_PreferE32; 2228 2229 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2230 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2231 // v_mac_f32/16 allow only dst_sel == DWORD; 2232 auto OpNum = 2233 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2234 const auto &Op = Inst.getOperand(OpNum); 2235 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2236 return Match_InvalidOperand; 2237 } 2238 } 2239 2240 if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) { 2241 // FIXME: Produces error without correct column reported. 
2242     auto OpNum =
2243       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2244     const auto &Op = Inst.getOperand(OpNum);
2245     if (Op.getImm() != 0)
2246       return Match_InvalidOperand;
2247   }
2248 
2249   return Match_Success;
2250 }
2251 
2252 // Which asm variants should we check?
2253 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2254   if (getForcedEncodingSize() == 32) {
2255     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2256     return makeArrayRef(Variants);
2257   }
2258 
2259   if (isForcedVOP3()) {
2260     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2261     return makeArrayRef(Variants);
2262   }
2263 
2264   if (isForcedSDWA()) {
2265     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2266                                         AMDGPUAsmVariants::SDWA9};
2267     return makeArrayRef(Variants);
2268   }
2269 
2270   if (isForcedDPP()) {
2271     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2272     return makeArrayRef(Variants);
2273   }
2274 
2275   static const unsigned Variants[] = {
2276     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2277     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2278   };
2279 
2280   return makeArrayRef(Variants);
2281 }
2282 
2283 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2284   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2285   const unsigned Num = Desc.getNumImplicitUses();
2286   for (unsigned i = 0; i < Num; ++i) {
2287     unsigned Reg = Desc.ImplicitUses[i];
2288     switch (Reg) {
2289     case AMDGPU::FLAT_SCR:
2290     case AMDGPU::VCC:
2291     case AMDGPU::M0:
2292       return Reg;
2293     default:
2294       break;
2295     }
2296   }
2297   return AMDGPU::NoRegister;
2298 }
2299 
2300 // NB: This code is correct only when used to check constant
2301 // bus limitations because GFX7 supports no f16 inline constants.
2302 // Note that there are no cases when a GFX7 opcode violates
2303 // constant bus limitations due to the use of an f16 constant.
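// A rough illustration of the rule enforced by validateConstantBusLimitations()
// below: a VALU instruction may read at most one unique SGPR (or one literal)
// through the constant bus, so, for example,
//   v_add_f32_e64 v0, s0, s0   - accepted, the same SGPR is only counted once
//   v_add_f32_e64 v0, s0, s1   - rejected, two different SGPRs
// (Illustrative only; implicit reads such as VCC and special imm operands are
// counted as well, as the code below shows.)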
2304 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2305                                        unsigned OpIdx) const {
2306   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2307 
2308   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2309     return false;
2310   }
2311 
2312   const MCOperand &MO = Inst.getOperand(OpIdx);
2313 
2314   int64_t Val = MO.getImm();
2315   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2316 
2317   switch (OpSize) { // expected operand size
2318   case 8:
2319     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2320   case 4:
2321     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2322   case 2: {
2323     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2324     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2325         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2326       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2327     } else {
2328       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2329     }
2330   }
2331   default:
2332     llvm_unreachable("invalid operand size");
2333   }
2334 }
2335 
2336 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2337   const MCOperand &MO = Inst.getOperand(OpIdx);
2338   if (MO.isImm()) {
2339     return !isInlineConstant(Inst, OpIdx);
2340   }
2341   return !MO.isReg() ||
2342          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2343 }
2344 
2345 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2346   const unsigned Opcode = Inst.getOpcode();
2347   const MCInstrDesc &Desc = MII.get(Opcode);
2348   unsigned ConstantBusUseCount = 0;
2349 
2350   if (Desc.TSFlags &
2351       (SIInstrFlags::VOPC |
2352        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2353        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2354        SIInstrFlags::SDWA)) {
2355     // Check special imm operands (used by madmk, etc.)
2356     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2357       ++ConstantBusUseCount;
2358     }
2359 
2360     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2361     if (SGPRUsed != AMDGPU::NoRegister) {
2362       ++ConstantBusUseCount;
2363     }
2364 
2365     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2366     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2367     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2368 
2369     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2370 
2371     for (int OpIdx : OpIndices) {
2372       if (OpIdx == -1) break;
2373 
2374       const MCOperand &MO = Inst.getOperand(OpIdx);
2375       if (usesConstantBus(Inst, OpIdx)) {
2376         if (MO.isReg()) {
2377           const unsigned Reg = mc2PseudoReg(MO.getReg());
2378           // Pairs of registers with partial intersections like these:
2379           //   s0, s[0:1]
2380           //   flat_scratch_lo, flat_scratch
2381           //   flat_scratch_lo, flat_scratch_hi
2382           // are theoretically valid but they are disabled anyway.
2383 // Note that this code mimics SIInstrInfo::verifyInstruction 2384 if (Reg != SGPRUsed) { 2385 ++ConstantBusUseCount; 2386 } 2387 SGPRUsed = Reg; 2388 } else { // Expression or a literal 2389 ++ConstantBusUseCount; 2390 } 2391 } 2392 } 2393 } 2394 2395 return ConstantBusUseCount <= 1; 2396 } 2397 2398 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2399 const unsigned Opcode = Inst.getOpcode(); 2400 const MCInstrDesc &Desc = MII.get(Opcode); 2401 2402 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2403 if (DstIdx == -1 || 2404 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2405 return true; 2406 } 2407 2408 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2409 2410 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2411 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2412 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2413 2414 assert(DstIdx != -1); 2415 const MCOperand &Dst = Inst.getOperand(DstIdx); 2416 assert(Dst.isReg()); 2417 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2418 2419 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2420 2421 for (int SrcIdx : SrcIndices) { 2422 if (SrcIdx == -1) break; 2423 const MCOperand &Src = Inst.getOperand(SrcIdx); 2424 if (Src.isReg()) { 2425 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2426 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2427 return false; 2428 } 2429 } 2430 } 2431 2432 return true; 2433 } 2434 2435 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2436 2437 const unsigned Opc = Inst.getOpcode(); 2438 const MCInstrDesc &Desc = MII.get(Opc); 2439 2440 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2441 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2442 assert(ClampIdx != -1); 2443 return Inst.getOperand(ClampIdx).getImm() == 0; 2444 } 2445 2446 return true; 2447 } 2448 2449 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2450 2451 const unsigned Opc = Inst.getOpcode(); 2452 const MCInstrDesc &Desc = MII.get(Opc); 2453 2454 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2455 return true; 2456 2457 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2458 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2459 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2460 2461 assert(VDataIdx != -1); 2462 assert(DMaskIdx != -1); 2463 assert(TFEIdx != -1); 2464 2465 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2466 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2467 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2468 if (DMask == 0) 2469 DMask = 1; 2470 2471 unsigned DataSize = 2472 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 2473 if (hasPackedD16()) { 2474 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2475 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2476 DataSize = (DataSize + 1) / 2; 2477 } 2478 2479 return (VDataSize / 4) == DataSize + TFESize; 2480 } 2481 2482 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2483 2484 const unsigned Opc = Inst.getOpcode(); 2485 const MCInstrDesc &Desc = MII.get(Opc); 2486 2487 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2488 return true; 2489 if (!Desc.mayLoad() || !Desc.mayStore()) 2490 return true; // Not atomic 2491 2492 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2493 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2494 2495 // This is an incomplete check because image_atomic_cmpswap 2496 // may only use 0x3 and 0xf while other atomic operations 2497 // may use 0x1 and 0x3. However these limitations are 2498 // verified when we check that dmask matches dst size. 2499 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2500 } 2501 2502 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2503 2504 const unsigned Opc = Inst.getOpcode(); 2505 const MCInstrDesc &Desc = MII.get(Opc); 2506 2507 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2508 return true; 2509 2510 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2511 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2512 2513 // GATHER4 instructions use dmask in a different fashion compared to 2514 // other MIMG instructions. The only useful DMASK values are 2515 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2516 // (red,red,red,red) etc.) The ISA document doesn't mention 2517 // this. 2518 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2519 } 2520 2521 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2522 2523 const unsigned Opc = Inst.getOpcode(); 2524 const MCInstrDesc &Desc = MII.get(Opc); 2525 2526 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2527 return true; 2528 2529 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2530 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2531 if (isCI() || isSI()) 2532 return false; 2533 } 2534 2535 return true; 2536 } 2537 2538 static bool IsRevOpcode(const unsigned Opcode) 2539 { 2540 switch (Opcode) { 2541 case AMDGPU::V_SUBREV_F32_e32: 2542 case AMDGPU::V_SUBREV_F32_e64: 2543 case AMDGPU::V_SUBREV_F32_e32_si: 2544 case AMDGPU::V_SUBREV_F32_e32_vi: 2545 case AMDGPU::V_SUBREV_F32_e64_si: 2546 case AMDGPU::V_SUBREV_F32_e64_vi: 2547 case AMDGPU::V_SUBREV_I32_e32: 2548 case AMDGPU::V_SUBREV_I32_e64: 2549 case AMDGPU::V_SUBREV_I32_e32_si: 2550 case AMDGPU::V_SUBREV_I32_e64_si: 2551 case AMDGPU::V_SUBBREV_U32_e32: 2552 case AMDGPU::V_SUBBREV_U32_e64: 2553 case AMDGPU::V_SUBBREV_U32_e32_si: 2554 case AMDGPU::V_SUBBREV_U32_e32_vi: 2555 case AMDGPU::V_SUBBREV_U32_e64_si: 2556 case AMDGPU::V_SUBBREV_U32_e64_vi: 2557 case AMDGPU::V_SUBREV_U32_e32: 2558 case AMDGPU::V_SUBREV_U32_e64: 2559 case AMDGPU::V_SUBREV_U32_e32_gfx9: 2560 case AMDGPU::V_SUBREV_U32_e32_vi: 2561 case AMDGPU::V_SUBREV_U32_e64_gfx9: 2562 case AMDGPU::V_SUBREV_U32_e64_vi: 2563 case AMDGPU::V_SUBREV_F16_e32: 2564 case AMDGPU::V_SUBREV_F16_e64: 2565 case AMDGPU::V_SUBREV_F16_e32_vi: 2566 case AMDGPU::V_SUBREV_F16_e64_vi: 2567 case AMDGPU::V_SUBREV_U16_e32: 2568 case AMDGPU::V_SUBREV_U16_e64: 2569 case AMDGPU::V_SUBREV_U16_e32_vi: 2570 case AMDGPU::V_SUBREV_U16_e64_vi: 2571 case 
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 2572 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 2573 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 2574 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 2575 case AMDGPU::V_LSHLREV_B32_e32_si: 2576 case AMDGPU::V_LSHLREV_B32_e64_si: 2577 case AMDGPU::V_LSHLREV_B16_e32_vi: 2578 case AMDGPU::V_LSHLREV_B16_e64_vi: 2579 case AMDGPU::V_LSHLREV_B32_e32_vi: 2580 case AMDGPU::V_LSHLREV_B32_e64_vi: 2581 case AMDGPU::V_LSHLREV_B64_vi: 2582 case AMDGPU::V_LSHRREV_B32_e32_si: 2583 case AMDGPU::V_LSHRREV_B32_e64_si: 2584 case AMDGPU::V_LSHRREV_B16_e32_vi: 2585 case AMDGPU::V_LSHRREV_B16_e64_vi: 2586 case AMDGPU::V_LSHRREV_B32_e32_vi: 2587 case AMDGPU::V_LSHRREV_B32_e64_vi: 2588 case AMDGPU::V_LSHRREV_B64_vi: 2589 case AMDGPU::V_ASHRREV_I32_e64_si: 2590 case AMDGPU::V_ASHRREV_I32_e32_si: 2591 case AMDGPU::V_ASHRREV_I16_e32_vi: 2592 case AMDGPU::V_ASHRREV_I16_e64_vi: 2593 case AMDGPU::V_ASHRREV_I32_e32_vi: 2594 case AMDGPU::V_ASHRREV_I32_e64_vi: 2595 case AMDGPU::V_ASHRREV_I64_vi: 2596 case AMDGPU::V_PK_LSHLREV_B16_vi: 2597 case AMDGPU::V_PK_LSHRREV_B16_vi: 2598 case AMDGPU::V_PK_ASHRREV_I16_vi: 2599 return true; 2600 default: 2601 return false; 2602 } 2603 } 2604 2605 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 2606 2607 using namespace SIInstrFlags; 2608 const unsigned Opcode = Inst.getOpcode(); 2609 const MCInstrDesc &Desc = MII.get(Opcode); 2610 2611 // lds_direct register is defined so that it can be used 2612 // with 9-bit operands only. Ignore encodings which do not accept these. 2613 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 2614 return true; 2615 2616 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2617 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2618 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2619 2620 const int SrcIndices[] = { Src1Idx, Src2Idx }; 2621 2622 // lds_direct cannot be specified as either src1 or src2. 2623 for (int SrcIdx : SrcIndices) { 2624 if (SrcIdx == -1) break; 2625 const MCOperand &Src = Inst.getOperand(SrcIdx); 2626 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 2627 return false; 2628 } 2629 } 2630 2631 if (Src0Idx == -1) 2632 return true; 2633 2634 const MCOperand &Src = Inst.getOperand(Src0Idx); 2635 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 2636 return true; 2637 2638 // lds_direct is specified as src0. Check additional limitations. 
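  // Concretely (see the return expression below): lds_direct as src0 is
  // rejected for SDWA encodings and for the "reversed" opcodes enumerated
  // in IsRevOpcode() above.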
2639 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 2640 } 2641 2642 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 2643 unsigned Opcode = Inst.getOpcode(); 2644 const MCInstrDesc &Desc = MII.get(Opcode); 2645 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 2646 return true; 2647 2648 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2649 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2650 2651 const int OpIndices[] = { Src0Idx, Src1Idx }; 2652 2653 unsigned NumLiterals = 0; 2654 uint32_t LiteralValue; 2655 2656 for (int OpIdx : OpIndices) { 2657 if (OpIdx == -1) break; 2658 2659 const MCOperand &MO = Inst.getOperand(OpIdx); 2660 if (MO.isImm() && 2661 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 2662 AMDGPU::isSISrcOperand(Desc, OpIdx) && 2663 !isInlineConstant(Inst, OpIdx)) { 2664 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 2665 if (NumLiterals == 0 || LiteralValue != Value) { 2666 LiteralValue = Value; 2667 ++NumLiterals; 2668 } 2669 } 2670 } 2671 2672 return NumLiterals <= 1; 2673 } 2674 2675 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 2676 const SMLoc &IDLoc) { 2677 if (!validateLdsDirect(Inst)) { 2678 Error(IDLoc, 2679 "invalid use of lds_direct"); 2680 return false; 2681 } 2682 if (!validateSOPLiteral(Inst)) { 2683 Error(IDLoc, 2684 "only one literal operand is allowed"); 2685 return false; 2686 } 2687 if (!validateConstantBusLimitations(Inst)) { 2688 Error(IDLoc, 2689 "invalid operand (violates constant bus restrictions)"); 2690 return false; 2691 } 2692 if (!validateEarlyClobberLimitations(Inst)) { 2693 Error(IDLoc, 2694 "destination must be different than all sources"); 2695 return false; 2696 } 2697 if (!validateIntClampSupported(Inst)) { 2698 Error(IDLoc, 2699 "integer clamping is not supported on this GPU"); 2700 return false; 2701 } 2702 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 2703 if (!validateMIMGD16(Inst)) { 2704 Error(IDLoc, 2705 "d16 modifier is not supported on this GPU"); 2706 return false; 2707 } 2708 if (!validateMIMGDataSize(Inst)) { 2709 Error(IDLoc, 2710 "image data size does not match dmask and tfe"); 2711 return false; 2712 } 2713 if (!validateMIMGAtomicDMask(Inst)) { 2714 Error(IDLoc, 2715 "invalid atomic image dmask"); 2716 return false; 2717 } 2718 if (!validateMIMGGatherDMask(Inst)) { 2719 Error(IDLoc, 2720 "invalid image_gather dmask: only one bit must be set"); 2721 return false; 2722 } 2723 2724 return true; 2725 } 2726 2727 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 2728 const FeatureBitset &FBS, 2729 unsigned VariantID = 0); 2730 2731 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 2732 OperandVector &Operands, 2733 MCStreamer &Out, 2734 uint64_t &ErrorInfo, 2735 bool MatchingInlineAsm) { 2736 MCInst Inst; 2737 unsigned Result = Match_Success; 2738 for (auto Variant : getMatchedVariants()) { 2739 uint64_t EI; 2740 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 2741 Variant); 2742 // We order match statuses from least to most specific. 
We use most specific 2743 // status as resulting 2744 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 2745 if ((R == Match_Success) || 2746 (R == Match_PreferE32) || 2747 (R == Match_MissingFeature && Result != Match_PreferE32) || 2748 (R == Match_InvalidOperand && Result != Match_MissingFeature 2749 && Result != Match_PreferE32) || 2750 (R == Match_MnemonicFail && Result != Match_InvalidOperand 2751 && Result != Match_MissingFeature 2752 && Result != Match_PreferE32)) { 2753 Result = R; 2754 ErrorInfo = EI; 2755 } 2756 if (R == Match_Success) 2757 break; 2758 } 2759 2760 switch (Result) { 2761 default: break; 2762 case Match_Success: 2763 if (!validateInstruction(Inst, IDLoc)) { 2764 return true; 2765 } 2766 Inst.setLoc(IDLoc); 2767 Out.EmitInstruction(Inst, getSTI()); 2768 return false; 2769 2770 case Match_MissingFeature: 2771 return Error(IDLoc, "instruction not supported on this GPU"); 2772 2773 case Match_MnemonicFail: { 2774 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 2775 std::string Suggestion = AMDGPUMnemonicSpellCheck( 2776 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 2777 return Error(IDLoc, "invalid instruction" + Suggestion, 2778 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 2779 } 2780 2781 case Match_InvalidOperand: { 2782 SMLoc ErrorLoc = IDLoc; 2783 if (ErrorInfo != ~0ULL) { 2784 if (ErrorInfo >= Operands.size()) { 2785 return Error(IDLoc, "too few operands for instruction"); 2786 } 2787 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 2788 if (ErrorLoc == SMLoc()) 2789 ErrorLoc = IDLoc; 2790 } 2791 return Error(ErrorLoc, "invalid operand for instruction"); 2792 } 2793 2794 case Match_PreferE32: 2795 return Error(IDLoc, "internal error: instruction without _e64 suffix " 2796 "should be encoded as e32"); 2797 } 2798 llvm_unreachable("Implement any new match types added!"); 2799 } 2800 2801 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 2802 int64_t Tmp = -1; 2803 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 2804 return true; 2805 } 2806 if (getParser().parseAbsoluteExpression(Tmp)) { 2807 return true; 2808 } 2809 Ret = static_cast<uint32_t>(Tmp); 2810 return false; 2811 } 2812 2813 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 2814 uint32_t &Minor) { 2815 if (ParseAsAbsoluteExpression(Major)) 2816 return TokError("invalid major version"); 2817 2818 if (getLexer().isNot(AsmToken::Comma)) 2819 return TokError("minor version number required, comma expected"); 2820 Lex(); 2821 2822 if (ParseAsAbsoluteExpression(Minor)) 2823 return TokError("invalid minor version"); 2824 2825 return false; 2826 } 2827 2828 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 2829 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2830 return TokError("directive only supported for amdgcn architecture"); 2831 2832 std::string Target; 2833 2834 SMLoc TargetStart = getTok().getLoc(); 2835 if (getParser().parseEscapedString(Target)) 2836 return true; 2837 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 2838 2839 std::string ExpectedTarget; 2840 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 2841 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 2842 2843 if (Target != ExpectedTargetOS.str()) 2844 return getParser().Error(TargetRange.Start, "target must match options", 2845 TargetRange); 2846 2847 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 2848 return false; 2849 } 2850 2851 bool 
AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 2852 return getParser().Error(Range.Start, "value out of range", Range); 2853 } 2854 2855 bool AMDGPUAsmParser::calculateGPRBlocks( 2856 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 2857 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange, 2858 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks, 2859 unsigned &SGPRBlocks) { 2860 // TODO(scott.linder): These calculations are duplicated from 2861 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 2862 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 2863 2864 unsigned NumVGPRs = NextFreeVGPR; 2865 unsigned NumSGPRs = NextFreeSGPR; 2866 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI()); 2867 2868 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 2869 NumSGPRs > MaxAddressableNumSGPRs) 2870 return OutOfRangeError(SGPRRange); 2871 2872 NumSGPRs += 2873 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 2874 2875 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 2876 NumSGPRs > MaxAddressableNumSGPRs) 2877 return OutOfRangeError(SGPRRange); 2878 2879 if (Features.test(FeatureSGPRInitBug)) 2880 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 2881 2882 VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs); 2883 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 2884 2885 return false; 2886 } 2887 2888 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 2889 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2890 return TokError("directive only supported for amdgcn architecture"); 2891 2892 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 2893 return TokError("directive only supported for amdhsa OS"); 2894 2895 StringRef KernelName; 2896 if (getParser().parseIdentifier(KernelName)) 2897 return true; 2898 2899 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(); 2900 2901 StringSet<> Seen; 2902 2903 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 2904 2905 SMRange VGPRRange; 2906 uint64_t NextFreeVGPR = 0; 2907 SMRange SGPRRange; 2908 uint64_t NextFreeSGPR = 0; 2909 unsigned UserSGPRCount = 0; 2910 bool ReserveVCC = true; 2911 bool ReserveFlatScr = true; 2912 bool ReserveXNACK = hasXNACK(); 2913 2914 while (true) { 2915 while (getLexer().is(AsmToken::EndOfStatement)) 2916 Lex(); 2917 2918 if (getLexer().isNot(AsmToken::Identifier)) 2919 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 2920 2921 StringRef ID = getTok().getIdentifier(); 2922 SMRange IDRange = getTok().getLocRange(); 2923 Lex(); 2924 2925 if (ID == ".end_amdhsa_kernel") 2926 break; 2927 2928 if (Seen.find(ID) != Seen.end()) 2929 return TokError(".amdhsa_ directives cannot be repeated"); 2930 Seen.insert(ID); 2931 2932 SMLoc ValStart = getTok().getLoc(); 2933 int64_t IVal; 2934 if (getParser().parseAbsoluteExpression(IVal)) 2935 return true; 2936 SMLoc ValEnd = getTok().getLoc(); 2937 SMRange ValRange = SMRange(ValStart, ValEnd); 2938 2939 if (IVal < 0) 2940 return OutOfRangeError(ValRange); 2941 2942 uint64_t Val = IVal; 2943 2944 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 2945 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 2946 return OutOfRangeError(RANGE); \ 2947 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 2948 2949 if (ID == ".amdhsa_group_segment_fixed_size") { 2950 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 2951 return OutOfRangeError(ValRange); 2952 KD.group_segment_fixed_size = Val; 2953 } else if (ID == 
".amdhsa_private_segment_fixed_size") { 2954 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 2955 return OutOfRangeError(ValRange); 2956 KD.private_segment_fixed_size = Val; 2957 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 2958 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2959 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 2960 Val, ValRange); 2961 UserSGPRCount += 4; 2962 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 2963 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2964 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 2965 ValRange); 2966 UserSGPRCount += 2; 2967 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 2968 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2969 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 2970 ValRange); 2971 UserSGPRCount += 2; 2972 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 2973 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2974 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 2975 Val, ValRange); 2976 UserSGPRCount += 2; 2977 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 2978 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2979 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 2980 ValRange); 2981 UserSGPRCount += 2; 2982 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 2983 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2984 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 2985 ValRange); 2986 UserSGPRCount += 2; 2987 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 2988 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2989 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 2990 Val, ValRange); 2991 UserSGPRCount += 1; 2992 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 2993 PARSE_BITS_ENTRY( 2994 KD.compute_pgm_rsrc2, 2995 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 2996 ValRange); 2997 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 2998 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2999 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3000 ValRange); 3001 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3002 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3003 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3004 ValRange); 3005 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3006 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3007 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3008 ValRange); 3009 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3010 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3011 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3012 ValRange); 3013 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3014 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3015 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3016 ValRange); 3017 } else if (ID == ".amdhsa_next_free_vgpr") { 3018 VGPRRange = ValRange; 3019 NextFreeVGPR = Val; 3020 } else if (ID == ".amdhsa_next_free_sgpr") { 3021 SGPRRange = ValRange; 3022 NextFreeSGPR = Val; 3023 } else if (ID == ".amdhsa_reserve_vcc") { 3024 if (!isUInt<1>(Val)) 3025 return OutOfRangeError(ValRange); 3026 ReserveVCC = Val; 3027 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3028 if (IVersion.Major < 7) 3029 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3030 IDRange); 3031 if (!isUInt<1>(Val)) 3032 return OutOfRangeError(ValRange); 3033 ReserveFlatScr = Val; 3034 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3035 if (IVersion.Major < 8) 3036 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3037 
IDRange); 3038 if (!isUInt<1>(Val)) 3039 return OutOfRangeError(ValRange); 3040 ReserveXNACK = Val; 3041 } else if (ID == ".amdhsa_float_round_mode_32") { 3042 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3043 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3044 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3045 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3046 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3047 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3048 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3049 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3050 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3051 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3052 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3053 ValRange); 3054 } else if (ID == ".amdhsa_dx10_clamp") { 3055 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3056 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3057 } else if (ID == ".amdhsa_ieee_mode") { 3058 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3059 Val, ValRange); 3060 } else if (ID == ".amdhsa_fp16_overflow") { 3061 if (IVersion.Major < 9) 3062 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3063 IDRange); 3064 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3065 ValRange); 3066 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3067 PARSE_BITS_ENTRY( 3068 KD.compute_pgm_rsrc2, 3069 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3070 ValRange); 3071 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3072 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3073 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3074 Val, ValRange); 3075 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3076 PARSE_BITS_ENTRY( 3077 KD.compute_pgm_rsrc2, 3078 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3079 ValRange); 3080 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3081 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3082 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3083 Val, ValRange); 3084 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 3085 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3086 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3087 Val, ValRange); 3088 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3089 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3090 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3091 Val, ValRange); 3092 } else if (ID == ".amdhsa_exception_int_div_zero") { 3093 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3094 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3095 Val, ValRange); 3096 } else { 3097 return getParser().Error(IDRange.Start, 3098 "unknown .amdhsa_kernel directive", IDRange); 3099 } 3100 3101 #undef PARSE_BITS_ENTRY 3102 } 3103 3104 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3105 return TokError(".amdhsa_next_free_vgpr directive is required"); 3106 3107 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3108 return TokError(".amdhsa_next_free_sgpr directive is required"); 3109 3110 unsigned VGPRBlocks; 3111 unsigned SGPRBlocks; 3112 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3113 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR, 3114 SGPRRange, VGPRBlocks, SGPRBlocks)) 3115 return true; 3116 3117 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3118 VGPRBlocks)) 3119 return OutOfRangeError(VGPRRange); 3120 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3121 
COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3122 3123 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3124 SGPRBlocks)) 3125 return OutOfRangeError(SGPRRange); 3126 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3127 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3128 SGPRBlocks); 3129 3130 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3131 return TokError("too many user SGPRs enabled"); 3132 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3133 UserSGPRCount); 3134 3135 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3136 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3137 ReserveFlatScr, ReserveXNACK); 3138 return false; 3139 } 3140 3141 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3142 uint32_t Major; 3143 uint32_t Minor; 3144 3145 if (ParseDirectiveMajorMinor(Major, Minor)) 3146 return true; 3147 3148 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3149 return false; 3150 } 3151 3152 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3153 uint32_t Major; 3154 uint32_t Minor; 3155 uint32_t Stepping; 3156 StringRef VendorName; 3157 StringRef ArchName; 3158 3159 // If this directive has no arguments, then use the ISA version for the 3160 // targeted GPU. 3161 if (getLexer().is(AsmToken::EndOfStatement)) { 3162 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3163 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3164 ISA.Stepping, 3165 "AMD", "AMDGPU"); 3166 return false; 3167 } 3168 3169 if (ParseDirectiveMajorMinor(Major, Minor)) 3170 return true; 3171 3172 if (getLexer().isNot(AsmToken::Comma)) 3173 return TokError("stepping version number required, comma expected"); 3174 Lex(); 3175 3176 if (ParseAsAbsoluteExpression(Stepping)) 3177 return TokError("invalid stepping version"); 3178 3179 if (getLexer().isNot(AsmToken::Comma)) 3180 return TokError("vendor name required, comma expected"); 3181 Lex(); 3182 3183 if (getLexer().isNot(AsmToken::String)) 3184 return TokError("invalid vendor name"); 3185 3186 VendorName = getLexer().getTok().getStringContents(); 3187 Lex(); 3188 3189 if (getLexer().isNot(AsmToken::Comma)) 3190 return TokError("arch name required, comma expected"); 3191 Lex(); 3192 3193 if (getLexer().isNot(AsmToken::String)) 3194 return TokError("invalid arch name"); 3195 3196 ArchName = getLexer().getTok().getStringContents(); 3197 Lex(); 3198 3199 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3200 VendorName, ArchName); 3201 return false; 3202 } 3203 3204 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3205 amd_kernel_code_t &Header) { 3206 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3207 // assembly for backwards compatibility. 3208 if (ID == "max_scratch_backing_memory_byte_size") { 3209 Parser.eatToEndOfStatement(); 3210 return false; 3211 } 3212 3213 SmallString<40> ErrStr; 3214 raw_svector_ostream Err(ErrStr); 3215 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 3216 return TokError(Err.str()); 3217 } 3218 Lex(); 3219 return false; 3220 } 3221 3222 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3223 amd_kernel_code_t Header; 3224 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3225 3226 while (true) { 3227 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3228 // will set the current token to EndOfStatement. 
3229 while(getLexer().is(AsmToken::EndOfStatement)) 3230 Lex(); 3231 3232 if (getLexer().isNot(AsmToken::Identifier)) 3233 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3234 3235 StringRef ID = getLexer().getTok().getIdentifier(); 3236 Lex(); 3237 3238 if (ID == ".end_amd_kernel_code_t") 3239 break; 3240 3241 if (ParseAMDKernelCodeTValue(ID, Header)) 3242 return true; 3243 } 3244 3245 getTargetStreamer().EmitAMDKernelCodeT(Header); 3246 3247 return false; 3248 } 3249 3250 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 3251 if (getLexer().isNot(AsmToken::Identifier)) 3252 return TokError("expected symbol name"); 3253 3254 StringRef KernelName = Parser.getTok().getString(); 3255 3256 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 3257 ELF::STT_AMDGPU_HSA_KERNEL); 3258 Lex(); 3259 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 3260 KernelScope.initialize(getContext()); 3261 return false; 3262 } 3263 3264 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 3265 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 3266 return Error(getParser().getTok().getLoc(), 3267 ".amd_amdgpu_isa directive is not available on non-amdgcn " 3268 "architectures"); 3269 } 3270 3271 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 3272 3273 std::string ISAVersionStringFromSTI; 3274 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3275 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3276 3277 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3278 return Error(getParser().getTok().getLoc(), 3279 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3280 "arguments specified through the command line"); 3281 } 3282 3283 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 3284 Lex(); 3285 3286 return false; 3287 } 3288 3289 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 3290 const char *AssemblerDirectiveBegin; 3291 const char *AssemblerDirectiveEnd; 3292 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 3293 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 3294 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 3295 HSAMD::V3::AssemblerDirectiveEnd) 3296 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 3297 HSAMD::AssemblerDirectiveEnd); 3298 3299 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 3300 return Error(getParser().getTok().getLoc(), 3301 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 3302 "not available on non-amdhsa OSes")).str()); 3303 } 3304 3305 std::string HSAMetadataString; 3306 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 3307 HSAMetadataString)) 3308 return true; 3309 3310 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 3311 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 3312 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3313 } else { 3314 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 3315 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3316 } 3317 3318 return false; 3319 } 3320 3321 /// Common code to parse out a block of text (typically YAML) between start and 3322 /// end directives. 
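///
/// Used, for example, by ParseDirectiveHSAMetadata() above and by
/// ParseDirectivePALMetadataBegin() below: everything between the begin and
/// end directives is collected, statement by statement, into CollectString.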
3323 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 3324 const char *AssemblerDirectiveEnd, 3325 std::string &CollectString) { 3326 3327 raw_string_ostream CollectStream(CollectString); 3328 3329 getLexer().setSkipSpace(false); 3330 3331 bool FoundEnd = false; 3332 while (!getLexer().is(AsmToken::Eof)) { 3333 while (getLexer().is(AsmToken::Space)) { 3334 CollectStream << getLexer().getTok().getString(); 3335 Lex(); 3336 } 3337 3338 if (getLexer().is(AsmToken::Identifier)) { 3339 StringRef ID = getLexer().getTok().getIdentifier(); 3340 if (ID == AssemblerDirectiveEnd) { 3341 Lex(); 3342 FoundEnd = true; 3343 break; 3344 } 3345 } 3346 3347 CollectStream << Parser.parseStringToEndOfStatement() 3348 << getContext().getAsmInfo()->getSeparatorString(); 3349 3350 Parser.eatToEndOfStatement(); 3351 } 3352 3353 getLexer().setSkipSpace(true); 3354 3355 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 3356 return TokError(Twine("expected directive ") + 3357 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 3358 } 3359 3360 CollectStream.flush(); 3361 return false; 3362 } 3363 3364 /// Parse the assembler directive for new MsgPack-format PAL metadata. 3365 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 3366 std::string String; 3367 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 3368 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 3369 return true; 3370 3371 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3372 if (!PALMetadata->setFromString(String)) 3373 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 3374 return false; 3375 } 3376 3377 /// Parse the assembler directive for old linear-format PAL metadata. 3378 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 3379 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 3380 return Error(getParser().getTok().getLoc(), 3381 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 3382 "not available on non-amdpal OSes")).str()); 3383 } 3384 3385 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3386 PALMetadata->setLegacy(); 3387 for (;;) { 3388 uint32_t Key, Value; 3389 if (ParseAsAbsoluteExpression(Key)) { 3390 return TokError(Twine("invalid value in ") + 3391 Twine(PALMD::AssemblerDirective)); 3392 } 3393 if (getLexer().isNot(AsmToken::Comma)) { 3394 return TokError(Twine("expected an even number of values in ") + 3395 Twine(PALMD::AssemblerDirective)); 3396 } 3397 Lex(); 3398 if (ParseAsAbsoluteExpression(Value)) { 3399 return TokError(Twine("invalid value in ") + 3400 Twine(PALMD::AssemblerDirective)); 3401 } 3402 PALMetadata->setRegister(Key, Value); 3403 if (getLexer().isNot(AsmToken::Comma)) 3404 break; 3405 Lex(); 3406 } 3407 return false; 3408 } 3409 3410 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 3411 StringRef IDVal = DirectiveID.getString(); 3412 3413 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 3414 if (IDVal == ".amdgcn_target") 3415 return ParseDirectiveAMDGCNTarget(); 3416 3417 if (IDVal == ".amdhsa_kernel") 3418 return ParseDirectiveAMDHSAKernel(); 3419 3420 // TODO: Restructure/combine with PAL metadata directive. 
3421     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3422       return ParseDirectiveHSAMetadata();
3423   } else {
3424     if (IDVal == ".hsa_code_object_version")
3425       return ParseDirectiveHSACodeObjectVersion();
3426 
3427     if (IDVal == ".hsa_code_object_isa")
3428       return ParseDirectiveHSACodeObjectISA();
3429 
3430     if (IDVal == ".amd_kernel_code_t")
3431       return ParseDirectiveAMDKernelCodeT();
3432 
3433     if (IDVal == ".amdgpu_hsa_kernel")
3434       return ParseDirectiveAMDGPUHsaKernel();
3435 
3436     if (IDVal == ".amd_amdgpu_isa")
3437       return ParseDirectiveISAVersion();
3438 
3439     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3440       return ParseDirectiveHSAMetadata();
3441   }
3442 
3443   if (IDVal == PALMD::AssemblerDirectiveBegin)
3444     return ParseDirectivePALMetadataBegin();
3445 
3446   if (IDVal == PALMD::AssemblerDirective)
3447     return ParseDirectivePALMetadata();
3448 
3449   return true;
3450 }
3451 
3452 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3453                                            unsigned RegNo) const {
3454 
3455   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3456        R.isValid(); ++R) {
3457     if (*R == RegNo)
3458       return isGFX9();
3459   }
3460 
3461   switch (RegNo) {
3462   case AMDGPU::TBA:
3463   case AMDGPU::TBA_LO:
3464   case AMDGPU::TBA_HI:
3465   case AMDGPU::TMA:
3466   case AMDGPU::TMA_LO:
3467   case AMDGPU::TMA_HI:
3468     return !isGFX9();
3469   case AMDGPU::XNACK_MASK:
3470   case AMDGPU::XNACK_MASK_LO:
3471   case AMDGPU::XNACK_MASK_HI:
3472     return !isCI() && !isSI() && hasXNACK();
3473   default:
3474     break;
3475   }
3476 
3477   if (isInlineValue(RegNo))
3478     return !isCI() && !isSI() && !isVI();
3479 
3480   if (isCI())
3481     return true;
3482 
3483   if (isSI()) {
3484     // No flat_scr
3485     switch (RegNo) {
3486     case AMDGPU::FLAT_SCR:
3487     case AMDGPU::FLAT_SCR_LO:
3488     case AMDGPU::FLAT_SCR_HI:
3489       return false;
3490     default:
3491       return true;
3492     }
3493   }
3494 
3495   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3496   // SI/CI have.
3497   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3498        R.isValid(); ++R) {
3499     if (*R == RegNo)
3500       return false;
3501   }
3502 
3503   return true;
3504 }
3505 
3506 OperandMatchResultTy
3507 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
3508   // Try to parse with a custom parser first.
3509   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3510 
3511   // If we successfully parsed the operand or if there was an error parsing,
3512   // we are done.
3513   //
3514   // If we are parsing after we reach EndOfStatement then this means we
3515   // are appending default values to the Operands list. This is only done
3516   // by a custom parser, so we shouldn't continue on to the generic parsing.
3517   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3518       getLexer().is(AsmToken::EndOfStatement))
3519     return ResTy;
3520 
3521   ResTy = parseRegOrImm(Operands);
3522 
3523   if (ResTy == MatchOperand_Success)
3524     return ResTy;
3525 
3526   const auto &Tok = Parser.getTok();
3527   SMLoc S = Tok.getLoc();
3528 
3529   const MCExpr *Expr = nullptr;
3530   if (!Parser.parseExpression(Expr)) {
3531     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3532     return MatchOperand_Success;
3533   }
3534 
3535   // Possibly this is an instruction flag like 'gds'.
3536 if (Tok.getKind() == AsmToken::Identifier) { 3537 Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S)); 3538 Parser.Lex(); 3539 return MatchOperand_Success; 3540 } 3541 3542 return MatchOperand_NoMatch; 3543 } 3544 3545 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 3546 // Clear any forced encodings from the previous instruction. 3547 setForcedEncodingSize(0); 3548 setForcedDPP(false); 3549 setForcedSDWA(false); 3550 3551 if (Name.endswith("_e64")) { 3552 setForcedEncodingSize(64); 3553 return Name.substr(0, Name.size() - 4); 3554 } else if (Name.endswith("_e32")) { 3555 setForcedEncodingSize(32); 3556 return Name.substr(0, Name.size() - 4); 3557 } else if (Name.endswith("_dpp")) { 3558 setForcedDPP(true); 3559 return Name.substr(0, Name.size() - 4); 3560 } else if (Name.endswith("_sdwa")) { 3561 setForcedSDWA(true); 3562 return Name.substr(0, Name.size() - 5); 3563 } 3564 return Name; 3565 } 3566 3567 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 3568 StringRef Name, 3569 SMLoc NameLoc, OperandVector &Operands) { 3570 // Add the instruction mnemonic 3571 Name = parseMnemonicSuffix(Name); 3572 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 3573 3574 while (!getLexer().is(AsmToken::EndOfStatement)) { 3575 OperandMatchResultTy Res = parseOperand(Operands, Name); 3576 3577 // Eat the comma or space if there is one. 3578 if (getLexer().is(AsmToken::Comma)) 3579 Parser.Lex(); 3580 3581 switch (Res) { 3582 case MatchOperand_Success: break; 3583 case MatchOperand_ParseFail: 3584 Error(getLexer().getLoc(), "failed parsing operand."); 3585 while (!getLexer().is(AsmToken::EndOfStatement)) { 3586 Parser.Lex(); 3587 } 3588 return true; 3589 case MatchOperand_NoMatch: 3590 Error(getLexer().getLoc(), "not a valid operand."); 3591 while (!getLexer().is(AsmToken::EndOfStatement)) { 3592 Parser.Lex(); 3593 } 3594 return true; 3595 } 3596 } 3597 3598 return false; 3599 } 3600 3601 //===----------------------------------------------------------------------===// 3602 // Utility functions 3603 //===----------------------------------------------------------------------===// 3604 3605 OperandMatchResultTy 3606 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { 3607 switch(getLexer().getKind()) { 3608 default: return MatchOperand_NoMatch; 3609 case AsmToken::Identifier: { 3610 StringRef Name = Parser.getTok().getString(); 3611 if (!Name.equals(Prefix)) { 3612 return MatchOperand_NoMatch; 3613 } 3614 3615 Parser.Lex(); 3616 if (getLexer().isNot(AsmToken::Colon)) 3617 return MatchOperand_ParseFail; 3618 3619 Parser.Lex(); 3620 3621 bool IsMinus = false; 3622 if (getLexer().getKind() == AsmToken::Minus) { 3623 Parser.Lex(); 3624 IsMinus = true; 3625 } 3626 3627 if (getLexer().isNot(AsmToken::Integer)) 3628 return MatchOperand_ParseFail; 3629 3630 if (getParser().parseAbsoluteExpression(Int)) 3631 return MatchOperand_ParseFail; 3632 3633 if (IsMinus) 3634 Int = -Int; 3635 break; 3636 } 3637 } 3638 return MatchOperand_Success; 3639 } 3640 3641 OperandMatchResultTy 3642 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 3643 AMDGPUOperand::ImmTy ImmTy, 3644 bool (*ConvertResult)(int64_t&)) { 3645 SMLoc S = Parser.getTok().getLoc(); 3646 int64_t Value = 0; 3647 3648 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 3649 if (Res != MatchOperand_Success) 3650 return Res; 3651 3652 if (ConvertResult && !ConvertResult(Value)) { 3653 return MatchOperand_ParseFail; 3654 } 3655 
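  // ConvertResult (when supplied) has already remapped Value into its hardware
  // encoding, e.g. ConvertOmodMul turns mul:2 into the encoded 1; wrap the
  // final value as a typed immediate operand.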
3656 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 3657 return MatchOperand_Success; 3658 } 3659 3660 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix( 3661 const char *Prefix, 3662 OperandVector &Operands, 3663 AMDGPUOperand::ImmTy ImmTy, 3664 bool (*ConvertResult)(int64_t&)) { 3665 StringRef Name = Parser.getTok().getString(); 3666 if (!Name.equals(Prefix)) 3667 return MatchOperand_NoMatch; 3668 3669 Parser.Lex(); 3670 if (getLexer().isNot(AsmToken::Colon)) 3671 return MatchOperand_ParseFail; 3672 3673 Parser.Lex(); 3674 if (getLexer().isNot(AsmToken::LBrac)) 3675 return MatchOperand_ParseFail; 3676 Parser.Lex(); 3677 3678 unsigned Val = 0; 3679 SMLoc S = Parser.getTok().getLoc(); 3680 3681 // FIXME: How to verify the number of elements matches the number of src 3682 // operands? 3683 for (int I = 0; I < 4; ++I) { 3684 if (I != 0) { 3685 if (getLexer().is(AsmToken::RBrac)) 3686 break; 3687 3688 if (getLexer().isNot(AsmToken::Comma)) 3689 return MatchOperand_ParseFail; 3690 Parser.Lex(); 3691 } 3692 3693 if (getLexer().isNot(AsmToken::Integer)) 3694 return MatchOperand_ParseFail; 3695 3696 int64_t Op; 3697 if (getParser().parseAbsoluteExpression(Op)) 3698 return MatchOperand_ParseFail; 3699 3700 if (Op != 0 && Op != 1) 3701 return MatchOperand_ParseFail; 3702 Val |= (Op << I); 3703 } 3704 3705 Parser.Lex(); 3706 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 3707 return MatchOperand_Success; 3708 } 3709 3710 OperandMatchResultTy 3711 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 3712 AMDGPUOperand::ImmTy ImmTy) { 3713 int64_t Bit = 0; 3714 SMLoc S = Parser.getTok().getLoc(); 3715 3716 // We are at the end of the statement, and this is a default argument, so 3717 // use a default value. 
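  // Otherwise the bit must be spelled out: the bare name (e.g. 'glc') sets it,
  // the 'no'-prefixed form (e.g. 'noglc') clears it, and any other identifier
  // is not a match.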
3718 if (getLexer().isNot(AsmToken::EndOfStatement)) { 3719 switch(getLexer().getKind()) { 3720 case AsmToken::Identifier: { 3721 StringRef Tok = Parser.getTok().getString(); 3722 if (Tok == Name) { 3723 if (Tok == "r128" && isGFX9()) 3724 Error(S, "r128 modifier is not supported on this GPU"); 3725 if (Tok == "a16" && !isGFX9()) 3726 Error(S, "a16 modifier is not supported on this GPU"); 3727 Bit = 1; 3728 Parser.Lex(); 3729 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 3730 Bit = 0; 3731 Parser.Lex(); 3732 } else { 3733 return MatchOperand_NoMatch; 3734 } 3735 break; 3736 } 3737 default: 3738 return MatchOperand_NoMatch; 3739 } 3740 } 3741 3742 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 3743 return MatchOperand_Success; 3744 } 3745 3746 static void addOptionalImmOperand( 3747 MCInst& Inst, const OperandVector& Operands, 3748 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 3749 AMDGPUOperand::ImmTy ImmT, 3750 int64_t Default = 0) { 3751 auto i = OptionalIdx.find(ImmT); 3752 if (i != OptionalIdx.end()) { 3753 unsigned Idx = i->second; 3754 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 3755 } else { 3756 Inst.addOperand(MCOperand::createImm(Default)); 3757 } 3758 } 3759 3760 OperandMatchResultTy 3761 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 3762 if (getLexer().isNot(AsmToken::Identifier)) { 3763 return MatchOperand_NoMatch; 3764 } 3765 StringRef Tok = Parser.getTok().getString(); 3766 if (Tok != Prefix) { 3767 return MatchOperand_NoMatch; 3768 } 3769 3770 Parser.Lex(); 3771 if (getLexer().isNot(AsmToken::Colon)) { 3772 return MatchOperand_ParseFail; 3773 } 3774 3775 Parser.Lex(); 3776 if (getLexer().isNot(AsmToken::Identifier)) { 3777 return MatchOperand_ParseFail; 3778 } 3779 3780 Value = Parser.getTok().getString(); 3781 return MatchOperand_Success; 3782 } 3783 3784 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 3785 // values to live in a joint format operand in the MCInst encoding. 3786 OperandMatchResultTy 3787 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 3788 SMLoc S = Parser.getTok().getLoc(); 3789 int64_t Dfmt = 0, Nfmt = 0; 3790 // dfmt and nfmt can appear in either order, and each is optional. 
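  // e.g. 'dfmt:1, nfmt:2' and 'nfmt:2, dfmt:1' are equivalent; an omitted field
  // defaults to 0. The 4-bit dfmt and 3-bit nfmt values are packed below as
  // Dfmt | (Nfmt << 4).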
3791 bool GotDfmt = false, GotNfmt = false; 3792 while (!GotDfmt || !GotNfmt) { 3793 if (!GotDfmt) { 3794 auto Res = parseIntWithPrefix("dfmt", Dfmt); 3795 if (Res != MatchOperand_NoMatch) { 3796 if (Res != MatchOperand_Success) 3797 return Res; 3798 if (Dfmt >= 16) { 3799 Error(Parser.getTok().getLoc(), "out of range dfmt"); 3800 return MatchOperand_ParseFail; 3801 } 3802 GotDfmt = true; 3803 Parser.Lex(); 3804 continue; 3805 } 3806 } 3807 if (!GotNfmt) { 3808 auto Res = parseIntWithPrefix("nfmt", Nfmt); 3809 if (Res != MatchOperand_NoMatch) { 3810 if (Res != MatchOperand_Success) 3811 return Res; 3812 if (Nfmt >= 8) { 3813 Error(Parser.getTok().getLoc(), "out of range nfmt"); 3814 return MatchOperand_ParseFail; 3815 } 3816 GotNfmt = true; 3817 Parser.Lex(); 3818 continue; 3819 } 3820 } 3821 break; 3822 } 3823 if (!GotDfmt && !GotNfmt) 3824 return MatchOperand_NoMatch; 3825 auto Format = Dfmt | Nfmt << 4; 3826 Operands.push_back( 3827 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 3828 return MatchOperand_Success; 3829 } 3830 3831 //===----------------------------------------------------------------------===// 3832 // ds 3833 //===----------------------------------------------------------------------===// 3834 3835 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 3836 const OperandVector &Operands) { 3837 OptionalImmIndexMap OptionalIdx; 3838 3839 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3840 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3841 3842 // Add the register arguments 3843 if (Op.isReg()) { 3844 Op.addRegOperands(Inst, 1); 3845 continue; 3846 } 3847 3848 // Handle optional arguments 3849 OptionalIdx[Op.getImmTy()] = i; 3850 } 3851 3852 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 3853 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 3854 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3855 3856 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3857 } 3858 3859 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 3860 bool IsGdsHardcoded) { 3861 OptionalImmIndexMap OptionalIdx; 3862 3863 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3864 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3865 3866 // Add the register arguments 3867 if (Op.isReg()) { 3868 Op.addRegOperands(Inst, 1); 3869 continue; 3870 } 3871 3872 if (Op.isToken() && Op.getToken() == "gds") { 3873 IsGdsHardcoded = true; 3874 continue; 3875 } 3876 3877 // Handle optional arguments 3878 OptionalIdx[Op.getImmTy()] = i; 3879 } 3880 3881 AMDGPUOperand::ImmTy OffsetType = 3882 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si || 3883 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 3884 AMDGPUOperand::ImmTyOffset; 3885 3886 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 3887 3888 if (!IsGdsHardcoded) { 3889 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3890 } 3891 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3892 } 3893 3894 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 3895 OptionalImmIndexMap OptionalIdx; 3896 3897 unsigned OperandIdx[4]; 3898 unsigned EnMask = 0; 3899 int SrcIdx = 0; 3900 3901 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3902 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3903 3904 // Add the register arguments 3905 if (Op.isReg()) { 3906 assert(SrcIdx < 4); 3907 OperandIdx[SrcIdx] = Inst.size(); 3908 Op.addRegOperands(Inst, 1); 3909 ++SrcIdx; 3910 continue; 3911 } 3912 3913 if (Op.isOff()) { 3914 assert(SrcIdx < 4); 3915 OperandIdx[SrcIdx] = Inst.size(); 3916 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 3917 ++SrcIdx; 3918 continue; 3919 } 3920 3921 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 3922 Op.addImmOperands(Inst, 1); 3923 continue; 3924 } 3925 3926 if (Op.isToken() && Op.getToken() == "done") 3927 continue; 3928 3929 // Handle optional arguments 3930 OptionalIdx[Op.getImmTy()] = i; 3931 } 3932 3933 assert(SrcIdx == 4); 3934 3935 bool Compr = false; 3936 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 3937 Compr = true; 3938 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 3939 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 3940 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 3941 } 3942 3943 for (auto i = 0; i < SrcIdx; ++i) { 3944 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 3945 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 3946 } 3947 } 3948 3949 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 3950 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 3951 3952 Inst.addOperand(MCOperand::createImm(EnMask)); 3953 } 3954 3955 //===----------------------------------------------------------------------===// 3956 // s_waitcnt 3957 //===----------------------------------------------------------------------===// 3958 3959 static bool 3960 encodeCnt( 3961 const AMDGPU::IsaVersion ISA, 3962 int64_t &IntVal, 3963 int64_t CntVal, 3964 bool Saturate, 3965 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 3966 unsigned (*decode)(const IsaVersion &Version, unsigned)) 3967 { 3968 bool Failed = false; 3969 3970 IntVal = encode(ISA, IntVal, CntVal); 3971 if (CntVal != decode(ISA, IntVal)) { 3972 if (Saturate) { 3973 IntVal = encode(ISA, IntVal, -1); 3974 } else { 3975 Failed = true; 3976 } 3977 } 3978 return Failed; 3979 } 3980 3981 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 3982 StringRef CntName = Parser.getTok().getString(); 3983 int64_t CntVal; 3984 3985 Parser.Lex(); 3986 if (getLexer().isNot(AsmToken::LParen)) 3987 return true; 3988 3989 Parser.Lex(); 3990 if (getLexer().isNot(AsmToken::Integer)) 3991 return true; 3992 3993 SMLoc ValLoc = Parser.getTok().getLoc(); 3994 if (getParser().parseAbsoluteExpression(CntVal)) 3995 return true; 3996 3997 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3998 3999 bool Failed = true; 4000 bool Sat = CntName.endswith("_sat"); 4001 4002 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4003 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4004 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4005 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4006 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4007 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4008 } 4009 4010 if (Failed) { 4011 Error(ValLoc, "too large value for " + CntName); 4012 return true; 4013 } 4014 4015 if (getLexer().isNot(AsmToken::RParen)) { 4016 return true; 4017 } 4018 4019 Parser.Lex(); 4020 if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { 4021 const AsmToken NextToken = getLexer().peekTok(); 4022 if (NextToken.is(AsmToken::Identifier)) { 4023 Parser.Lex(); 4024 } 4025 } 4026 4027 return false; 4028 } 4029 4030 OperandMatchResultTy 4031 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4032 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4033 int64_t Waitcnt = getWaitcntBitMask(ISA); 4034 SMLoc S = Parser.getTok().getLoc(); 4035 4036 switch(getLexer().getKind()) { 4037 default: return MatchOperand_ParseFail; 4038 case AsmToken::Integer: 4039 // The operand can be an integer value. 
4040 if (getParser().parseAbsoluteExpression(Waitcnt)) 4041 return MatchOperand_ParseFail; 4042 break; 4043 4044 case AsmToken::Identifier: 4045 do { 4046 if (parseCnt(Waitcnt)) 4047 return MatchOperand_ParseFail; 4048 } while(getLexer().isNot(AsmToken::EndOfStatement)); 4049 break; 4050 } 4051 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4052 return MatchOperand_Success; 4053 } 4054 4055 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, 4056 int64_t &Width) { 4057 using namespace llvm::AMDGPU::Hwreg; 4058 4059 if (Parser.getTok().getString() != "hwreg") 4060 return true; 4061 Parser.Lex(); 4062 4063 if (getLexer().isNot(AsmToken::LParen)) 4064 return true; 4065 Parser.Lex(); 4066 4067 if (getLexer().is(AsmToken::Identifier)) { 4068 HwReg.IsSymbolic = true; 4069 HwReg.Id = ID_UNKNOWN_; 4070 const StringRef tok = Parser.getTok().getString(); 4071 int Last = ID_SYMBOLIC_LAST_; 4072 if (isSI() || isCI() || isVI()) 4073 Last = ID_SYMBOLIC_FIRST_GFX9_; 4074 for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) { 4075 if (tok == IdSymbolic[i]) { 4076 HwReg.Id = i; 4077 break; 4078 } 4079 } 4080 Parser.Lex(); 4081 } else { 4082 HwReg.IsSymbolic = false; 4083 if (getLexer().isNot(AsmToken::Integer)) 4084 return true; 4085 if (getParser().parseAbsoluteExpression(HwReg.Id)) 4086 return true; 4087 } 4088 4089 if (getLexer().is(AsmToken::RParen)) { 4090 Parser.Lex(); 4091 return false; 4092 } 4093 4094 // optional params 4095 if (getLexer().isNot(AsmToken::Comma)) 4096 return true; 4097 Parser.Lex(); 4098 4099 if (getLexer().isNot(AsmToken::Integer)) 4100 return true; 4101 if (getParser().parseAbsoluteExpression(Offset)) 4102 return true; 4103 4104 if (getLexer().isNot(AsmToken::Comma)) 4105 return true; 4106 Parser.Lex(); 4107 4108 if (getLexer().isNot(AsmToken::Integer)) 4109 return true; 4110 if (getParser().parseAbsoluteExpression(Width)) 4111 return true; 4112 4113 if (getLexer().isNot(AsmToken::RParen)) 4114 return true; 4115 Parser.Lex(); 4116 4117 return false; 4118 } 4119 4120 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4121 using namespace llvm::AMDGPU::Hwreg; 4122 4123 int64_t Imm16Val = 0; 4124 SMLoc S = Parser.getTok().getLoc(); 4125 4126 switch(getLexer().getKind()) { 4127 default: return MatchOperand_NoMatch; 4128 case AsmToken::Integer: 4129 // The operand can be an integer value. 4130 if (getParser().parseAbsoluteExpression(Imm16Val)) 4131 return MatchOperand_NoMatch; 4132 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4133 Error(S, "invalid immediate: only 16-bit values are legal"); 4134 // Do not return error code, but create an imm operand anyway and proceed 4135 // to the next operand, if any. That avoids unnecessary error messages.
4136 } 4137 break; 4138 4139 case AsmToken::Identifier: { 4140 OperandInfoTy HwReg(ID_UNKNOWN_); 4141 int64_t Offset = OFFSET_DEFAULT_; 4142 int64_t Width = WIDTH_M1_DEFAULT_ + 1; 4143 if (parseHwregConstruct(HwReg, Offset, Width)) 4144 return MatchOperand_ParseFail; 4145 if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) { 4146 if (HwReg.IsSymbolic) 4147 Error(S, "invalid symbolic name of hardware register"); 4148 else 4149 Error(S, "invalid code of hardware register: only 6-bit values are legal"); 4150 } 4151 if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset)) 4152 Error(S, "invalid bit offset: only 5-bit values are legal"); 4153 if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1)) 4154 Error(S, "invalid bitfield width: only values from 1 to 32 are legal"); 4155 Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_); 4156 } 4157 break; 4158 } 4159 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg)); 4160 return MatchOperand_Success; 4161 } 4162 4163 bool AMDGPUOperand::isSWaitCnt() const { 4164 return isImm(); 4165 } 4166 4167 bool AMDGPUOperand::isHwreg() const { 4168 return isImmTy(ImmTyHwreg); 4169 } 4170 4171 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) { 4172 using namespace llvm::AMDGPU::SendMsg; 4173 4174 if (Parser.getTok().getString() != "sendmsg") 4175 return true; 4176 Parser.Lex(); 4177 4178 if (getLexer().isNot(AsmToken::LParen)) 4179 return true; 4180 Parser.Lex(); 4181 4182 if (getLexer().is(AsmToken::Identifier)) { 4183 Msg.IsSymbolic = true; 4184 Msg.Id = ID_UNKNOWN_; 4185 const std::string tok = Parser.getTok().getString(); 4186 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { 4187 switch(i) { 4188 default: continue; // Omit gaps. 4189 case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break; 4190 } 4191 if (tok == IdSymbolic[i]) { 4192 Msg.Id = i; 4193 break; 4194 } 4195 } 4196 Parser.Lex(); 4197 } else { 4198 Msg.IsSymbolic = false; 4199 if (getLexer().isNot(AsmToken::Integer)) 4200 return true; 4201 if (getParser().parseAbsoluteExpression(Msg.Id)) 4202 return true; 4203 if (getLexer().is(AsmToken::Integer)) 4204 if (getParser().parseAbsoluteExpression(Msg.Id)) 4205 Msg.Id = ID_UNKNOWN_; 4206 } 4207 if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest. 4208 return false; 4209 4210 if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) { 4211 if (getLexer().isNot(AsmToken::RParen)) 4212 return true; 4213 Parser.Lex(); 4214 return false; 4215 } 4216 4217 if (getLexer().isNot(AsmToken::Comma)) 4218 return true; 4219 Parser.Lex(); 4220 4221 assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG); 4222 Operation.Id = ID_UNKNOWN_; 4223 if (getLexer().is(AsmToken::Identifier)) { 4224 Operation.IsSymbolic = true; 4225 const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; 4226 const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; 4227 const int L = (Msg.Id == ID_SYSMSG) ? 
OP_SYS_LAST_ : OP_GS_LAST_; 4228 const StringRef Tok = Parser.getTok().getString(); 4229 for (int i = F; i < L; ++i) { 4230 if (Tok == S[i]) { 4231 Operation.Id = i; 4232 break; 4233 } 4234 } 4235 Parser.Lex(); 4236 } else { 4237 Operation.IsSymbolic = false; 4238 if (getLexer().isNot(AsmToken::Integer)) 4239 return true; 4240 if (getParser().parseAbsoluteExpression(Operation.Id)) 4241 return true; 4242 } 4243 4244 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4245 // Stream id is optional. 4246 if (getLexer().is(AsmToken::RParen)) { 4247 Parser.Lex(); 4248 return false; 4249 } 4250 4251 if (getLexer().isNot(AsmToken::Comma)) 4252 return true; 4253 Parser.Lex(); 4254 4255 if (getLexer().isNot(AsmToken::Integer)) 4256 return true; 4257 if (getParser().parseAbsoluteExpression(StreamId)) 4258 return true; 4259 } 4260 4261 if (getLexer().isNot(AsmToken::RParen)) 4262 return true; 4263 Parser.Lex(); 4264 return false; 4265 } 4266 4267 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 4268 if (getLexer().getKind() != AsmToken::Identifier) 4269 return MatchOperand_NoMatch; 4270 4271 StringRef Str = Parser.getTok().getString(); 4272 int Slot = StringSwitch<int>(Str) 4273 .Case("p10", 0) 4274 .Case("p20", 1) 4275 .Case("p0", 2) 4276 .Default(-1); 4277 4278 SMLoc S = Parser.getTok().getLoc(); 4279 if (Slot == -1) 4280 return MatchOperand_ParseFail; 4281 4282 Parser.Lex(); 4283 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 4284 AMDGPUOperand::ImmTyInterpSlot)); 4285 return MatchOperand_Success; 4286 } 4287 4288 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 4289 if (getLexer().getKind() != AsmToken::Identifier) 4290 return MatchOperand_NoMatch; 4291 4292 StringRef Str = Parser.getTok().getString(); 4293 if (!Str.startswith("attr")) 4294 return MatchOperand_NoMatch; 4295 4296 StringRef Chan = Str.take_back(2); 4297 int AttrChan = StringSwitch<int>(Chan) 4298 .Case(".x", 0) 4299 .Case(".y", 1) 4300 .Case(".z", 2) 4301 .Case(".w", 3) 4302 .Default(-1); 4303 if (AttrChan == -1) 4304 return MatchOperand_ParseFail; 4305 4306 Str = Str.drop_back(2).drop_front(4); 4307 4308 uint8_t Attr; 4309 if (Str.getAsInteger(10, Attr)) 4310 return MatchOperand_ParseFail; 4311 4312 SMLoc S = Parser.getTok().getLoc(); 4313 Parser.Lex(); 4314 if (Attr > 63) { 4315 Error(S, "out of bounds attr"); 4316 return MatchOperand_Success; 4317 } 4318 4319 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 4320 4321 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 4322 AMDGPUOperand::ImmTyInterpAttr)); 4323 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 4324 AMDGPUOperand::ImmTyAttrChan)); 4325 return MatchOperand_Success; 4326 } 4327 4328 void AMDGPUAsmParser::errorExpTgt() { 4329 Error(Parser.getTok().getLoc(), "invalid exp target"); 4330 } 4331 4332 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 4333 uint8_t &Val) { 4334 if (Str == "null") { 4335 Val = 9; 4336 return MatchOperand_Success; 4337 } 4338 4339 if (Str.startswith("mrt")) { 4340 Str = Str.drop_front(3); 4341 if (Str == "z") { // == mrtz 4342 Val = 8; 4343 return MatchOperand_Success; 4344 } 4345 4346 if (Str.getAsInteger(10, Val)) 4347 return MatchOperand_ParseFail; 4348 4349 if (Val > 7) 4350 errorExpTgt(); 4351 4352 return MatchOperand_Success; 4353 } 4354 4355 if (Str.startswith("pos")) { 4356 Str = Str.drop_front(3); 4357 if (Str.getAsInteger(10, Val)) 4358 return MatchOperand_ParseFail; 4359 4360 if (Val > 
3) 4361 errorExpTgt(); 4362 4363 Val += 12; 4364 return MatchOperand_Success; 4365 } 4366 4367 if (Str.startswith("param")) { 4368 Str = Str.drop_front(5); 4369 if (Str.getAsInteger(10, Val)) 4370 return MatchOperand_ParseFail; 4371 4372 if (Val >= 32) 4373 errorExpTgt(); 4374 4375 Val += 32; 4376 return MatchOperand_Success; 4377 } 4378 4379 if (Str.startswith("invalid_target_")) { 4380 Str = Str.drop_front(15); 4381 if (Str.getAsInteger(10, Val)) 4382 return MatchOperand_ParseFail; 4383 4384 errorExpTgt(); 4385 return MatchOperand_Success; 4386 } 4387 4388 return MatchOperand_NoMatch; 4389 } 4390 4391 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 4392 uint8_t Val; 4393 StringRef Str = Parser.getTok().getString(); 4394 4395 auto Res = parseExpTgtImpl(Str, Val); 4396 if (Res != MatchOperand_Success) 4397 return Res; 4398 4399 SMLoc S = Parser.getTok().getLoc(); 4400 Parser.Lex(); 4401 4402 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 4403 AMDGPUOperand::ImmTyExpTgt)); 4404 return MatchOperand_Success; 4405 } 4406 4407 OperandMatchResultTy 4408 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 4409 using namespace llvm::AMDGPU::SendMsg; 4410 4411 int64_t Imm16Val = 0; 4412 SMLoc S = Parser.getTok().getLoc(); 4413 4414 switch(getLexer().getKind()) { 4415 default: 4416 return MatchOperand_NoMatch; 4417 case AsmToken::Integer: 4418 // The operand can be an integer value. 4419 if (getParser().parseAbsoluteExpression(Imm16Val)) 4420 return MatchOperand_NoMatch; 4421 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4422 Error(S, "invalid immediate: only 16-bit values are legal"); 4423 // Do not return error code, but create an imm operand anyway and proceed 4424 // to the next operand, if any. That avoids unnecessary error messages. 4425 } 4426 break; 4427 case AsmToken::Identifier: { 4428 OperandInfoTy Msg(ID_UNKNOWN_); 4429 OperandInfoTy Operation(OP_UNKNOWN_); 4430 int64_t StreamId = STREAM_ID_DEFAULT_; 4431 if (parseSendMsgConstruct(Msg, Operation, StreamId)) 4432 return MatchOperand_ParseFail; 4433 do { 4434 // Validate and encode message ID. 4435 if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE) 4436 || Msg.Id == ID_SYSMSG)) { 4437 if (Msg.IsSymbolic) 4438 Error(S, "invalid/unsupported symbolic name of message"); 4439 else 4440 Error(S, "invalid/unsupported code of message"); 4441 break; 4442 } 4443 Imm16Val = (Msg.Id << ID_SHIFT_); 4444 // Validate and encode operation ID. 4445 if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) { 4446 if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) { 4447 if (Operation.IsSymbolic) 4448 Error(S, "invalid symbolic name of GS_OP"); 4449 else 4450 Error(S, "invalid code of GS_OP: only 2-bit values are legal"); 4451 break; 4452 } 4453 if (Operation.Id == OP_GS_NOP 4454 && Msg.Id != ID_GS_DONE) { 4455 Error(S, "invalid GS_OP: NOP is for GS_DONE only"); 4456 break; 4457 } 4458 Imm16Val |= (Operation.Id << OP_SHIFT_); 4459 } 4460 if (Msg.Id == ID_SYSMSG) { 4461 if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) { 4462 if (Operation.IsSymbolic) 4463 Error(S, "invalid/unsupported symbolic name of SYSMSG_OP"); 4464 else 4465 Error(S, "invalid/unsupported code of SYSMSG_OP"); 4466 break; 4467 } 4468 Imm16Val |= (Operation.Id << OP_SHIFT_); 4469 } 4470 // Validate and encode stream ID. 4471 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4472 if (!
(STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) { 4473 Error(S, "invalid stream id: only 2-bit values are legal"); 4474 break; 4475 } 4476 Imm16Val |= (StreamId << STREAM_ID_SHIFT_); 4477 } 4478 } while (false); 4479 } 4480 break; 4481 } 4482 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); 4483 return MatchOperand_Success; 4484 } 4485 4486 bool AMDGPUOperand::isSendMsg() const { 4487 return isImmTy(ImmTySendMsg); 4488 } 4489 4490 //===----------------------------------------------------------------------===// 4491 // parser helpers 4492 //===----------------------------------------------------------------------===// 4493 4494 bool 4495 AMDGPUAsmParser::trySkipId(const StringRef Id) { 4496 if (getLexer().getKind() == AsmToken::Identifier && 4497 Parser.getTok().getString() == Id) { 4498 Parser.Lex(); 4499 return true; 4500 } 4501 return false; 4502 } 4503 4504 bool 4505 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 4506 if (getLexer().getKind() == Kind) { 4507 Parser.Lex(); 4508 return true; 4509 } 4510 return false; 4511 } 4512 4513 bool 4514 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 4515 const StringRef ErrMsg) { 4516 if (!trySkipToken(Kind)) { 4517 Error(Parser.getTok().getLoc(), ErrMsg); 4518 return false; 4519 } 4520 return true; 4521 } 4522 4523 bool 4524 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 4525 return !getParser().parseAbsoluteExpression(Imm); 4526 } 4527 4528 bool 4529 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 4530 SMLoc S = Parser.getTok().getLoc(); 4531 if (getLexer().getKind() == AsmToken::String) { 4532 Val = Parser.getTok().getStringContents(); 4533 Parser.Lex(); 4534 return true; 4535 } else { 4536 Error(S, ErrMsg); 4537 return false; 4538 } 4539 } 4540 4541 //===----------------------------------------------------------------------===// 4542 // swizzle 4543 //===----------------------------------------------------------------------===// 4544 4545 LLVM_READNONE 4546 static unsigned 4547 encodeBitmaskPerm(const unsigned AndMask, 4548 const unsigned OrMask, 4549 const unsigned XorMask) { 4550 using namespace llvm::AMDGPU::Swizzle; 4551 4552 return BITMASK_PERM_ENC | 4553 (AndMask << BITMASK_AND_SHIFT) | 4554 (OrMask << BITMASK_OR_SHIFT) | 4555 (XorMask << BITMASK_XOR_SHIFT); 4556 } 4557 4558 bool 4559 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 4560 const unsigned MinVal, 4561 const unsigned MaxVal, 4562 const StringRef ErrMsg) { 4563 for (unsigned i = 0; i < OpNum; ++i) { 4564 if (!skipToken(AsmToken::Comma, "expected a comma")){ 4565 return false; 4566 } 4567 SMLoc ExprLoc = Parser.getTok().getLoc(); 4568 if (!parseExpr(Op[i])) { 4569 return false; 4570 } 4571 if (Op[i] < MinVal || Op[i] > MaxVal) { 4572 Error(ExprLoc, ErrMsg); 4573 return false; 4574 } 4575 } 4576 4577 return true; 4578 } 4579 4580 bool 4581 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 4582 using namespace llvm::AMDGPU::Swizzle; 4583 4584 int64_t Lane[LANE_NUM]; 4585 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 4586 "expected a 2-bit lane id")) { 4587 Imm = QUAD_PERM_ENC; 4588 for (unsigned I = 0; I < LANE_NUM; ++I) { 4589 Imm |= Lane[I] << (LANE_SHIFT * I); 4590 } 4591 return true; 4592 } 4593 return false; 4594 } 4595 4596 bool 4597 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 4598 using namespace llvm::AMDGPU::Swizzle; 4599 4600 SMLoc S = Parser.getTok().getLoc(); 4601 int64_t GroupSize; 4602 int64_t LaneIdx; 4603 4604 if 
(!parseSwizzleOperands(1, &GroupSize, 4605 2, 32, 4606 "group size must be in the interval [2,32]")) { 4607 return false; 4608 } 4609 if (!isPowerOf2_64(GroupSize)) { 4610 Error(S, "group size must be a power of two"); 4611 return false; 4612 } 4613 if (parseSwizzleOperands(1, &LaneIdx, 4614 0, GroupSize - 1, 4615 "lane id must be in the interval [0,group size - 1]")) { 4616 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 4617 return true; 4618 } 4619 return false; 4620 } 4621 4622 bool 4623 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 4624 using namespace llvm::AMDGPU::Swizzle; 4625 4626 SMLoc S = Parser.getTok().getLoc(); 4627 int64_t GroupSize; 4628 4629 if (!parseSwizzleOperands(1, &GroupSize, 4630 2, 32, "group size must be in the interval [2,32]")) { 4631 return false; 4632 } 4633 if (!isPowerOf2_64(GroupSize)) { 4634 Error(S, "group size must be a power of two"); 4635 return false; 4636 } 4637 4638 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 4639 return true; 4640 } 4641 4642 bool 4643 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 4644 using namespace llvm::AMDGPU::Swizzle; 4645 4646 SMLoc S = Parser.getTok().getLoc(); 4647 int64_t GroupSize; 4648 4649 if (!parseSwizzleOperands(1, &GroupSize, 4650 1, 16, "group size must be in the interval [1,16]")) { 4651 return false; 4652 } 4653 if (!isPowerOf2_64(GroupSize)) { 4654 Error(S, "group size must be a power of two"); 4655 return false; 4656 } 4657 4658 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 4659 return true; 4660 } 4661 4662 bool 4663 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 4664 using namespace llvm::AMDGPU::Swizzle; 4665 4666 if (!skipToken(AsmToken::Comma, "expected a comma")) { 4667 return false; 4668 } 4669 4670 StringRef Ctl; 4671 SMLoc StrLoc = Parser.getTok().getLoc(); 4672 if (!parseString(Ctl)) { 4673 return false; 4674 } 4675 if (Ctl.size() != BITMASK_WIDTH) { 4676 Error(StrLoc, "expected a 5-character mask"); 4677 return false; 4678 } 4679 4680 unsigned AndMask = 0; 4681 unsigned OrMask = 0; 4682 unsigned XorMask = 0; 4683 4684 for (size_t i = 0; i < Ctl.size(); ++i) { 4685 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 4686 switch(Ctl[i]) { 4687 default: 4688 Error(StrLoc, "invalid mask"); 4689 return false; 4690 case '0': 4691 break; 4692 case '1': 4693 OrMask |= Mask; 4694 break; 4695 case 'p': 4696 AndMask |= Mask; 4697 break; 4698 case 'i': 4699 AndMask |= Mask; 4700 XorMask |= Mask; 4701 break; 4702 } 4703 } 4704 4705 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 4706 return true; 4707 } 4708 4709 bool 4710 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 4711 4712 SMLoc OffsetLoc = Parser.getTok().getLoc(); 4713 4714 if (!parseExpr(Imm)) { 4715 return false; 4716 } 4717 if (!isUInt<16>(Imm)) { 4718 Error(OffsetLoc, "expected a 16-bit offset"); 4719 return false; 4720 } 4721 return true; 4722 } 4723 4724 bool 4725 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 4726 using namespace llvm::AMDGPU::Swizzle; 4727 4728 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 4729 4730 SMLoc ModeLoc = Parser.getTok().getLoc(); 4731 bool Ok = false; 4732 4733 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 4734 Ok = parseSwizzleQuadPerm(Imm); 4735 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 4736 Ok = parseSwizzleBitmaskPerm(Imm); 4737 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 4738 Ok = parseSwizzleBroadcast(Imm); 4739 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 4740 Ok = parseSwizzleSwap(Imm); 4741 } else if 
(trySkipId(IdSymbolic[ID_REVERSE])) { 4742 Ok = parseSwizzleReverse(Imm); 4743 } else { 4744 Error(ModeLoc, "expected a swizzle mode"); 4745 } 4746 4747 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 4748 } 4749 4750 return false; 4751 } 4752 4753 OperandMatchResultTy 4754 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 4755 SMLoc S = Parser.getTok().getLoc(); 4756 int64_t Imm = 0; 4757 4758 if (trySkipId("offset")) { 4759 4760 bool Ok = false; 4761 if (skipToken(AsmToken::Colon, "expected a colon")) { 4762 if (trySkipId("swizzle")) { 4763 Ok = parseSwizzleMacro(Imm); 4764 } else { 4765 Ok = parseSwizzleOffset(Imm); 4766 } 4767 } 4768 4769 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 4770 4771 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 4772 } else { 4773 // Swizzle "offset" operand is optional. 4774 // If it is omitted, try parsing other optional operands. 4775 return parseOptionalOpr(Operands); 4776 } 4777 } 4778 4779 bool 4780 AMDGPUOperand::isSwizzle() const { 4781 return isImmTy(ImmTySwizzle); 4782 } 4783 4784 //===----------------------------------------------------------------------===// 4785 // VGPR Index Mode 4786 //===----------------------------------------------------------------------===// 4787 4788 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 4789 4790 using namespace llvm::AMDGPU::VGPRIndexMode; 4791 4792 if (trySkipToken(AsmToken::RParen)) { 4793 return OFF; 4794 } 4795 4796 int64_t Imm = 0; 4797 4798 while (true) { 4799 unsigned Mode = 0; 4800 SMLoc S = Parser.getTok().getLoc(); 4801 4802 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 4803 if (trySkipId(IdSymbolic[ModeId])) { 4804 Mode = 1 << ModeId; 4805 break; 4806 } 4807 } 4808 4809 if (Mode == 0) { 4810 Error(S, (Imm == 0)? 4811 "expected a VGPR index mode or a closing parenthesis" : 4812 "expected a VGPR index mode"); 4813 break; 4814 } 4815 4816 if (Imm & Mode) { 4817 Error(S, "duplicate VGPR index mode"); 4818 break; 4819 } 4820 Imm |= Mode; 4821 4822 if (trySkipToken(AsmToken::RParen)) 4823 break; 4824 if (!skipToken(AsmToken::Comma, 4825 "expected a comma or a closing parenthesis")) 4826 break; 4827 } 4828 4829 return Imm; 4830 } 4831 4832 OperandMatchResultTy 4833 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 4834 4835 int64_t Imm = 0; 4836 SMLoc S = Parser.getTok().getLoc(); 4837 4838 if (getLexer().getKind() == AsmToken::Identifier && 4839 Parser.getTok().getString() == "gpr_idx" && 4840 getLexer().peekTok().is(AsmToken::LParen)) { 4841 4842 Parser.Lex(); 4843 Parser.Lex(); 4844 4845 // If parse failed, trigger an error but do not return error code 4846 // to avoid excessive error messages. 
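  // parseGPRIdxMacro() reports its own diagnostics via Error() and returns
  // whatever mode mask it managed to accumulate, so no error code is
  // propagated from here.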
4847 Imm = parseGPRIdxMacro(); 4848 4849 } else { 4850 if (getParser().parseAbsoluteExpression(Imm)) 4851 return MatchOperand_NoMatch; 4852 if (Imm < 0 || !isUInt<4>(Imm)) { 4853 Error(S, "invalid immediate: only 4-bit values are legal"); 4854 } 4855 } 4856 4857 Operands.push_back( 4858 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 4859 return MatchOperand_Success; 4860 } 4861 4862 bool AMDGPUOperand::isGPRIdxMode() const { 4863 return isImmTy(ImmTyGprIdxMode); 4864 } 4865 4866 //===----------------------------------------------------------------------===// 4867 // sopp branch targets 4868 //===----------------------------------------------------------------------===// 4869 4870 OperandMatchResultTy 4871 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 4872 SMLoc S = Parser.getTok().getLoc(); 4873 4874 switch (getLexer().getKind()) { 4875 default: return MatchOperand_ParseFail; 4876 case AsmToken::Integer: { 4877 int64_t Imm; 4878 if (getParser().parseAbsoluteExpression(Imm)) 4879 return MatchOperand_ParseFail; 4880 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 4881 return MatchOperand_Success; 4882 } 4883 4884 case AsmToken::Identifier: 4885 Operands.push_back(AMDGPUOperand::CreateExpr(this, 4886 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 4887 Parser.getTok().getString()), getContext()), S)); 4888 Parser.Lex(); 4889 return MatchOperand_Success; 4890 } 4891 } 4892 4893 //===----------------------------------------------------------------------===// 4894 // mubuf 4895 //===----------------------------------------------------------------------===// 4896 4897 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 4898 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 4899 } 4900 4901 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 4902 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 4903 } 4904 4905 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 4906 const OperandVector &Operands, 4907 bool IsAtomic, 4908 bool IsAtomicReturn, 4909 bool IsLds) { 4910 bool IsLdsOpcode = IsLds; 4911 bool HasLdsModifier = false; 4912 OptionalImmIndexMap OptionalIdx; 4913 assert(IsAtomicReturn ? IsAtomic : true); 4914 unsigned FirstOperandIdx = 1; 4915 4916 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 4917 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4918 4919 // Add the register arguments 4920 if (Op.isReg()) { 4921 Op.addRegOperands(Inst, 1); 4922 // Insert a tied src for atomic return dst. 4923 // This cannot be postponed as subsequent calls to 4924 // addImmOperands rely on correct number of MC operands. 4925 if (IsAtomicReturn && i == FirstOperandIdx) 4926 Op.addRegOperands(Inst, 1); 4927 continue; 4928 } 4929 4930 // Handle the case where soffset is an immediate 4931 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4932 Op.addImmOperands(Inst, 1); 4933 continue; 4934 } 4935 4936 HasLdsModifier = Op.isLDS(); 4937 4938 // Handle tokens like 'offen' which are sometimes hard-coded into the 4939 // asm string. There are no MCInst operands for these. 4940 if (Op.isToken()) { 4941 continue; 4942 } 4943 assert(Op.isImm()); 4944 4945 // Handle optional arguments 4946 OptionalIdx[Op.getImmTy()] = i; 4947 } 4948 4949 // This is a workaround for an llvm quirk which may result in an 4950 // incorrect instruction selection. 
Lds and non-lds versions of 4951 // MUBUF instructions are identical except that lds versions 4952 // have mandatory 'lds' modifier. However this modifier follows 4953 // optional modifiers and llvm asm matcher regards this 'lds' 4954 // modifier as an optional one. As a result, an lds version 4955 // of opcode may be selected even if it has no 'lds' modifier. 4956 if (IsLdsOpcode && !HasLdsModifier) { 4957 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 4958 if (NoLdsOpcode != -1) { // Got lds version - correct it. 4959 Inst.setOpcode(NoLdsOpcode); 4960 IsLdsOpcode = false; 4961 } 4962 } 4963 4964 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 4965 if (!IsAtomic) { // glc is hard-coded. 4966 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4967 } 4968 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4969 4970 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 4971 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4972 } 4973 } 4974 4975 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 4976 OptionalImmIndexMap OptionalIdx; 4977 4978 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4979 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4980 4981 // Add the register arguments 4982 if (Op.isReg()) { 4983 Op.addRegOperands(Inst, 1); 4984 continue; 4985 } 4986 4987 // Handle the case where soffset is an immediate 4988 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4989 Op.addImmOperands(Inst, 1); 4990 continue; 4991 } 4992 4993 // Handle tokens like 'offen' which are sometimes hard-coded into the 4994 // asm string. There are no MCInst operands for these. 4995 if (Op.isToken()) { 4996 continue; 4997 } 4998 assert(Op.isImm()); 4999 5000 // Handle optional arguments 5001 OptionalIdx[Op.getImmTy()] = i; 5002 } 5003 5004 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5005 AMDGPUOperand::ImmTyOffset); 5006 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5007 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5008 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5009 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5010 } 5011 5012 //===----------------------------------------------------------------------===// 5013 // mimg 5014 //===----------------------------------------------------------------------===// 5015 5016 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5017 bool IsAtomic) { 5018 unsigned I = 1; 5019 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5020 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5021 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5022 } 5023 5024 if (IsAtomic) { 5025 // Add src, same as dst 5026 assert(Desc.getNumDefs() == 1); 5027 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5028 } 5029 5030 OptionalImmIndexMap OptionalIdx; 5031 5032 for (unsigned E = Operands.size(); I != E; ++I) { 5033 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5034 5035 // Add the register arguments 5036 if (Op.isReg()) { 5037 Op.addRegOperands(Inst, 1); 5038 } else if (Op.isImmModifier()) { 5039 OptionalIdx[Op.getImmTy()] = I; 5040 } else { 5041 llvm_unreachable("unexpected operand type"); 5042 } 5043 } 5044 5045 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5046 
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5047 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5048 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5049 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5050 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5051 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5052 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5053 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5054 } 5055 5056 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5057 cvtMIMG(Inst, Operands, true); 5058 } 5059 5060 //===----------------------------------------------------------------------===// 5061 // smrd 5062 //===----------------------------------------------------------------------===// 5063 5064 bool AMDGPUOperand::isSMRDOffset8() const { 5065 return isImm() && isUInt<8>(getImm()); 5066 } 5067 5068 bool AMDGPUOperand::isSMRDOffset20() const { 5069 return isImm() && isUInt<20>(getImm()); 5070 } 5071 5072 bool AMDGPUOperand::isSMRDLiteralOffset() const { 5073 // 32-bit literals are only supported on CI and we only want to use them 5074 // when the offset is > 8-bits. 5075 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 5076 } 5077 5078 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 5079 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5080 } 5081 5082 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 5083 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5084 } 5085 5086 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5087 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5088 } 5089 5090 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { 5091 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5092 } 5093 5094 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { 5095 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5096 } 5097 5098 //===----------------------------------------------------------------------===// 5099 // vop3 5100 //===----------------------------------------------------------------------===// 5101 5102 static bool ConvertOmodMul(int64_t &Mul) { 5103 if (Mul != 1 && Mul != 2 && Mul != 4) 5104 return false; 5105 5106 Mul >>= 1; 5107 return true; 5108 } 5109 5110 static bool ConvertOmodDiv(int64_t &Div) { 5111 if (Div == 1) { 5112 Div = 0; 5113 return true; 5114 } 5115 5116 if (Div == 2) { 5117 Div = 3; 5118 return true; 5119 } 5120 5121 return false; 5122 } 5123 5124 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 5125 if (BoundCtrl == 0) { 5126 BoundCtrl = 1; 5127 return true; 5128 } 5129 5130 if (BoundCtrl == -1) { 5131 BoundCtrl = 0; 5132 return true; 5133 } 5134 5135 return false; 5136 } 5137 5138 // Note: the order in this table matches the order of operands in AsmString. 
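// Each entry is {name, immediate type, IsBit, ConvertResult}. IsBit operands are
// bare flags handled by parseNamedBit(); the others use the 'name:value' form.
// A non-null ConvertResult remaps the parsed value into its hardware encoding
// (see ConvertOmodMul/ConvertOmodDiv/ConvertBoundCtrl above).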
5139 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 5140 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 5141 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 5142 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 5143 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 5144 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 5145 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 5146 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 5147 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 5148 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 5149 {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 5150 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 5151 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 5152 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 5153 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5154 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 5155 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 5156 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 5157 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 5158 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 5159 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5160 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5161 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 5162 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5163 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 5164 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 5165 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 5166 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 5167 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 5168 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 5169 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 5170 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 5171 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 5172 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 5173 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 5174 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 5175 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 5176 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr} 5177 }; 5178 5179 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 5180 unsigned size = Operands.size(); 5181 assert(size > 0); 5182 5183 OperandMatchResultTy res = parseOptionalOpr(Operands); 5184 5185 // This is a hack to enable hardcoded mandatory operands which follow 5186 // optional operands. 5187 // 5188 // Current design assumes that all operands after the first optional operand 5189 // are also optional. However implementation of some instructions violates 5190 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 5191 // 5192 // To alleviate this problem, we have to (implicitly) parse extra operands 5193 // to make sure autogenerated parser of custom operands never hit hardcoded 5194 // mandatory operands. 5195 5196 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 5197 5198 // We have parsed the first optional operand. 5199 // Parse as many operands as necessary to skip all mandatory operands. 
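    // Each pass below bails out at end of statement or on a failed/missing
    // match; otherwise it eats an optional comma and parses one more operand,
    // up to MAX_OPR_LOOKAHEAD attempts.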
5200 5201 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 5202 if (res != MatchOperand_Success || 5203 getLexer().is(AsmToken::EndOfStatement)) break; 5204 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 5205 res = parseOptionalOpr(Operands); 5206 } 5207 } 5208 5209 return res; 5210 } 5211 5212 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 5213 OperandMatchResultTy res; 5214 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 5215 // try to parse any optional operand here 5216 if (Op.IsBit) { 5217 res = parseNamedBit(Op.Name, Operands, Op.Type); 5218 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 5219 res = parseOModOperand(Operands); 5220 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 5221 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 5222 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 5223 res = parseSDWASel(Operands, Op.Name, Op.Type); 5224 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 5225 res = parseSDWADstUnused(Operands); 5226 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 5227 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 5228 Op.Type == AMDGPUOperand::ImmTyNegLo || 5229 Op.Type == AMDGPUOperand::ImmTyNegHi) { 5230 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 5231 Op.ConvertResult); 5232 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) { 5233 res = parseDfmtNfmt(Operands); 5234 } else { 5235 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 5236 } 5237 if (res != MatchOperand_NoMatch) { 5238 return res; 5239 } 5240 } 5241 return MatchOperand_NoMatch; 5242 } 5243 5244 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 5245 StringRef Name = Parser.getTok().getString(); 5246 if (Name == "mul") { 5247 return parseIntWithPrefix("mul", Operands, 5248 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 5249 } 5250 5251 if (Name == "div") { 5252 return parseIntWithPrefix("div", Operands, 5253 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 5254 } 5255 5256 return MatchOperand_NoMatch; 5257 } 5258 5259 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 5260 cvtVOP3P(Inst, Operands); 5261 5262 int Opc = Inst.getOpcode(); 5263 5264 int SrcNum; 5265 const int Ops[] = { AMDGPU::OpName::src0, 5266 AMDGPU::OpName::src1, 5267 AMDGPU::OpName::src2 }; 5268 for (SrcNum = 0; 5269 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 5270 ++SrcNum); 5271 assert(SrcNum > 0); 5272 5273 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5274 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5275 5276 if ((OpSel & (1 << SrcNum)) != 0) { 5277 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 5278 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 5279 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 5280 } 5281 } 5282 5283 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 5284 // 1. This operand is input modifiers 5285 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 5286 // 2. This is not last operand 5287 && Desc.NumOperands > (OpNum + 1) 5288 // 3. Next operand is register class 5289 && Desc.OpInfo[OpNum + 1].RegClass != -1 5290 // 4. 
Next register is not tied to any other operand 5291 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 5292 } 5293 5294 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 5295 { 5296 OptionalImmIndexMap OptionalIdx; 5297 unsigned Opc = Inst.getOpcode(); 5298 5299 unsigned I = 1; 5300 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5301 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5302 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5303 } 5304 5305 for (unsigned E = Operands.size(); I != E; ++I) { 5306 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5307 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5308 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5309 } else if (Op.isInterpSlot() || 5310 Op.isInterpAttr() || 5311 Op.isAttrChan()) { 5312 Inst.addOperand(MCOperand::createImm(Op.Imm.Val)); 5313 } else if (Op.isImmModifier()) { 5314 OptionalIdx[Op.getImmTy()] = I; 5315 } else { 5316 llvm_unreachable("unhandled operand type"); 5317 } 5318 } 5319 5320 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 5321 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 5322 } 5323 5324 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5325 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5326 } 5327 5328 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5329 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5330 } 5331 } 5332 5333 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 5334 OptionalImmIndexMap &OptionalIdx) { 5335 unsigned Opc = Inst.getOpcode(); 5336 5337 unsigned I = 1; 5338 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5339 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5340 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5341 } 5342 5343 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 5344 // This instruction has src modifiers 5345 for (unsigned E = Operands.size(); I != E; ++I) { 5346 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5347 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5348 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5349 } else if (Op.isImmModifier()) { 5350 OptionalIdx[Op.getImmTy()] = I; 5351 } else if (Op.isRegOrImm()) { 5352 Op.addRegOrImmOperands(Inst, 1); 5353 } else { 5354 llvm_unreachable("unhandled operand type"); 5355 } 5356 } 5357 } else { 5358 // No src modifiers 5359 for (unsigned E = Operands.size(); I != E; ++I) { 5360 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5361 if (Op.isMod()) { 5362 OptionalIdx[Op.getImmTy()] = I; 5363 } else { 5364 Op.addRegOrImmOperands(Inst, 1); 5365 } 5366 } 5367 } 5368 5369 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5370 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5371 } 5372 5373 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5374 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5375 } 5376 5377 // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906): 5378 // it has src2 register operand that is tied to dst operand 5379 // we don't allow modifiers for this operand in assembler so src2_modifiers 5380 // should be 0. 
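  // Both the zero src2_modifiers immediate and the tied src2 operand (a copy of
  // the dst register, operand 0) are inserted explicitly below, since neither
  // appears in the asm string.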
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31);
  }
  return false;
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

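// dpp_ctrl forms accepted by parseDPPCtrl below (illustrative; the exact
// value ranges are checked against AMDGPU::DPP::DppCtrl):
//   quad_perm:[0,1,2,3]      row_mirror       row_half_mirror
//   row_shl:1 ... row_shl:15 (likewise row_shr and row_ror)
//   wave_shl:1  wave_rol:1  wave_shr:1  wave_ror:1
//   row_bcast:15  row_bcast:31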
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i * 2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

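// Convert parsed DPP operands into MCInst operand order: operands tied to the
// destination (e.g. src2/old of MAC) are duplicated from the already-emitted
// operand, the VOP2b "vcc" token is skipped, and omitted row_mask / bank_mask
// operands default to 0xf.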
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
      continue;
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

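// parseSDWASel parses "<Prefix>:VALUE" where VALUE is one of BYTE_0..BYTE_3,
// WORD_0, WORD_1 or DWORD, and parseSDWADstUnused parses
// "dst_unused:UNUSED_PAD|UNUSED_SEXT|UNUSED_PRESERVE". The Prefix string
// (e.g. dst_sel, src0_sel, src1_sel) comes from AMDGPUOptionalOperandTable.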
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

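// Shared SDWA conversion: emit the explicit operands (skipping the VOP2b/VOPC
// "vcc" token where required), record optional immediates by type, then append
// defaults (clamp, omod where present, dst_sel/dst_unused/srcN_sel) according
// to the encoding family, and finally tie src2 to dst for v_mac_{f16,f32}.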
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to see a token and fails to validate
  // the operand. This method checks if we are given an immediate operand but
  // expect to get the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

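// Parses the optional 16-bit immediate operand of s_endpgm (e.g. "s_endpgm 3");
// when the immediate is omitted it defaults to 0.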
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }