//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

// Coarse register categories recognized while parsing a register operand.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

/// A single parsed AMDGPU assembly operand: a token, an immediate, a
/// register, or an expression. Keeps a back-pointer to the parser so
/// predicate methods (e.g. isInlinableImm) can query subtarget features.
class AMDGPUOperand : public MCParsedAsmOperand {
  // Discriminator for the union below.
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  /// Source modifiers (abs/neg for FP sources, sext for integer sources)
  /// that may be attached to a register or immediate operand.
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    // Encode the FP modifiers into the SISrcMods bitfield used by the
    // instruction encoding.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    // Encode the integer (sext) modifier into the SISrcMods bitfield.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    // FP and integer modifiers are mutually exclusive on one operand.
    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  /// Kinds of immediate operands, covering both literal values and the
  /// many named instruction-modifier operands (GLC, SLC, DPP controls,
  /// SDWA selects, etc.).
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyEndpgm,
    ImmTyHigh
  };

  // Payload for Kind == Token: a non-owning view into the input buffer.
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  // Payload for Kind == Immediate.
  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  // Payload for Kind == Register.
  struct RegOp {
    unsigned RegNo;
    bool IsForcedVOP3;
    Modifiers Mods;
  };

  // Exactly one member is active, selected by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret is a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  // Defined elsewhere in this file (need the parser's subtarget info).
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  // A "plain" register for the matcher: a register without src modifiers.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  // Predicates for matcher-generated operand classes: register of a given
  // class, or an immediate that can be encoded inline for the given type.
  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // True if the operand is a vector register of any supported width.
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  // SDWA operand predicates; defined elsewhere in this file.
  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  // An immediate that is a named modifier rather than a literal value.
  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // One predicate per named-modifier operand class used by the matcher.
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  // Offset predicates also range-check the encodable bit width.
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  // Register of the given class, or inlinable immediate, with no src
  // modifiers attached.
  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  // SCSrc*: scalar source operands (SGPR or inline constant).
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  // SSrc*: scalar sources that additionally allow literal constants.
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  // VCSrc*: VALU sources (VGPR/SGPR or inline constant).
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  // VSrc*: VALU sources that additionally allow literal constants.
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }
  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isVSrcB16();
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isVSrcF16();
  }

  // KImm*: literal constants carried in the instruction (VOP2 madmk/madak).
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  // AMDGPU has no memory-kind parsed operands.
  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  // Predicates defined elsewhere in this file.
  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  // For an expression operand treated as a token (see isToken()), the
  // token text is the referenced symbol's name.
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  // NOTE(review): unlike the accessors above this does not assert
  // Kind == Register before reading the union member.
  unsigned getReg() const override {
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers live on registers and on plain (ImmTyNone) immediates only.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // Emits the modifiers bitfield first, then the value operand itself.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  // Debug helper: human-readable name of an ImmTy (used by print()).
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyEndpgm:
      OS << "Endpgm";
      break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  // Factory helpers; each fully initializes the active union member and
  // the source-location fields.
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  // NOTE(review): HasExplicitEncodingSize is accepted but not used here.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E,
                                      bool ForceVOP3) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->Reg.IsForcedVOP3 = ForceVOP3;
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

// Debug printing for source modifiers (see AMDGPUOperand::print).
raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
/// Tracks the highest SGPR/VGPR index referenced in the current kernel
/// scope and mirrors the running maxima into the MC symbols
/// .kernel.sgpr_count / .kernel.vgpr_count.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  // Record that SGPR index i is used; the symbol holds i+1 (count, not index).
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  // Record that VGPR index i is used; the symbol holds i+1 (count, not index).
  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  // Reset the counters to zero and (re)publish the count symbols.
  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  // Note a register use spanning [DwordRegIndex, DwordRegIndex+RegWidth).
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

/// Target asm parser for AMDGPU. (Declaration continues past this chunk.)
class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  // Encoding forced by a mnemonic suffix (_e32/_e64/_dpp/_sdwa).
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  // Handlers for target-specific assembler directives.
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  // Register-operand parsing helpers.
  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  // Shared MCInst-conversion helpers for MUBUF and DS instruction forms.
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is none suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      // Code-object-v3 targets publish the ISA version under .amdgcn.*
      // symbol names; older flows use the .option.* names.
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      // Same condition as above: v3 uses per-kernel GPR count symbols,
      // otherwise fall back to the legacy kernel-scope tracking.
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  // Subtarget feature queries.
  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  // Forced-encoding state, set from mnemonic suffixes before matching.
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  // MCTargetAsmParser interface.
  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy parseOperandArrayWithPrefix(
    const char *Prefix,
    OperandVector &Operands,
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  // Generic operand-parsing entry points.
  bool parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier = false);
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  // DS / EXP instruction conversion.
  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  // Value parsed for s_sendmsg/hwreg sub-operands; IsSymbolic records
  // whether the user wrote a symbolic name rather than a number.
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  // Post-match semantic validation of the assembled MCInst.
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // Low-level lexer helpers.
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);

  // swizzle(...) macro parsing for ds_swizzle_b32.
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  // MUBUF/MTBUF instruction conversion wrappers.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultGLC()
const; 1180 AMDGPUOperand::Ptr defaultSLC() const; 1181 1182 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1183 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1184 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1185 AMDGPUOperand::Ptr defaultOffsetU12() const; 1186 AMDGPUOperand::Ptr defaultOffsetS13() const; 1187 1188 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1189 1190 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1191 OptionalImmIndexMap &OptionalIdx); 1192 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1193 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1194 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1195 1196 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1197 1198 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1199 bool IsAtomic = false); 1200 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1201 1202 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1203 AMDGPUOperand::Ptr defaultRowMask() const; 1204 AMDGPUOperand::Ptr defaultBankMask() const; 1205 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1206 void cvtDPP(MCInst &Inst, const OperandVector &Operands); 1207 1208 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1209 AMDGPUOperand::ImmTy Type); 1210 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1211 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1212 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1213 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1214 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1215 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1216 uint64_t BasicInstType, bool skipVcc = false); 1217 1218 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1219 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1220 }; 1221 1222 struct OptionalOperand { 1223 const char *Name; 1224 
AMDGPUOperand::ImmTy Type;
// True if the operand is a single flag bit (no value follows the name).
bool IsBit;
// Optional hook to adjust/validate the parsed value; see parseIntWithPrefix.
bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// Map an operand size in bytes (2/4/8) to the IEEE semantics of that width.
// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

// Select FP semantics from an AMDGPU operand-type enum value; used when a
// parsed fp literal must be converted to the width the operand expects.
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

// Round FPLiteral (in place) to VT's semantics. Returns false only when the
// conversion both loses information AND overflows/underflows; plain precision
// loss is accepted.
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision lost but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

// True if Val fits in Size bits under either zero- or sign-extension.
static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

// Check whether this parsed immediate can be encoded as a hardware inline
// constant when used as an operand of the given machine value type.
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // Narrower operand: round the f64 token to the operand's width first.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

// Check whether this immediate may be encoded as an instruction-stream
// literal (as opposed to an inline constant) for the given operand type.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, type);
}

// True if this is a register operand contained in the register class RCID.
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

// SDWA source legality is subtarget dependent: VI only takes 32-bit VGPRs,
// GFX9 also takes SGPRs (VS_32) and inline-constant immediates.
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

// Apply the parsed abs/neg modifiers to the raw bit pattern of an fp
// immediate of the given byte size by clearing/flipping its sign bit.
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

// Append this immediate to Inst. SISrc operands go through the literal
// encoding path; everything else is added as a plain immediate.
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    // NOTE(review): '&' binds tighter than '&&', so this parses as
    // (ApplyModifiers & isImmTy(ImmTyNone)) && Imm.Mods.hasFPModifiers().
    // For bool operands the result is the same as a plain conjunction,
    // but consider spelling it with '&&' for clarity.
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

// Encode an immediate destined for a SISrc operand: use the inline-constant
// form when possible, otherwise emit a literal truncated/converted to the
// operand's width (fp tokens are converted via APFloat).
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto&
InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    // abs/neg are folded into the literal's sign bit before encoding.
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        // Only the high 32 bits of a 64-bit fp literal are encoded.
        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    // Not inlinable: emit the low 32 bits as a literal.
    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    // Packed operands only accept inline constants; predicate methods
    // should have rejected anything else before we get here.
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

// Add a KImm operand of the given bit width; fp tokens are rounded from the
// parsed f64 value to the target width before being encoded.
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}

// Append this register operand to Inst, translated to the MC register
// numbering for the current subtarget.
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}

// Registers that read as named inline constants (shared/private apertures,
// POPS wave id).
static bool isInlineValue(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return true;
  default:
    return false;
  }
}

bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Map a register kind and width (in 32-bit dwords) to a register class ID,
// or -1 if no class of that width exists for the kind.
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  }
  return -1;
}

// Resolve a named special register (exec, vcc, m0, ...) to its register
// number; returns 0 (no register) when the name is not special.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("scc", AMDGPU::SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Default(0);
}

// MCTargetAsmParser entry point. Note the inverted convention: returns
// true on FAILURE (per the MC parser interface).
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

// Grow the register group [Reg .. Reg+RegWidth) by the next parsed register
// Reg1. Special registers may only pair their _LO/_HI halves; regular
// registers must be exactly consecutive.
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            unsigned RegNum) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}

// Prefixes of regular (non-special) register names: v, s, ttmp.
static const StringRef Registers[] = {
  { "v" },
  { "s" },
  { "ttmp" },
};

// Lookahead check (no tokens consumed): does Token (with NextToken after it)
// begin a register reference?
bool
AMDGPUAsmParser::isRegister(const AsmToken &Token,
                            const AsmToken &NextToken) const {

  // A list of consecutive registers: [s0,s1,s2,s3]
  if (Token.is(AsmToken::LBrac))
    return true;

  if (!Token.is(AsmToken::Identifier))
    return false;

  // A single register like s0 or a range of registers like s[0:1]

  StringRef RegName = Token.getString();
for (StringRef Reg : Registers) {
    if (RegName.startswith(Reg)) {
      if (Reg.size() < RegName.size()) {
        unsigned RegNum;
        // A single register with an index: rXX
        if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
          return true;
      } else {
        // A range of registers: r[XX:YY].
        if (NextToken.is(AsmToken::LBrac))
          return true;
      }
    }
  }

  // Fall back to named special registers (nonzero register number => true).
  return getSpecialRegForName(RegName);
}

bool
AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}

// Parse one register reference, consuming tokens. Accepts special names
// (exec, vcc, ...), single registers (v0), ranges (v[0:3]) and bracketed
// lists ([s0,s1,...], handled by recursing with DwordRegIndex == nullptr).
// On success fills RegKind/Reg/RegNum/RegWidth (width in dwords) and, if
// requested, the dword index of the first register. Returns false on any
// syntax or alignment error.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          // "v[XX]" — single-register range.
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        // Each element must be a single register of the same kind,
        // consecutive with the previous one.
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
      Size = std::min(RegWidth, 4u);
    }
    if (RegNum % Size != 0)
      return false;
    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
    // Convert the dword index into an index within the register class.
    RegNum = RegNum / Size;
    int RCID = getRegClass(RegKind, RegWidth);
    if (RCID == -1)
      return false;
    const MCRegisterClass RC = TRI->getRegClass(RCID);
    if (RegNum >= RC.getNumRegs())
      return false;
    Reg = RC.getRegister(RegNum);
    break;
  }

  default:
    llvm_unreachable("unexpected register kind");
  }

  if (!subtargetHasRegister(*TRI, Reg))
    return false;
  return true;
}

// Name of the assembler-visible symbol tracking the max used GPR index for
// the given kind; None for kinds that are not tracked.
Optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  switch (RegKind) {
  case IS_VGPR:
    return StringRef(".amdgcn.next_free_vgpr");
  case IS_SGPR:
    return StringRef(".amdgcn.next_free_sgpr");
  default:
    return None;
  }
}

// Define the GPR-count symbol for RegKind with an initial value of 0.
void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
}

// Raise the .amdgcn.next_free_{v,s}gpr symbol to cover a register use that
// ends at DwordRegIndex + RegWidth - 1. Returns false (after reporting an
// error) if the symbol exists but is not an absolute variable.
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getParser().getTok().getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
                 getParser().getTok().getLoc(),
                 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}

/// Parse a single AMDGPU register operand at the current token.
/// On success returns the operand and updates the GPR usage bookkeeping
/// (code-object-v3 symbols or KernelScope); on failure emits a diagnostic
/// and returns nullptr.
std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
  const auto &Tok = Parser.getTok();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth, DwordRegIndex;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
    //FIXME: improve error messages (bug 41303).
    Error(StartLoc, "not a valid operand.");
    return nullptr;
  }
  // Track the highest register used so the kernel metadata can report
  // accurate VGPR/SGPR counts.
  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
}

/// Parse an absolute (compile-time constant) expression into \p Val.
/// Returns true on error (MC convention).
bool
AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier) {
  if (HasSP3AbsModifier) {
    // This is a workaround for handling expressions
    // as arguments of SP3 'abs' modifier, for example:
    //     |1.0|
    //     |-1|
    //     |1+x|
    // This syntax is not compatible with syntax of standard
    // MC expressions (due to the trailing '|').

    SMLoc EndLoc;
    const MCExpr *Expr;
    SMLoc StartLoc = getLoc();

    // parsePrimaryExpr stops before the trailing '|', unlike
    // parseAbsoluteExpression which would try to consume it.
    if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
      return true;
    }

    if (!Expr->evaluateAsAbsolute(Val))
      return Error(StartLoc, "expected absolute expression");

    return false;
  }

  return getParser().parseAbsoluteExpression(Val);
}

/// Parse an immediate operand: either a (possibly negated) floating-point
/// literal or an absolute integer expression. Pushes the parsed operand
/// onto \p Operands.
OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
  // TODO: add syntactic sugar for 1/(2*PI)

  const auto& Tok = getToken();
  const auto& NextTok = peekToken();
  bool IsReal = Tok.is(AsmToken::Real);
  SMLoc S = Tok.getLoc();
  bool Negate = false;

  // A '-' directly followed by an fp literal is folded into the literal.
  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
    lex();
    IsReal = true;
    Negate = true;
  }

  if (IsReal) {
    // Floating-point expressions are not supported.
    // Can only allow floating-point literals with an
    // optional sign.

    StringRef Num = getTokenStr();
    lex();

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
      return MatchOperand_ParseFail;
    }
    if (Negate)
      RealVal.changeSign();

    // The literal is stored as the raw bits of the IEEE double; the 'true'
    // flag marks the operand as an fp modifiers-capable immediate.
    Operands.push_back(
      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                               AMDGPUOperand::ImmTyNone, true));

    return MatchOperand_Success;

  // FIXME: Should enable arbitrary expressions here
  } else if (Tok.is(AsmToken::Integer) ||
             (Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Integer))){

    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, HasSP3AbsModifier))
      return MatchOperand_ParseFail;

    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

/// Parse a register operand and record whether the VOP3 encoding was
/// forced via mnemonic suffix.
OperandMatchResultTy
AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  if (!isRegister())
    return MatchOperand_NoMatch;

  if (auto R = parseRegister()) {
    assert(R->isReg());
    R->Reg.IsForcedVOP3 = isForcedVOP3();
    Operands.push_back(std::move(R));
    return MatchOperand_Success;
  }
  return MatchOperand_ParseFail;
}

/// Parse either a register or an immediate; registers are tried first.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
  auto res = parseReg(Operands);
  return (res == MatchOperand_NoMatch)?
         parseImm(Operands, AbsMod) :
         res;
}

// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following context:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as a part
// of an expression that follows the sign.
2118 // 2119 // Note: When "-" is followed by an integer literal, 2120 // this is interpreted as integer negation rather 2121 // than a floating-point NEG modifier applied to N. 2122 // Beside being contr-intuitive, such use of floating-point 2123 // NEG modifier would have resulted in different meaning 2124 // of integer literals used with VOP1/2/C and VOP3, 2125 // for example: 2126 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2127 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2128 // Negative fp literals with preceding "-" are 2129 // handled likewise for unifomtity 2130 // 2131 bool 2132 AMDGPUAsmParser::parseSP3NegModifier() { 2133 2134 AsmToken NextToken[2]; 2135 peekTokens(NextToken); 2136 2137 if (isToken(AsmToken::Minus) && 2138 (isRegister(NextToken[0], NextToken[1]) || 2139 NextToken[0].is(AsmToken::Pipe) || 2140 isId(NextToken[0], "abs"))) { 2141 lex(); 2142 return true; 2143 } 2144 2145 return false; 2146 } 2147 2148 OperandMatchResultTy 2149 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2150 bool AllowImm) { 2151 bool Negate, Negate2 = false, Abs = false, Abs2 = false; 2152 2153 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 
2154 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2155 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2156 return MatchOperand_ParseFail; 2157 } 2158 2159 Negate = parseSP3NegModifier(); 2160 2161 if (getLexer().getKind() == AsmToken::Identifier && 2162 Parser.getTok().getString() == "neg") { 2163 if (Negate) { 2164 Error(Parser.getTok().getLoc(), "expected register or immediate"); 2165 return MatchOperand_ParseFail; 2166 } 2167 Parser.Lex(); 2168 Negate2 = true; 2169 if (getLexer().isNot(AsmToken::LParen)) { 2170 Error(Parser.getTok().getLoc(), "expected left paren after neg"); 2171 return MatchOperand_ParseFail; 2172 } 2173 Parser.Lex(); 2174 } 2175 2176 if (getLexer().getKind() == AsmToken::Identifier && 2177 Parser.getTok().getString() == "abs") { 2178 Parser.Lex(); 2179 Abs2 = true; 2180 if (getLexer().isNot(AsmToken::LParen)) { 2181 Error(Parser.getTok().getLoc(), "expected left paren after abs"); 2182 return MatchOperand_ParseFail; 2183 } 2184 Parser.Lex(); 2185 } 2186 2187 if (getLexer().getKind() == AsmToken::Pipe) { 2188 if (Abs2) { 2189 Error(Parser.getTok().getLoc(), "expected register or immediate"); 2190 return MatchOperand_ParseFail; 2191 } 2192 Parser.Lex(); 2193 Abs = true; 2194 } 2195 2196 OperandMatchResultTy Res; 2197 if (AllowImm) { 2198 Res = parseRegOrImm(Operands, Abs); 2199 } else { 2200 Res = parseReg(Operands); 2201 } 2202 if (Res != MatchOperand_Success) { 2203 return (Negate || Negate2 || Abs || Abs2)? 
MatchOperand_ParseFail : Res; 2204 } 2205 2206 AMDGPUOperand::Modifiers Mods; 2207 if (Abs) { 2208 if (getLexer().getKind() != AsmToken::Pipe) { 2209 Error(Parser.getTok().getLoc(), "expected vertical bar"); 2210 return MatchOperand_ParseFail; 2211 } 2212 Parser.Lex(); 2213 Mods.Abs = true; 2214 } 2215 if (Abs2) { 2216 if (getLexer().isNot(AsmToken::RParen)) { 2217 Error(Parser.getTok().getLoc(), "expected closing parentheses"); 2218 return MatchOperand_ParseFail; 2219 } 2220 Parser.Lex(); 2221 Mods.Abs = true; 2222 } 2223 2224 if (Negate) { 2225 Mods.Neg = true; 2226 } else if (Negate2) { 2227 if (getLexer().isNot(AsmToken::RParen)) { 2228 Error(Parser.getTok().getLoc(), "expected closing parentheses"); 2229 return MatchOperand_ParseFail; 2230 } 2231 Parser.Lex(); 2232 Mods.Neg = true; 2233 } 2234 2235 if (Mods.hasFPModifiers()) { 2236 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2237 Op.setModifiers(Mods); 2238 } 2239 return MatchOperand_Success; 2240 } 2241 2242 OperandMatchResultTy 2243 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2244 bool AllowImm) { 2245 bool Sext = false; 2246 2247 if (getLexer().getKind() == AsmToken::Identifier && 2248 Parser.getTok().getString() == "sext") { 2249 Parser.Lex(); 2250 Sext = true; 2251 if (getLexer().isNot(AsmToken::LParen)) { 2252 Error(Parser.getTok().getLoc(), "expected left paren after sext"); 2253 return MatchOperand_ParseFail; 2254 } 2255 Parser.Lex(); 2256 } 2257 2258 OperandMatchResultTy Res; 2259 if (AllowImm) { 2260 Res = parseRegOrImm(Operands); 2261 } else { 2262 Res = parseReg(Operands); 2263 } 2264 if (Res != MatchOperand_Success) { 2265 return Sext? 
MatchOperand_ParseFail : Res; 2266 } 2267 2268 AMDGPUOperand::Modifiers Mods; 2269 if (Sext) { 2270 if (getLexer().isNot(AsmToken::RParen)) { 2271 Error(Parser.getTok().getLoc(), "expected closing parentheses"); 2272 return MatchOperand_ParseFail; 2273 } 2274 Parser.Lex(); 2275 Mods.Sext = true; 2276 } 2277 2278 if (Mods.hasIntModifiers()) { 2279 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2280 Op.setModifiers(Mods); 2281 } 2282 2283 return MatchOperand_Success; 2284 } 2285 2286 OperandMatchResultTy 2287 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2288 return parseRegOrImmWithFPInputMods(Operands, false); 2289 } 2290 2291 OperandMatchResultTy 2292 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2293 return parseRegOrImmWithIntInputMods(Operands, false); 2294 } 2295 2296 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2297 auto Loc = getLoc(); 2298 if (trySkipId("off")) { 2299 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2300 AMDGPUOperand::ImmTyOff, false)); 2301 return MatchOperand_Success; 2302 } 2303 2304 if (!isRegister()) 2305 return MatchOperand_NoMatch; 2306 2307 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2308 if (Reg) { 2309 Operands.push_back(std::move(Reg)); 2310 return MatchOperand_Success; 2311 } 2312 2313 return MatchOperand_ParseFail; 2314 2315 } 2316 2317 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2318 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2319 2320 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2321 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2322 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2323 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2324 return Match_InvalidOperand; 2325 2326 if ((TSFlags & SIInstrFlags::VOP3) && 2327 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2328 getForcedEncodingSize() != 64) 2329 
return Match_PreferE32; 2330 2331 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2332 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2333 // v_mac_f32/16 allow only dst_sel == DWORD; 2334 auto OpNum = 2335 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2336 const auto &Op = Inst.getOperand(OpNum); 2337 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2338 return Match_InvalidOperand; 2339 } 2340 } 2341 2342 if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) { 2343 // FIXME: Produces error without correct column reported. 2344 auto OpNum = 2345 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset); 2346 const auto &Op = Inst.getOperand(OpNum); 2347 if (Op.getImm() != 0) 2348 return Match_InvalidOperand; 2349 } 2350 2351 return Match_Success; 2352 } 2353 2354 // What asm variants we should check 2355 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2356 if (getForcedEncodingSize() == 32) { 2357 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2358 return makeArrayRef(Variants); 2359 } 2360 2361 if (isForcedVOP3()) { 2362 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2363 return makeArrayRef(Variants); 2364 } 2365 2366 if (isForcedSDWA()) { 2367 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2368 AMDGPUAsmVariants::SDWA9}; 2369 return makeArrayRef(Variants); 2370 } 2371 2372 if (isForcedDPP()) { 2373 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2374 return makeArrayRef(Variants); 2375 } 2376 2377 static const unsigned Variants[] = { 2378 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2379 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2380 }; 2381 2382 return makeArrayRef(Variants); 2383 } 2384 2385 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2386 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2387 const unsigned Num = Desc.getNumImplicitUses(); 
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    // 16-bit operands: packed (v2i16/v2f16) types use their own inline
    // constant rules.
    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
    } else {
      return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
    }
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

/// True when operand \p OpIdx occupies the constant bus: a non-inline
/// literal, an expression, or an SGPR.
bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  const MCOperand &MO = Inst.getOperand(OpIdx);
  if (MO.isImm()) {
    return !isInlineConstant(Inst, OpIdx);
  }
  return !MO.isReg() ||
         isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
}

/// Verify that VOP/SDWA instructions read at most one value over the
/// constant bus (multiple reads of the *same* SGPR count once).
bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned ConstantBusUseCount = 0;

  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (Reg != SGPRUsed) {
            ++ConstantBusUseCount;
          }
          SGPRUsed = Reg;
        } else { // Expression or a literal
          ++ConstantBusUseCount;
        }
      }
    }
  }

  return ConstantBusUseCount <= 1;
}

/// For earlyclobber-constrained destinations, verify no source register
/// overlaps the destination register.
bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
  if (DstIdx == -1 ||
      Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
    return true;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  assert(DstIdx != -1);
  const MCOperand &Dst = Inst.getOperand(DstIdx);
  assert(Dst.isReg());
  const unsigned DstReg = mc2PseudoReg(Dst.getReg());

  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };

  for (int SrcIdx : SrcIndices) {
    if (SrcIdx == -1) break;
    const MCOperand &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg()) {
      const unsigned SrcReg = mc2PseudoReg(Src.getReg());
      if (isRegIntersect(DstReg, SrcReg, TRI)) {
        return false;
      }
    }
  }

  return true;
}

/// On targets without integer clamp support, the 'clamp' modifier of
/// IntClamp instructions must be zero.
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
    int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
    assert(ClampIdx != -1);
    return Inst.getOperand(ClampIdx).getImm() == 0;
  }

  return true;
}

/// Check that a MIMG instruction's vdata register width matches the number
/// of enabled dmask channels (plus tfe), accounting for packed d16.
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);

  assert(VDataIdx != -1);
  assert(DMaskIdx != -1);
  assert(TFEIdx != -1);

  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
  // tfe adds one extra dword for the texture-fail status.
  unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    DMask = 1;

  // Gather4 always returns 4 channels regardless of dmask.
  unsigned DataSize =
    (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
  if (hasPackedD16()) {
    // Packed d16 halves the register requirement (rounded up).
    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
    if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
      DataSize = (DataSize + 1) / 2;
  }

  return (VDataSize / 4) == DataSize + TFESize;
}

/// Validate the dmask of a MIMG atomic (instructions that both load and
/// store).
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;
  if (!Desc.mayLoad() || !Desc.mayStore())
    return true; // Not atomic

  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;

  // This is an incomplete check because image_atomic_cmpswap
  // may only use 0x3 and 0xf while other atomic operations
  // may use 0x1 and 0x3. However these limitations are
  // verified when we check that dmask matches dst size.
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
}

/// Validate that a GATHER4 dmask selects exactly one channel.
bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
    return true;

  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;

  // GATHER4 instructions use dmask in a different fashion compared to
  // other MIMG instructions. The only useful DMASK values are
  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
  // (red,red,red,red) etc.) The ISA document doesn't mention
  // this.
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}

/// The MIMG d16 modifier is not available on SI/CI targets.
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
    if (isCI() || isSI())
      return false;
  }

  return true;
}

// True for "reversed" opcodes (subrev/lshlrev/etc) whose first source is
// the second algebraic operand. Used by validateLdsDirect below.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_si:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_si:
  case AMDGPU::V_SUBREV_F32_e64_vi:
  case AMDGPU::V_SUBREV_I32_e32:
  case AMDGPU::V_SUBREV_I32_e64:
  case AMDGPU::V_SUBREV_I32_e32_si:
  case AMDGPU::V_SUBREV_I32_e64_si:
  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_si:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_si:
  case AMDGPU::V_SUBBREV_U32_e64_vi:
  case
       AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:
  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_vi:
  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:
  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
  case AMDGPU::V_LSHLREV_B32_e32_si:
  case AMDGPU::V_LSHLREV_B32_e64_si:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_vi:
  case AMDGPU::V_LSHLREV_B64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_si:
  case AMDGPU::V_LSHRREV_B32_e64_si:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B64_vi:
  case AMDGPU::V_ASHRREV_I32_e64_si:
  case AMDGPU::V_ASHRREV_I32_e32_si:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_vi:
  case AMDGPU::V_ASHRREV_I64_vi:
  case AMDGPU::V_PK_LSHLREV_B16_vi:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}

/// Validate uses of the lds_direct pseudo-register: it may only appear as
/// src0 of 9-bit-src VOP encodings, and not with SDWA or reversed opcodes.
bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {

  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  // lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings which do not accept these.
  if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  const int SrcIndices[] = { Src1Idx, Src2Idx };

  // lds_direct cannot be specified as either src1 or src2.
  for (int SrcIdx : SrcIndices) {
    if (SrcIdx == -1) break;
    const MCOperand &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
      return false;
    }
  }

  if (Src0Idx == -1)
    return true;

  const MCOperand &Src = Inst.getOperand(Src0Idx);
  if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
    return true;

  // lds_direct is specified as src0. Check additional limitations.
  return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
}

/// SOP2/SOPC instructions may encode at most one (unique) 32-bit literal
/// across their source operands.
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);

  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumLiterals = 0;
  uint32_t LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1) break;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (MO.isImm() &&
        // Exclude special imm operands (like that used by s_set_gpr_idx_on)
        AMDGPU::isSISrcOperand(Desc, OpIdx) &&
        !isInlineConstant(Inst, OpIdx)) {
      uint32_t Value = static_cast<uint32_t>(MO.getImm());
      // Two identical literals share the one literal slot, so only count
      // distinct values.
      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
        ++NumLiterals;
      }
    }
  }

  return NumLiterals <= 1;
}

/// Run all post-match semantic checks on a successfully matched
/// instruction. Emits a diagnostic at \p IDLoc and returns false on the
/// first failed check.
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                          const SMLoc &IDLoc) {
  if (!validateLdsDirect(Inst)) {
    Error(IDLoc,
      "invalid use of lds_direct");
    return false;
  }
  if (!validateSOPLiteral(Inst)) {
    Error(IDLoc,
      "only one literal operand is allowed");
    return false;
  }
  if (!validateConstantBusLimitations(Inst)) {
    Error(IDLoc,
      "invalid operand (violates constant bus restrictions)");
    return false;
  }
  if (!validateEarlyClobberLimitations(Inst)) {
    Error(IDLoc,
      "destination must be different than all sources");
    return false;
  }
  if (!validateIntClampSupported(Inst)) {
    Error(IDLoc,
      "integer clamping is not supported on this GPU");
    return false;
  }
  // For MUBUF/MTBUF d16 is a part of
  // opcode, so there is nothing to validate.
  if (!validateMIMGD16(Inst)) {
    Error(IDLoc,
      "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGDataSize(Inst)) {
    Error(IDLoc,
      "image data size does not match dmask and tfe");
    return false;
  }
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(IDLoc,
      "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(IDLoc,
      "invalid image_gather dmask: only one bit must be set");
    return false;
  }

  return true;
}

static std::string AMDGPUMnemonicSpellCheck(StringRef S,
                                            const FeatureBitset &FBS,
                                            unsigned VariantID = 0);

/// Try every applicable asm variant, keep the most specific match status,
/// validate the matched instruction and emit it. Returns true on error
/// (MC convention).
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  switch (Result) {
  default: break;
  case Match_Success:
    if (!validateInstruction(Inst, IDLoc)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.EmitInstruction(Inst, getSTI());
    return false;

  case Match_MissingFeature:
    return Error(IDLoc, "instruction not supported on this GPU");

  case Match_MnemonicFail: {
    // Offer a spelling suggestion based on the available mnemonics.
    FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
    std::string Suggestion = AMDGPUMnemonicSpellCheck(
        ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
    return Error(IDLoc, "invalid instruction" + Suggestion,
                 ((AMDGPUOperand &)*Operands[0]).getLocRange());
  }

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  }
  llvm_unreachable("Implement any new match types added!");
}

/// Parse an absolute expression into an unsigned 32-bit value (truncating).
/// Returns true on error.
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  int64_t Tmp = -1;
  if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
    return true;
  }
  if (getParser().parseAbsoluteExpression(Tmp)) {
    return true;
  }
  Ret = static_cast<uint32_t>(Tmp);
  return false;
}

/// Parse a "major, minor" version pair used by several HSA directives.
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("minor version number required, comma expected");
  Lex();

  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");

  return false;
}

/// Handle the .amdgcn_target directive: the quoted target string must
/// exactly match the target implied by the current subtarget options.
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  std::string Target;

  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(Target))
    return true;
  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());

  std::string ExpectedTarget;
  raw_string_ostream ExpectedTargetOS(ExpectedTarget);
  IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);

  if (Target != ExpectedTargetOS.str())
    return getParser().Error(TargetRange.Start, "target must match options",
                             TargetRange);

  getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
  return false;
}

/// Emit a generic out-of-range diagnostic for \p Range.
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return getParser().Error(Range.Start, "value out of range", Range);
}

/// Convert raw VGPR/SGPR counts into the block-granule values encoded in
/// the kernel descriptor, checking addressable-SGPR limits. Returns true
/// on error.
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
    unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
    unsigned &SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());

  unsigned NumVGPRs = NextFreeVGPR;
  unsigned NumSGPRs = NextFreeSGPR;
  unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());

  // On GFX8+ (without the SGPR init bug) the limit applies before the
  // extra SGPRs (VCC/flat_scratch/XNACK) are added.
  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  NumSGPRs +=
      IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);

  // On GFX7 and earlier (or with the init bug) the limit applies after.
  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  if (Features.test(FeatureSGPRInitBug))
    NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);

  return false;
}

bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();

  StringSet<> Seen;

  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());

  SMRange VGPRRange;
  uint64_t NextFreeVGPR = 0;
  SMRange SGPRRange;
  uint64_t NextFreeSGPR = 0;
  unsigned UserSGPRCount = 0;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  bool ReserveXNACK = hasXNACK();

  while (true) {
    while (getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3022 3023 StringRef ID = getTok().getIdentifier(); 3024 SMRange IDRange = getTok().getLocRange(); 3025 Lex(); 3026 3027 if (ID == ".end_amdhsa_kernel") 3028 break; 3029 3030 if (Seen.find(ID) != Seen.end()) 3031 return TokError(".amdhsa_ directives cannot be repeated"); 3032 Seen.insert(ID); 3033 3034 SMLoc ValStart = getTok().getLoc(); 3035 int64_t IVal; 3036 if (getParser().parseAbsoluteExpression(IVal)) 3037 return true; 3038 SMLoc ValEnd = getTok().getLoc(); 3039 SMRange ValRange = SMRange(ValStart, ValEnd); 3040 3041 if (IVal < 0) 3042 return OutOfRangeError(ValRange); 3043 3044 uint64_t Val = IVal; 3045 3046 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3047 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3048 return OutOfRangeError(RANGE); \ 3049 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3050 3051 if (ID == ".amdhsa_group_segment_fixed_size") { 3052 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3053 return OutOfRangeError(ValRange); 3054 KD.group_segment_fixed_size = Val; 3055 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3056 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3057 return OutOfRangeError(ValRange); 3058 KD.private_segment_fixed_size = Val; 3059 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3060 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3061 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3062 Val, ValRange); 3063 UserSGPRCount += 4; 3064 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3065 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3066 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3067 ValRange); 3068 UserSGPRCount += 2; 3069 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3070 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3071 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3072 ValRange); 3073 UserSGPRCount += 2; 3074 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3075 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 3076 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3077 Val, ValRange); 3078 UserSGPRCount += 2; 3079 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3080 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3081 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3082 ValRange); 3083 UserSGPRCount += 2; 3084 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3085 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3086 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3087 ValRange); 3088 UserSGPRCount += 2; 3089 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3090 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3091 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3092 Val, ValRange); 3093 UserSGPRCount += 1; 3094 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3095 PARSE_BITS_ENTRY( 3096 KD.compute_pgm_rsrc2, 3097 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3098 ValRange); 3099 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3100 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3101 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3102 ValRange); 3103 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3104 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3105 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3106 ValRange); 3107 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3108 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3109 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3110 ValRange); 3111 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3112 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3113 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3114 ValRange); 3115 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3116 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3117 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3118 ValRange); 3119 } else if (ID == ".amdhsa_next_free_vgpr") { 3120 VGPRRange = ValRange; 3121 NextFreeVGPR = Val; 3122 
} else if (ID == ".amdhsa_next_free_sgpr") { 3123 SGPRRange = ValRange; 3124 NextFreeSGPR = Val; 3125 } else if (ID == ".amdhsa_reserve_vcc") { 3126 if (!isUInt<1>(Val)) 3127 return OutOfRangeError(ValRange); 3128 ReserveVCC = Val; 3129 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3130 if (IVersion.Major < 7) 3131 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3132 IDRange); 3133 if (!isUInt<1>(Val)) 3134 return OutOfRangeError(ValRange); 3135 ReserveFlatScr = Val; 3136 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3137 if (IVersion.Major < 8) 3138 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3139 IDRange); 3140 if (!isUInt<1>(Val)) 3141 return OutOfRangeError(ValRange); 3142 ReserveXNACK = Val; 3143 } else if (ID == ".amdhsa_float_round_mode_32") { 3144 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3145 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3146 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3147 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3148 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3149 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3150 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3151 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3152 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3153 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3154 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3155 ValRange); 3156 } else if (ID == ".amdhsa_dx10_clamp") { 3157 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3158 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3159 } else if (ID == ".amdhsa_ieee_mode") { 3160 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3161 Val, ValRange); 3162 } else if (ID == ".amdhsa_fp16_overflow") { 3163 if (IVersion.Major < 9) 3164 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3165 IDRange); 3166 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3167 ValRange); 3168 } else if (ID == 
".amdhsa_exception_fp_ieee_invalid_op") { 3169 PARSE_BITS_ENTRY( 3170 KD.compute_pgm_rsrc2, 3171 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3172 ValRange); 3173 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3174 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3175 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3176 Val, ValRange); 3177 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3178 PARSE_BITS_ENTRY( 3179 KD.compute_pgm_rsrc2, 3180 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3181 ValRange); 3182 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3183 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3184 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3185 Val, ValRange); 3186 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 3187 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3188 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3189 Val, ValRange); 3190 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3191 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3192 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3193 Val, ValRange); 3194 } else if (ID == ".amdhsa_exception_int_div_zero") { 3195 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3196 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3197 Val, ValRange); 3198 } else { 3199 return getParser().Error(IDRange.Start, 3200 "unknown .amdhsa_kernel directive", IDRange); 3201 } 3202 3203 #undef PARSE_BITS_ENTRY 3204 } 3205 3206 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3207 return TokError(".amdhsa_next_free_vgpr directive is required"); 3208 3209 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3210 return TokError(".amdhsa_next_free_sgpr directive is required"); 3211 3212 unsigned VGPRBlocks; 3213 unsigned SGPRBlocks; 3214 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3215 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR, 3216 SGPRRange, VGPRBlocks, SGPRBlocks)) 3217 return true; 
  // Fold the computed block counts into the kernel descriptor, range-checking
  // each against the width of its COMPUTE_PGM_RSRC bitfield.
  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  // Hand the finished descriptor to the target streamer for emission.
  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr, ReserveXNACK);
  return false;
}

/// Parse the .hsa_code_object_version directive: a major/minor pair that is
/// forwarded unchanged to the target streamer.
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
  uint32_t Major;
  uint32_t Minor;

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
  return false;
}

/// Parse the .hsa_code_object_isa directive. With no operands the ISA version
/// of the targeted GPU is used; otherwise it expects
/// "major, minor, stepping, \"vendor\", \"arch\"".
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no arguments, then use the ISA version for the
  // targeted GPU.
3263 if (getLexer().is(AsmToken::EndOfStatement)) { 3264 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3265 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3266 ISA.Stepping, 3267 "AMD", "AMDGPU"); 3268 return false; 3269 } 3270 3271 if (ParseDirectiveMajorMinor(Major, Minor)) 3272 return true; 3273 3274 if (getLexer().isNot(AsmToken::Comma)) 3275 return TokError("stepping version number required, comma expected"); 3276 Lex(); 3277 3278 if (ParseAsAbsoluteExpression(Stepping)) 3279 return TokError("invalid stepping version"); 3280 3281 if (getLexer().isNot(AsmToken::Comma)) 3282 return TokError("vendor name required, comma expected"); 3283 Lex(); 3284 3285 if (getLexer().isNot(AsmToken::String)) 3286 return TokError("invalid vendor name"); 3287 3288 VendorName = getLexer().getTok().getStringContents(); 3289 Lex(); 3290 3291 if (getLexer().isNot(AsmToken::Comma)) 3292 return TokError("arch name required, comma expected"); 3293 Lex(); 3294 3295 if (getLexer().isNot(AsmToken::String)) 3296 return TokError("invalid arch name"); 3297 3298 ArchName = getLexer().getTok().getStringContents(); 3299 Lex(); 3300 3301 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3302 VendorName, ArchName); 3303 return false; 3304 } 3305 3306 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3307 amd_kernel_code_t &Header) { 3308 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3309 // assembly for backwards compatibility. 
  // Deprecated field: swallow the remainder of the statement and accept it
  // without recording anything.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  // Delegate to the shared amd_kernel_code_t field parser, which reports a
  // failure reason through Err.
  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();
  return false;
}

/// Parse an .amd_kernel_code_t block: a sequence of field assignments starting
/// from subtarget defaults and terminated by .end_amd_kernel_code_t. The
/// resulting header is emitted through the target streamer.
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  amd_kernel_code_t Header;
  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());

  while (true) {
    // Lex EndOfStatement. This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.
    while(getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected value identifier or .end_amd_kernel_code_t");

    StringRef ID = getLexer().getTok().getIdentifier();
    Lex();

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, Header))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(Header);

  return false;
}

/// Parse .amdgpu_hsa_kernel <symbol>: tags the symbol as an HSA kernel and,
/// when not using code object v3, initializes the kernel scope.
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  if (getLexer().isNot(AsmToken::Identifier))
    return TokError("expected symbol name");

  StringRef KernelName = Parser.getTok().getString();

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);
  Lex();
  if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
    KernelScope.initialize(getContext());
  return false;
}

/// Parse .amd_amdgpu_isa "<isa string>" and verify that the string matches the
/// ISA implied by the current subtarget (triple and mcpu).
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();

  std::string ISAVersionStringFromSTI;
3376 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3377 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3378 3379 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3380 return Error(getParser().getTok().getLoc(), 3381 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3382 "arguments specified through the command line"); 3383 } 3384 3385 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 3386 Lex(); 3387 3388 return false; 3389 } 3390 3391 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 3392 const char *AssemblerDirectiveBegin; 3393 const char *AssemblerDirectiveEnd; 3394 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 3395 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 3396 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 3397 HSAMD::V3::AssemblerDirectiveEnd) 3398 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 3399 HSAMD::AssemblerDirectiveEnd); 3400 3401 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 3402 return Error(getParser().getTok().getLoc(), 3403 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 3404 "not available on non-amdhsa OSes")).str()); 3405 } 3406 3407 std::string HSAMetadataString; 3408 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 3409 HSAMetadataString)) 3410 return true; 3411 3412 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 3413 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 3414 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3415 } else { 3416 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 3417 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3418 } 3419 3420 return false; 3421 } 3422 3423 /// Common code to parse out a block of text (typically YAML) between start and 3424 /// end directives. 
3425 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 3426 const char *AssemblerDirectiveEnd, 3427 std::string &CollectString) { 3428 3429 raw_string_ostream CollectStream(CollectString); 3430 3431 getLexer().setSkipSpace(false); 3432 3433 bool FoundEnd = false; 3434 while (!getLexer().is(AsmToken::Eof)) { 3435 while (getLexer().is(AsmToken::Space)) { 3436 CollectStream << getLexer().getTok().getString(); 3437 Lex(); 3438 } 3439 3440 if (getLexer().is(AsmToken::Identifier)) { 3441 StringRef ID = getLexer().getTok().getIdentifier(); 3442 if (ID == AssemblerDirectiveEnd) { 3443 Lex(); 3444 FoundEnd = true; 3445 break; 3446 } 3447 } 3448 3449 CollectStream << Parser.parseStringToEndOfStatement() 3450 << getContext().getAsmInfo()->getSeparatorString(); 3451 3452 Parser.eatToEndOfStatement(); 3453 } 3454 3455 getLexer().setSkipSpace(true); 3456 3457 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 3458 return TokError(Twine("expected directive ") + 3459 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 3460 } 3461 3462 CollectStream.flush(); 3463 return false; 3464 } 3465 3466 /// Parse the assembler directive for new MsgPack-format PAL metadata. 3467 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 3468 std::string String; 3469 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 3470 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 3471 return true; 3472 3473 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3474 if (!PALMetadata->setFromString(String)) 3475 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 3476 return false; 3477 } 3478 3479 /// Parse the assembler directive for old linear-format PAL metadata. 
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  // Values supplied through this directive use the legacy metadata encoding.
  PALMetadata->setLegacy();
  // Consume alternating key/value integer pairs until no trailing comma
  // remains.
  for (;;) {
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    // A key must always be followed by a value, so the comma here is
    // mandatory.
    if (getLexer().isNot(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    Lex();
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    // A trailing comma means another key/value pair follows.
    if (getLexer().isNot(AsmToken::Comma))
      break;
    Lex();
  }
  return false;
}

/// Dispatch AMDGPU-specific assembler directives. The accepted set depends on
/// whether the subtarget uses code object v3. Unrecognized directives fall
/// through to `return true` — presumably so the generic parser handles them;
/// NOTE(review): confirm this matches the MCTargetAsmParser convention.
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (IDVal == ".amdgcn_target")
      return ParseDirectiveAMDGCNTarget();

    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
3523 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 3524 return ParseDirectiveHSAMetadata(); 3525 } else { 3526 if (IDVal == ".hsa_code_object_version") 3527 return ParseDirectiveHSACodeObjectVersion(); 3528 3529 if (IDVal == ".hsa_code_object_isa") 3530 return ParseDirectiveHSACodeObjectISA(); 3531 3532 if (IDVal == ".amd_kernel_code_t") 3533 return ParseDirectiveAMDKernelCodeT(); 3534 3535 if (IDVal == ".amdgpu_hsa_kernel") 3536 return ParseDirectiveAMDGPUHsaKernel(); 3537 3538 if (IDVal == ".amd_amdgpu_isa") 3539 return ParseDirectiveISAVersion(); 3540 3541 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 3542 return ParseDirectiveHSAMetadata(); 3543 } 3544 3545 if (IDVal == PALMD::AssemblerDirectiveBegin) 3546 return ParseDirectivePALMetadataBegin(); 3547 3548 if (IDVal == PALMD::AssemblerDirective) 3549 return ParseDirectivePALMetadata(); 3550 3551 return true; 3552 } 3553 3554 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 3555 unsigned RegNo) const { 3556 3557 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 3558 R.isValid(); ++R) { 3559 if (*R == RegNo) 3560 return isGFX9(); 3561 } 3562 3563 switch (RegNo) { 3564 case AMDGPU::TBA: 3565 case AMDGPU::TBA_LO: 3566 case AMDGPU::TBA_HI: 3567 case AMDGPU::TMA: 3568 case AMDGPU::TMA_LO: 3569 case AMDGPU::TMA_HI: 3570 return !isGFX9(); 3571 case AMDGPU::XNACK_MASK: 3572 case AMDGPU::XNACK_MASK_LO: 3573 case AMDGPU::XNACK_MASK_HI: 3574 return !isCI() && !isSI() && hasXNACK(); 3575 default: 3576 break; 3577 } 3578 3579 if (isInlineValue(RegNo)) 3580 return !isCI() && !isSI() && !isVI(); 3581 3582 if (isCI()) 3583 return true; 3584 3585 if (isSI()) { 3586 // No flat_scr 3587 switch (RegNo) { 3588 case AMDGPU::FLAT_SCR: 3589 case AMDGPU::FLAT_SCR_LO: 3590 case AMDGPU::FLAT_SCR_HI: 3591 return false; 3592 default: 3593 return true; 3594 } 3595 } 3596 3597 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 3598 // SI/CI have. 
3599 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 3600 R.isValid(); ++R) { 3601 if (*R == RegNo) 3602 return false; 3603 } 3604 3605 return true; 3606 } 3607 3608 OperandMatchResultTy 3609 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { 3610 // Try to parse with a custom parser 3611 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 3612 3613 // If we successfully parsed the operand or if there as an error parsing, 3614 // we are done. 3615 // 3616 // If we are parsing after we reach EndOfStatement then this means we 3617 // are appending default values to the Operands list. This is only done 3618 // by custom parser, so we shouldn't continue on to the generic parsing. 3619 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 3620 getLexer().is(AsmToken::EndOfStatement)) 3621 return ResTy; 3622 3623 ResTy = parseRegOrImm(Operands); 3624 3625 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail) 3626 return ResTy; 3627 3628 const auto &Tok = Parser.getTok(); 3629 SMLoc S = Tok.getLoc(); 3630 3631 const MCExpr *Expr = nullptr; 3632 if (!Parser.parseExpression(Expr)) { 3633 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3634 return MatchOperand_Success; 3635 } 3636 3637 // Possibly this is an instruction flag like 'gds'. 3638 if (Tok.getKind() == AsmToken::Identifier) { 3639 Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S)); 3640 Parser.Lex(); 3641 return MatchOperand_Success; 3642 } 3643 3644 return MatchOperand_NoMatch; 3645 } 3646 3647 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 3648 // Clear any forced encodings from the previous instruction. 
3649 setForcedEncodingSize(0); 3650 setForcedDPP(false); 3651 setForcedSDWA(false); 3652 3653 if (Name.endswith("_e64")) { 3654 setForcedEncodingSize(64); 3655 return Name.substr(0, Name.size() - 4); 3656 } else if (Name.endswith("_e32")) { 3657 setForcedEncodingSize(32); 3658 return Name.substr(0, Name.size() - 4); 3659 } else if (Name.endswith("_dpp")) { 3660 setForcedDPP(true); 3661 return Name.substr(0, Name.size() - 4); 3662 } else if (Name.endswith("_sdwa")) { 3663 setForcedSDWA(true); 3664 return Name.substr(0, Name.size() - 5); 3665 } 3666 return Name; 3667 } 3668 3669 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 3670 StringRef Name, 3671 SMLoc NameLoc, OperandVector &Operands) { 3672 // Add the instruction mnemonic 3673 Name = parseMnemonicSuffix(Name); 3674 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 3675 3676 while (!getLexer().is(AsmToken::EndOfStatement)) { 3677 OperandMatchResultTy Res = parseOperand(Operands, Name); 3678 3679 // Eat the comma or space if there is one. 
3680 if (getLexer().is(AsmToken::Comma)) 3681 Parser.Lex(); 3682 3683 switch (Res) { 3684 case MatchOperand_Success: break; 3685 case MatchOperand_ParseFail: 3686 Error(getLexer().getLoc(), "failed parsing operand."); 3687 while (!getLexer().is(AsmToken::EndOfStatement)) { 3688 Parser.Lex(); 3689 } 3690 return true; 3691 case MatchOperand_NoMatch: 3692 Error(getLexer().getLoc(), "not a valid operand."); 3693 while (!getLexer().is(AsmToken::EndOfStatement)) { 3694 Parser.Lex(); 3695 } 3696 return true; 3697 } 3698 } 3699 3700 return false; 3701 } 3702 3703 //===----------------------------------------------------------------------===// 3704 // Utility functions 3705 //===----------------------------------------------------------------------===// 3706 3707 OperandMatchResultTy 3708 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { 3709 switch(getLexer().getKind()) { 3710 default: return MatchOperand_NoMatch; 3711 case AsmToken::Identifier: { 3712 StringRef Name = Parser.getTok().getString(); 3713 if (!Name.equals(Prefix)) { 3714 return MatchOperand_NoMatch; 3715 } 3716 3717 Parser.Lex(); 3718 if (getLexer().isNot(AsmToken::Colon)) 3719 return MatchOperand_ParseFail; 3720 3721 Parser.Lex(); 3722 3723 bool IsMinus = false; 3724 if (getLexer().getKind() == AsmToken::Minus) { 3725 Parser.Lex(); 3726 IsMinus = true; 3727 } 3728 3729 if (getLexer().isNot(AsmToken::Integer)) 3730 return MatchOperand_ParseFail; 3731 3732 if (getParser().parseAbsoluteExpression(Int)) 3733 return MatchOperand_ParseFail; 3734 3735 if (IsMinus) 3736 Int = -Int; 3737 break; 3738 } 3739 } 3740 return MatchOperand_Success; 3741 } 3742 3743 OperandMatchResultTy 3744 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 3745 AMDGPUOperand::ImmTy ImmTy, 3746 bool (*ConvertResult)(int64_t&)) { 3747 SMLoc S = Parser.getTok().getLoc(); 3748 int64_t Value = 0; 3749 3750 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 3751 if (Res != 
MatchOperand_Success) 3752 return Res; 3753 3754 if (ConvertResult && !ConvertResult(Value)) { 3755 return MatchOperand_ParseFail; 3756 } 3757 3758 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 3759 return MatchOperand_Success; 3760 } 3761 3762 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix( 3763 const char *Prefix, 3764 OperandVector &Operands, 3765 AMDGPUOperand::ImmTy ImmTy, 3766 bool (*ConvertResult)(int64_t&)) { 3767 StringRef Name = Parser.getTok().getString(); 3768 if (!Name.equals(Prefix)) 3769 return MatchOperand_NoMatch; 3770 3771 Parser.Lex(); 3772 if (getLexer().isNot(AsmToken::Colon)) 3773 return MatchOperand_ParseFail; 3774 3775 Parser.Lex(); 3776 if (getLexer().isNot(AsmToken::LBrac)) 3777 return MatchOperand_ParseFail; 3778 Parser.Lex(); 3779 3780 unsigned Val = 0; 3781 SMLoc S = Parser.getTok().getLoc(); 3782 3783 // FIXME: How to verify the number of elements matches the number of src 3784 // operands? 3785 for (int I = 0; I < 4; ++I) { 3786 if (I != 0) { 3787 if (getLexer().is(AsmToken::RBrac)) 3788 break; 3789 3790 if (getLexer().isNot(AsmToken::Comma)) 3791 return MatchOperand_ParseFail; 3792 Parser.Lex(); 3793 } 3794 3795 if (getLexer().isNot(AsmToken::Integer)) 3796 return MatchOperand_ParseFail; 3797 3798 int64_t Op; 3799 if (getParser().parseAbsoluteExpression(Op)) 3800 return MatchOperand_ParseFail; 3801 3802 if (Op != 0 && Op != 1) 3803 return MatchOperand_ParseFail; 3804 Val |= (Op << I); 3805 } 3806 3807 Parser.Lex(); 3808 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 3809 return MatchOperand_Success; 3810 } 3811 3812 OperandMatchResultTy 3813 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 3814 AMDGPUOperand::ImmTy ImmTy) { 3815 int64_t Bit = 0; 3816 SMLoc S = Parser.getTok().getLoc(); 3817 3818 // We are at the end of the statement, and this is a default argument, so 3819 // use a default value. 
  if (getLexer().isNot(AsmToken::EndOfStatement)) {
    switch(getLexer().getKind()) {
      case AsmToken::Identifier: {
        StringRef Tok = Parser.getTok().getString();
        if (Tok == Name) {
          // Diagnose modifiers that are spelled correctly but not valid for
          // the current subtarget.
          if (Tok == "r128" && isGFX9())
            Error(S, "r128 modifier is not supported on this GPU");
          if (Tok == "a16" && !isGFX9())
            Error(S, "a16 modifier is not supported on this GPU");
          Bit = 1;
          Parser.Lex();
        } else if (Tok.startswith("no") && Tok.endswith(Name)) {
          // The negated spelling ("no<name>") explicitly clears the bit.
          Bit = 0;
          Parser.Lex();
        } else {
          return MatchOperand_NoMatch;
        }
        break;
      }
      default:
        return MatchOperand_NoMatch;
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}

// If the optional operand was present in the source, re-emit it into Inst;
// otherwise add Default so the operand slot is always filled.
static void addOptionalImmOperand(
  MCInst& Inst, const OperandVector& Operands,
  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
  AMDGPUOperand::ImmTy ImmT,
  int64_t Default = 0) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
  } else {
    Inst.addOperand(MCOperand::createImm(Default));
  }
}

// Parse "<Prefix>:<identifier>" and return the identifier in Value.
// NOTE(review): the value token is not lexed before returning — presumably
// the caller consumes it; confirm against call sites.
OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
  if (getLexer().isNot(AsmToken::Identifier)) {
    return MatchOperand_NoMatch;
  }
  StringRef Tok = Parser.getTok().getString();
  if (Tok != Prefix) {
    return MatchOperand_NoMatch;
  }

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon)) {
    return MatchOperand_ParseFail;
  }

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Identifier)) {
    return MatchOperand_ParseFail;
  }

  Value = Parser.getTok().getString();
  return MatchOperand_Success;
}

// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Dfmt = 0, Nfmt = 0;
  // dfmt and nfmt can appear in either order, and each is optional.
  bool GotDfmt = false, GotNfmt = false;
  while (!GotDfmt || !GotNfmt) {
    if (!GotDfmt) {
      auto Res = parseIntWithPrefix("dfmt", Dfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        if (Dfmt >= 16) {
          Error(Parser.getTok().getLoc(), "out of range dfmt");
          return MatchOperand_ParseFail;
        }
        GotDfmt = true;
        Parser.Lex();
        continue;
      }
    }
    if (!GotNfmt) {
      auto Res = parseIntWithPrefix("nfmt", Nfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        if (Nfmt >= 8) {
          Error(Parser.getTok().getLoc(), "out of range nfmt");
          return MatchOperand_ParseFail;
        }
        GotNfmt = true;
        Parser.Lex();
        continue;
      }
    }
    break;
  }
  if (!GotDfmt && !GotNfmt)
    return MatchOperand_NoMatch;
  // Pack both fields into one FORMAT immediate: dfmt occupies bits [3:0]
  // (value < 16), nfmt bits [6:4] (value < 8).
  auto Format = Dfmt | Nfmt << 4;
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 3955 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 3956 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3957 3958 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3959 } 3960 3961 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 3962 bool IsGdsHardcoded) { 3963 OptionalImmIndexMap OptionalIdx; 3964 3965 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3966 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3967 3968 // Add the register arguments 3969 if (Op.isReg()) { 3970 Op.addRegOperands(Inst, 1); 3971 continue; 3972 } 3973 3974 if (Op.isToken() && Op.getToken() == "gds") { 3975 IsGdsHardcoded = true; 3976 continue; 3977 } 3978 3979 // Handle optional arguments 3980 OptionalIdx[Op.getImmTy()] = i; 3981 } 3982 3983 AMDGPUOperand::ImmTy OffsetType = 3984 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si || 3985 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
                                        AMDGPUOperand::ImmTySwizzle :
                                        AMDGPUOperand::ImmTyOffset;

  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);

  if (!IsGdsHardcoded) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
  }
  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

// Convert parsed EXP operands into an MCInst: four sources (with "off" slots
// mapped to NoRegister), the target immediate, optional vm/compr modifiers,
// and a trailing "en" mask computed from which source slots are live.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4]; // MCInst operand index of each of the 4 source slots.
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // An explicit "off" source occupies a slot but contributes no register.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // "done" is encoded in the opcode itself; no MCInst operand for it.
    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // Compressed export: sources 0 and 1 hold packed data; shift slot 2 into
    // slot 1 and clear the upper two slots.
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // Build the enable mask: 2 bits per live slot when compressed, 1 otherwise.
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}

//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//

// Fold CntVal into IntVal via the target-specific encode callback. Returns
// true on failure, i.e. when the value does not round-trip through
// encode/decode (it did not fit its bitfield) and saturation was not
// requested. With Saturate set, an out-of-range value is clamped by
// re-encoding -1 (all ones) instead of failing.
static bool
encodeCnt(
  const AMDGPU::IsaVersion ISA,
  int64_t &IntVal,
  int64_t CntVal,
  bool Saturate,
  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
  unsigned (*decode)(const IsaVersion &Version, unsigned))
{
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}

// Parse one "<name>(<value>)" component of an s_waitcnt operand (vmcnt,
// expcnt, lgkmcnt, or their _sat variants) and merge it into IntVal.
// Returns true on error. Also consumes a trailing '&' or ',' separator when
// another counter component follows.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  StringRef CntName = Parser.getTok().getString();
  int64_t CntVal;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LParen))
    return true;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Integer))
    return true;

  SMLoc ValLoc = Parser.getTok().getLoc();
  if (getParser().parseAbsoluteExpression(CntVal))
    return true;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return true;
  }

  if (getLexer().isNot(AsmToken::RParen)) {
    return true;
  }

  // Consume an '&' or ',' separator, but only when it is followed by an
  // identifier (i.e. another counter spec).
  Parser.Lex();
  if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
    const AsmToken NextToken = getLexer().peekTok();
    if (NextToken.is(AsmToken::Identifier)) {
      Parser.Lex();
    }
  }

  return false;
}

// Parse a full s_waitcnt operand: either a raw integer, or one or more
// symbolic counter components. Starts from the all-ones bitmask so that
// unspecified counters remain "no wait".
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
    default: return MatchOperand_ParseFail;
    case AsmToken::Integer:
      // The operand can be an integer value.
      if (getParser().parseAbsoluteExpression(Waitcnt))
        return MatchOperand_ParseFail;
      break;

    case AsmToken::Identifier:
      do {
        if (parseCnt(Waitcnt))
          return MatchOperand_ParseFail;
      } while(getLexer().isNot(AsmToken::EndOfStatement));
      break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return MatchOperand_Success;
}

// Parse the "hwreg(<id>[, <offset>, <width>])" construct used by
// s_getreg/s_setreg. The register may be given symbolically or numerically.
// Returns true on a syntax error; range validation is left to the caller
// (parseHwreg), which has the diagnostics.
bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
                                          int64_t &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (Parser.getTok().getString() != "hwreg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    HwReg.IsSymbolic = true;
    HwReg.Id = ID_UNKNOWN_;
    const StringRef tok = Parser.getTok().getString();
    // Pre-GFX9 targets do not know the GFX9+ symbolic names.
    int Last = ID_SYMBOLIC_LAST_;
    if (isSI() || isCI() || isVI())
      Last = ID_SYMBOLIC_FIRST_GFX9_;
    for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
      if (tok == IdSymbolic[i]) {
        HwReg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    HwReg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(HwReg.Id))
      return true;
  }

  if (getLexer().is(AsmToken::RParen)) {
    Parser.Lex();
    return false;
  }

  // optional params
  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Offset))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Width))
    return true;

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();

  return false;
}

// Parse an s_getreg/s_setreg operand: either a raw 16-bit immediate or a
// hwreg(...) construct, validated and packed into the simm16 encoding.
OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
    default: return MatchOperand_NoMatch;
    case AsmToken::Integer:
      // The operand can be an integer value.
      if (getParser().parseAbsoluteExpression(Imm16Val))
        return MatchOperand_NoMatch;
      if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
        Error(S, "invalid immediate: only 16-bit values are legal");
        // Do not return error code, but create an imm operand anyway and proceed
        // to the next operand, if any. That avoids unneccessary error messages.
      }
      break;

    case AsmToken::Identifier: {
      OperandInfoTy HwReg(ID_UNKNOWN_);
      int64_t Offset = OFFSET_DEFAULT_;
      int64_t Width = WIDTH_M1_DEFAULT_ + 1;
      if (parseHwregConstruct(HwReg, Offset, Width))
        return MatchOperand_ParseFail;
      // Emit diagnostics for out-of-range fields but still encode, matching
      // the raw-integer path above.
      if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
        if (HwReg.IsSymbolic)
          Error(S, "invalid symbolic name of hardware register");
        else
          Error(S, "invalid code of hardware register: only 6-bit values are legal");
      }
      if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
        Error(S, "invalid bit offset: only 5-bit values are legal");
      if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
        Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
      Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
    }
    break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}

// Any immediate is acceptable as an s_waitcnt operand.
bool AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}

// Parse the "sendmsg(<msg>[, <op>[, <stream>]])" construct. Message and
// operation may be symbolic or numeric. Returns true on syntax error; an
// unrecognized symbolic message yields Msg.Id == ID_UNKNOWN_ with a false
// return, leaving validation to parseSendMsgOp.
bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
  using namespace llvm::AMDGPU::SendMsg;

  if (Parser.getTok().getString() != "sendmsg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    Msg.IsSymbolic = true;
    Msg.Id = ID_UNKNOWN_;
    const std::string tok = Parser.getTok().getString();
    for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
      switch(i) {
        default: continue; // Omit gaps.
        case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break;
      }
      if (tok == IdSymbolic[i]) {
        Msg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    Msg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Msg.Id))
      return true;
    // NOTE(review): this second integer parse looks like it guards against a
    // stray extra integer; confirm intent against upstream history.
    if (getLexer().is(AsmToken::Integer))
      if (getParser().parseAbsoluteExpression(Msg.Id))
        Msg.Id = ID_UNKNOWN_;
  }
  if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
    return false;

  if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
    if (getLexer().isNot(AsmToken::RParen))
      return true;
    Parser.Lex();
    return false;
  }

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
  Operation.Id = ID_UNKNOWN_;
  if (getLexer().is(AsmToken::Identifier)) {
    Operation.IsSymbolic = true;
    // Pick the operation name table matching the message type.
    const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
    const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
    const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
    const StringRef Tok = Parser.getTok().getString();
    for (int i = F; i < L; ++i) {
      if (Tok == S[i]) {
        Operation.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    Operation.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Operation.Id))
      return true;
  }

  if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
    // Stream id is optional.
    if (getLexer().is(AsmToken::RParen)) {
      Parser.Lex();
      return false;
    }

    if (getLexer().isNot(AsmToken::Comma))
      return true;
    Parser.Lex();

    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(StreamId))
      return true;
  }

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();
  return false;
}

// Parse a VINTRP slot operand: p10 -> 0, p20 -> 1, p0 -> 2.
OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  SMLoc S = Parser.getTok().getLoc();
  if (Slot == -1)
    return MatchOperand_ParseFail;

  Parser.Lex();
  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}

// Parse a VINTRP attribute of the form "attr<N>.<chan>" (chan in x/y/z/w)
// into two immediate operands: the attribute index and the channel.
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1)
    return MatchOperand_ParseFail;

  // Strip the ".c" suffix and the "attr" prefix, leaving the decimal index.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return MatchOperand_ParseFail;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();
  if (Attr > 63) {
    Error(S, "out of bounds attr");
    return MatchOperand_Success;
  }

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}

// Emit the common "invalid exp target" diagnostic at the current token.
void AMDGPUAsmParser::errorExpTgt() {
  Error(Parser.getTok().getLoc(), "invalid exp target");
}

// Translate a symbolic EXP target name into its numeric encoding:
// mrt0-7 -> 0-7, mrtz -> 8, null -> 9, pos0-3 -> 12-15, param0-31 -> 32-63.
// Out-of-range values are diagnosed but still reported as Success so that
// parsing can continue.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
                                                      uint8_t &Val) {
  if (Str == "null") {
    Val = 9;
    return MatchOperand_Success;
  }

  if (Str.startswith("mrt")) {
    Str = Str.drop_front(3);
    if (Str == "z") { // == mrtz
      Val = 8;
      return MatchOperand_Success;
    }

    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 7)
      errorExpTgt();

    return MatchOperand_Success;
  }

  if (Str.startswith("pos")) {
    Str = Str.drop_front(3);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 3)
      errorExpTgt();

    Val += 12;
    return MatchOperand_Success;
  }

  if (Str.startswith("param")) {
    Str = Str.drop_front(5);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val >= 32)
      errorExpTgt();

    Val += 32;
    return MatchOperand_Success;
  }

  if (Str.startswith("invalid_target_")) {
    Str = Str.drop_front(15);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    errorExpTgt();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

// Parse an EXP target operand and push it as an ImmTyExpTgt immediate.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  uint8_t Val;
  StringRef Str = Parser.getTok().getString();

  auto Res = parseExpTgtImpl(Str, Val);
  if (Res != MatchOperand_Success)
    return Res;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return MatchOperand_Success;
}

// Parse an s_sendmsg operand: either a raw 16-bit immediate or a sendmsg(...)
// construct, validated and packed into the simm16 encoding. Validation errors
// are diagnosed but an immediate operand is still produced (see comments).
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default:
    return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return error code, but create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unneccessary error messages.
    }
    break;
  case AsmToken::Identifier: {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Operation(OP_UNKNOWN_);
    int64_t StreamId = STREAM_ID_DEFAULT_;
    if (parseSendMsgConstruct(Msg, Operation, StreamId))
      return MatchOperand_ParseFail;
    do {
      // Validate and encode message ID.
      if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
              || Msg.Id == ID_SYSMSG)) {
        if (Msg.IsSymbolic)
          Error(S, "invalid/unsupported symbolic name of message");
        else
          Error(S, "invalid/unsupported code of message");
        break;
      }
      Imm16Val = (Msg.Id << ID_SHIFT_);
      // Validate and encode operation ID.
      if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
        if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
          if (Operation.IsSymbolic)
            Error(S, "invalid symbolic name of GS_OP");
          else
            Error(S, "invalid code of GS_OP: only 2-bit values are legal");
          break;
        }
        if (Operation.Id == OP_GS_NOP
            && Msg.Id != ID_GS_DONE) {
          Error(S, "invalid GS_OP: NOP is for GS_DONE only");
          break;
        }
        Imm16Val |= (Operation.Id << OP_SHIFT_);
      }
      if (Msg.Id == ID_SYSMSG) {
        if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
          if (Operation.IsSymbolic)
            Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
          else
            Error(S, "invalid/unsupported code of SYSMSG_OP");
          break;
        }
        Imm16Val |= (Operation.Id << OP_SHIFT_);
      }
      // Validate and encode stream ID.
      if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
        if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
          Error(S, "invalid stream id: only 2-bit values are legal");
          break;
        }
        Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
      }
    } while (false);
  }
  break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}

//===----------------------------------------------------------------------===//
// parser helpers
//===----------------------------------------------------------------------===//

// True if Token is an identifier with exactly the given spelling.
bool
AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
}

// True if the current token is the given identifier.
bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}

// True if the current token has the given kind.
bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}

// Consume the current token if it is the given identifier; returns whether
// it was consumed.
bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (isId(Id)) {
    lex();
    return true;
  }
  return false;
}

// Consume the current token if it has the given kind; returns whether it
// was consumed.
bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (isToken(Kind)) {
    lex();
    return true;
  }
  return false;
}

// Consume a token of the given kind or emit ErrMsg at the current location.
// Returns false on mismatch.
bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);
    return false;
  }
  return true;
}

// Parse an absolute expression into Imm; returns true on success
// (note: inverted sense relative to parseAbsoluteExpression).
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
  return !getParser().parseAbsoluteExpression(Imm);
}

// Parse a quoted string token into Val or emit ErrMsg. Returns true on
// success.
bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::String)) {
    Val = getToken().getStringContents();
    lex();
    return true;
  } else {
    Error(getLoc(), ErrMsg);
    return false;
  }
}

AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}

AsmToken
AMDGPUAsmParser::peekToken() {
  return getLexer().peekTok();
}

// Fill Tokens with lookahead; slots beyond what the lexer can provide are
// set to Error tokens so callers can test kinds unconditionally.
void
AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
  auto TokCount = getLexer().peekTokens(Tokens);

  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
    Tokens[Idx] = AsmToken(AsmToken::Error, "");
}

AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}

SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}

StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}

void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}

//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//
// Pack and/or/xor masks into the BITMASK_PERM swizzle encoding.
LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask  << BITMASK_OR_SHIFT)  |
         (XorMask << BITMASK_XOR_SHIFT);
}

// Parse OpNum comma-prefixed integer operands into Op, each required to be
// within [MinVal, MaxVal]; emits ErrMsg for an out-of-range value.
// Returns true on success.
bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!skipToken(AsmToken::Comma, "expected a comma")){
      return false;
    }
    SMLoc ExprLoc = Parser.getTok().getLoc();
    if (!parseExpr(Op[i])) {
      return false;
    }
    if (Op[i] < MinVal || Op[i] > MaxVal) {
      Error(ExprLoc, ErrMsg);
      return false;
    }
  }

  return true;
}

// swizzle(QUAD_PERM, a, b, c, d): four 2-bit lane ids packed into the
// QUAD_PERM encoding.
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    for (unsigned I = 0; I < LANE_NUM; ++I) {
      Imm |= Lane[I] << (LANE_SHIFT * I);
    }
    return true;
  }
  return false;
}

// swizzle(BROADCAST, group_size, lane_id): broadcast one lane to its group,
// encoded as a bitmask perm (and-mask selects the group, or-mask the lane).
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32,
                            "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperands(1, &LaneIdx,
                           0, GroupSize - 1,
                           "lane id must be in the interval [0,group size - 1]")) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}

// swizzle(REVERSE, group_size): reverse lane order within each group,
// encoded via the xor-mask.
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32, "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}

// swizzle(SWAP, group_size): swap adjacent groups, encoded via the xor-mask.
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
                            1, 16, "group size must be in the interval [1,16]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}

// swizzle(BITMASK_PERM, "mask"): a 5-character string over {0,1,p,i}, MSB
// first, describing each result bit: 0/1 force the bit, p passes it through,
// i inverts it.
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = Parser.getTok().getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

// Raw numeric form: offset must fit in 16 bits.
bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = Parser.getTok().getLoc();

  if (!parseExpr(Imm)) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

// Dispatch on the swizzle macro mode name inside "swizzle(<mode>, ...)".
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = Parser.getTok().getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}

// Parse the ds_swizzle_b32 "offset:" operand: either "offset:swizzle(...)"
// or a plain 16-bit "offset:<imm>".
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}

//===----------------------------------------------------------------------===//
// VGPR Index Mode
//===----------------------------------------------------------------------===//

// Parse the body of "gpr_idx(<mode>[,<mode>...])" after the '(' has been
// consumed; returns the OR of the selected mode bits, or OFF for "gpr_idx()".
// Parse failures are diagnosed here but do not abort the caller.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = Parser.getTok().getLoc();

    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      break;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      break;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      break;
  }

  return Imm;
}

// Parse an s_set_gpr_idx_on mode operand: either gpr_idx(...) or a raw
// 4-bit immediate.
OperandMatchResultTy
AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  int64_t Imm = 0;
  SMLoc S = Parser.getTok().getLoc();

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "gpr_idx" &&
      getLexer().peekTok().is(AsmToken::LParen)) {

    Parser.Lex();
    Parser.Lex();

    // If parse failed, trigger an error but do not return error code
    // to avoid excessive error messages.
    Imm = parseGPRIdxMacro();

  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return MatchOperand_NoMatch;
    if (Imm < 0 || !isUInt<4>(Imm)) {
      Error(S, "invalid immediate: only 4-bit values are legal");
    }
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}

//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//

// Parse a SOPP branch target: either an integer offset or a symbol reference.
OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();

  switch (getLexer().getKind()) {
    default: return MatchOperand_ParseFail;
    case AsmToken::Integer: {
      int64_t Imm;
      if (getParser().parseAbsoluteExpression(Imm))
        return MatchOperand_ParseFail;
      Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
      return MatchOperand_Success;
    }

    case AsmToken::Identifier:
      Operands.push_back(AMDGPUOperand::CreateExpr(this,
          MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
                                  Parser.getTok().getString()), getContext()), S));
      Parser.Lex();
      return MatchOperand_Success;
  }
}

//===----------------------------------------------------------------------===//
// mubuf
//===----------------------------------------------------------------------===//

// Default (absent) glc modifier: immediate 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
}

// Default (absent) slc modifier: immediate 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
}

// Convert parsed MUBUF operands into an MCInst, handling the atomic tied-dst
// case and the lds-opcode quirk described below.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsAtomicReturn,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  assert(IsAtomicReturn ? IsAtomic : true);
  unsigned FirstOperandIdx = 1;

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // NOTE(review): this plain assignment is overwritten on every iteration,
    // so any operand after 'lds' resets it to false. Upstream later uses
    // OR-accumulation here — confirm against LLVM history before relying
    // on this flag.
    HasLdsModifier = Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  if (!IsAtomic) { // glc is hard-coded.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }
}

// Convert parsed MTBUF operands into an MCInst; optional modifiers are
// appended in fixed encoding order (offset, format, glc, slc, tfe).
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}

//===----------------------------------------------------------------------===//
// mimg
//===----------------------------------------------------------------------===//

// Convert parsed MIMG operands into an MCInst. For atomics the single def
// register is duplicated as a tied source.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unexpected operand type");
    }
  }

  // Append optional modifiers in the encoding's fixed order.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}

void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}

//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isSMRDOffset8() const {
  return isImm() && isUInt<8>(getImm());
}

bool AMDGPUOperand::isSMRDOffset20() const {
  return isImm() && isUInt<20>(getImm());
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

//===----------------------------------------------------------------------===//
// vop3
//===----------------------------------------------------------------------===//

// Map an omod multiplier (1, 2, 4) to its 2-bit field value (0, 1, 2).
// Returns false for any other multiplier.
static bool ConvertOmodMul(int64_t &Mul) {
  if (Mul != 1 && Mul != 2 && Mul != 4)
    return false;

  Mul >>= 1;
  return true;
}

// Map an omod divisor (1 -> 0, 2 -> 3) to its field value; other divisors
// are rejected.
static bool ConvertOmodDiv(int64_t &Div) {
  if (Div == 1) {
    Div = 0;
    return true;
  }

  if (Div == 2) {
    Div = 3;
    return true;
  }

  return false;
}

// DPP bound_ctrl: "bound_ctrl:0" means enable (field 1); "bound_ctrl:-1"
// means disable (field 0). Other values are rejected.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0) {
    BoundCtrl = 1;
    return true;
  }

  if (BoundCtrl == -1) {
    BoundCtrl = 0;
    return true;
  }

  return false;
}

// Note: the order in this table matches the order of operands in AsmString.
// Table of all optional operands the custom-operand parser understands.
// Each entry: {name, immediate type, is-a-bare-bit-flag, optional value
// converter}. Ordering is significant (see note above) — entries are tried
// in order by parseOptionalOpr().
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
  // NOTE(review): duplicate of the "d16" entry above with identical fields;
  // the earlier entry is always tried first, so this one can never match
  // differently and appears to be dead.
  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
};

/// Entry point for parsing one optional operand, with lookahead to also
/// consume hardcoded mandatory operands that follow optional ones.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
  unsigned size = Operands.size();
  assert(size > 0);

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However implementation of some instructions violates
  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure autogenerated parser of custom operands never hit hardcoded
  // mandatory operands.

  if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {

    // We have parsed the first optional operand.
    // Parse as many operands as necessary to skip all mandatory operands.

    for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
      if (res != MatchOperand_Success ||
          getLexer().is(AsmToken::EndOfStatement)) break;
      if (getLexer().is(AsmToken::Comma)) Parser.Lex();
      res = parseOptionalOpr(Operands);
    }
  }

  return res;
}

/// Try each entry of AMDGPUOptionalOperandTable in order, dispatching to the
/// specialized parser the operand kind requires. Returns the first result
/// that is not MatchOperand_NoMatch.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
  OperandMatchResultTy res;
  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
    // try to parse any optional operand here
    if (Op.IsBit) {
      res = parseNamedBit(Op.Name, Operands, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
      res = parseOModOperand(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
      res = parseSDWASel(Operands, Op.Name, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
      res = parseSDWADstUnused(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
               Op.Type == AMDGPUOperand::ImmTyNegLo ||
               Op.Type == AMDGPUOperand::ImmTyNegHi) {
      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
                                        Op.ConvertResult);
    } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
      res = parseDfmtNfmt(Operands);
    } else {
      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
    }
    if (res != MatchOperand_NoMatch) {
      return res;
    }
  }
  return MatchOperand_NoMatch;
}

/// Parse an output-modifier operand: either "mul:<1|2|4>" or "div:<1|2>",
/// converted to the shared 2-bit OMOD encoding.
OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
  StringRef Name = Parser.getTok().getString();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }

  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }

  return MatchOperand_NoMatch;
}

/// VOP3 with op_sel: convert as VOP3P, then move the destination's op_sel
/// bit (the bit just past the last source) into src0_modifiers as
/// DST_OP_SEL, which is where the encoding expects it.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  // Count how many source operands this opcode actually has.
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}

/// Returns true when the MCInst operand about to be emitted at OpNum is an
/// input-modifiers operand paired with a following (untied) register source.
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}

/// Convert VOP3 interpolation instructions (v_interp_*): handles the
/// attribute/slot/channel immediates plus the optional high/clamp/omod.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Emits both the modifiers immediate and the source register/imm.
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  // Only append optional immediates the opcode actually encodes.
  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}

/// Generic VOP3 conversion. Emits defs, then sources (with or without
/// source-modifier immediates depending on the opcode), then clamp/omod,
/// and finally patches the tied-src2 shape of the v_mac/v_fmac family.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_si ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// VOP3 conversion entry point used by the generated matcher.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

/// VOP3P conversion: runs the generic VOP3 conversion, then folds the
/// packed-math controls (op_sel, op_sel_hi, neg_lo, neg_hi) into the
/// per-source modifier immediates.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    // vdst_in is tied to the destination: repeat operand 0.
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed ops default op_sel_hi to all-ones (-1); unpacked default to 0.
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Scatter bit J of each control mask into source J's modifier immediate.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//
/// True when this operand is a dpp_ctrl immediate whose value falls in one
/// of the ranges/values the DPP encoding defines.
bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31);
  }
  return false;
}

// Accepts any immediate representable in 16 bits, signed or unsigned.
bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

/// Parse a dpp_ctrl specifier. Accepts bare keywords (row_mirror,
/// row_half_mirror), "quad_perm:[a,b,c,d]", and "<mode>:<n>" forms
/// (row_shl/shr/ror, wave_shl/rol/shr/ror, row_bcast), producing the
/// combined 9-bit DppCtrl immediate.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
        return MatchOperand_ParseFail;

      // Pack the remaining three lane selects, 2 bits each.
      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
          return MatchOperand_ParseFail;
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

// Default immediates for omitted DPP fields: masks default to 0xf
// (all rows/banks enabled), bound_ctrl defaults to 0.

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

/// Convert parsed DPP operands into an MCInst, materializing tied operands
/// and appending row_mask/bank_mask/bound_ctrl in encoding order.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    // NOTE(review): `} if` below is a bare `if`, not `else if`; behaviorally
    // equivalent because the branch above always continues.
    } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

/// Parse an SDWA select operand of the form "<Prefix>:<SEL>", where SEL is
/// one of BYTE_0..BYTE_3, WORD_0, WORD_1 or DWORD.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff); // sentinel: unrecognized selector
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

/// Parse "dst_unused:<UNUSED_PAD|UNUSED_SEXT|UNUSED_PRESERVE>".
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff); // sentinel: unrecognized value
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

// Thin per-format SDWA entry points used by the generated matcher.

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  // VCC skipping only applies on VI for VOPC.
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

/// Shared SDWA conversion. Emits defs and modifier-wrapped sources, skips
/// redundant "vcc" tokens for VOP2b/VOPC forms when requested, then appends
/// the per-format optional immediates (clamp/omod/dst_sel/dst_unused/
/// src*_sel) with their documented defaults.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
// Registers this parser with both the R600 and GCN targets.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This fuction should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand. This method checks if we are given immediate operand but expect to
  // get corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

/// Parse the optional immediate operand of s_endpgm; defaults to 0 when
/// absent, and must fit in 16 bits.
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional, if not present default to 0
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

// True when this operand is an s_endpgm immediate.
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }