1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "Utils/AMDGPUAsmUtils.h" 16 #include "Utils/AMDGPUBaseInfo.h" 17 #include "Utils/AMDKernelCodeTUtils.h" 18 #include "llvm/ADT/APFloat.h" 19 #include "llvm/ADT/APInt.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/ADT/SmallBitVector.h" 23 #include "llvm/ADT/SmallString.h" 24 #include "llvm/ADT/StringRef.h" 25 #include "llvm/ADT/StringSwitch.h" 26 #include "llvm/ADT/Twine.h" 27 #include "llvm/BinaryFormat/ELF.h" 28 #include "llvm/MC/MCAsmInfo.h" 29 #include "llvm/MC/MCContext.h" 30 #include "llvm/MC/MCExpr.h" 31 #include "llvm/MC/MCInst.h" 32 #include "llvm/MC/MCInstrDesc.h" 33 #include "llvm/MC/MCInstrInfo.h" 34 #include "llvm/MC/MCParser/MCAsmLexer.h" 35 #include "llvm/MC/MCParser/MCAsmParser.h" 36 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 38 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 39 #include "llvm/MC/MCRegisterInfo.h" 40 #include "llvm/MC/MCStreamer.h" 41 #include "llvm/MC/MCSubtargetInfo.h" 42 #include "llvm/MC/MCSymbol.h" 43 #include "llvm/Support/AMDGPUMetadata.h" 44 #include "llvm/Support/AMDHSAKernelDescriptor.h" 45 #include "llvm/Support/Casting.h" 46 #include "llvm/Support/Compiler.h" 47 #include "llvm/Support/ErrorHandling.h" 48 #include "llvm/Support/MachineValueType.h" 49 #include "llvm/Support/MathExtras.h" 50 #include "llvm/Support/SMLoc.h" 51 #include "llvm/Support/TargetParser.h" 52 #include "llvm/Support/TargetRegistry.h" 53 #include "llvm/Support/raw_ostream.h" 54 #include <algorithm> 55 #include <cassert> 56 #include <cstdint> 57 #include <cstring> 58 #include <iterator> 59 #include <map> 60 #include <memory> 61 #include <string> 62 63 using namespace llvm; 64 using namespace llvm::AMDGPU; 65 using namespace llvm::amdhsa; 66 67 namespace { 68 69 class AMDGPUAsmParser; 70 71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL }; 72 73 //===----------------------------------------------------------------------===// 74 // Operand 75 //===----------------------------------------------------------------------===// 76 77 class AMDGPUOperand : public MCParsedAsmOperand { 78 enum KindTy { 79 Token, 80 Immediate, 81 Register, 82 Expression 83 } Kind; 84 85 SMLoc StartLoc, EndLoc; 86 const AMDGPUAsmParser *AsmParser; 87 88 public: 89 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 90 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 91 92 using Ptr = std::unique_ptr<AMDGPUOperand>; 93 94 struct Modifiers { 95 bool Abs = false; 96 bool Neg = false; 97 bool Sext = false; 98 99 bool hasFPModifiers() const { return Abs || Neg; } 100 bool hasIntModifiers() const { return Sext; } 101 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 102 103 int64_t getFPModifiersOperand() const { 104 int64_t Operand = 0; 105 Operand |= Abs ? SISrcMods::ABS : 0; 106 Operand |= Neg ? 
SISrcMods::NEG : 0; 107 return Operand; 108 } 109 110 int64_t getIntModifiersOperand() const { 111 int64_t Operand = 0; 112 Operand |= Sext ? SISrcMods::SEXT : 0; 113 return Operand; 114 } 115 116 int64_t getModifiersOperand() const { 117 assert(!(hasFPModifiers() && hasIntModifiers()) 118 && "fp and int modifiers should not be used simultaneously"); 119 if (hasFPModifiers()) { 120 return getFPModifiersOperand(); 121 } else if (hasIntModifiers()) { 122 return getIntModifiersOperand(); 123 } else { 124 return 0; 125 } 126 } 127 128 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 129 }; 130 131 enum ImmTy { 132 ImmTyNone, 133 ImmTyGDS, 134 ImmTyLDS, 135 ImmTyOffen, 136 ImmTyIdxen, 137 ImmTyAddr64, 138 ImmTyOffset, 139 ImmTyInstOffset, 140 ImmTyOffset0, 141 ImmTyOffset1, 142 ImmTyGLC, 143 ImmTySLC, 144 ImmTyTFE, 145 ImmTyD16, 146 ImmTyClampSI, 147 ImmTyOModSI, 148 ImmTyDppCtrl, 149 ImmTyDppRowMask, 150 ImmTyDppBankMask, 151 ImmTyDppBoundCtrl, 152 ImmTySdwaDstSel, 153 ImmTySdwaSrc0Sel, 154 ImmTySdwaSrc1Sel, 155 ImmTySdwaDstUnused, 156 ImmTyDMask, 157 ImmTyUNorm, 158 ImmTyDA, 159 ImmTyR128A16, 160 ImmTyLWE, 161 ImmTyExpTgt, 162 ImmTyExpCompr, 163 ImmTyExpVM, 164 ImmTyFORMAT, 165 ImmTyHwreg, 166 ImmTyOff, 167 ImmTySendMsg, 168 ImmTyInterpSlot, 169 ImmTyInterpAttr, 170 ImmTyAttrChan, 171 ImmTyOpSel, 172 ImmTyOpSelHi, 173 ImmTyNegLo, 174 ImmTyNegHi, 175 ImmTySwizzle, 176 ImmTyGprIdxMode, 177 ImmTyHigh 178 }; 179 180 struct TokOp { 181 const char *Data; 182 unsigned Length; 183 }; 184 185 struct ImmOp { 186 int64_t Val; 187 ImmTy Type; 188 bool IsFPImm; 189 Modifiers Mods; 190 }; 191 192 struct RegOp { 193 unsigned RegNo; 194 bool IsForcedVOP3; 195 Modifiers Mods; 196 }; 197 198 union { 199 TokOp Tok; 200 ImmOp Imm; 201 RegOp Reg; 202 const MCExpr *Expr; 203 }; 204 205 bool isToken() const override { 206 if (Kind == Token) 207 return true; 208 209 if (Kind != Expression || !Expr) 210 return false; 211 212 // When parsing operands, we can't always tell if something was meant to be 213 // a token, like 'gds', or an expression that references a global variable. 214 // In this case, we assume the string is an expression, and if we need to 215 // interpret is a token, then we treat the symbol name as the token. 
216 return isa<MCSymbolRefExpr>(Expr); 217 } 218 219 bool isImm() const override { 220 return Kind == Immediate; 221 } 222 223 bool isInlinableImm(MVT type) const; 224 bool isLiteralImm(MVT type) const; 225 226 bool isRegKind() const { 227 return Kind == Register; 228 } 229 230 bool isReg() const override { 231 return isRegKind() && !hasModifiers(); 232 } 233 234 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 235 return isRegClass(RCID) || isInlinableImm(type); 236 } 237 238 bool isRegOrImmWithInt16InputMods() const { 239 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 240 } 241 242 bool isRegOrImmWithInt32InputMods() const { 243 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 244 } 245 246 bool isRegOrImmWithInt64InputMods() const { 247 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 248 } 249 250 bool isRegOrImmWithFP16InputMods() const { 251 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 252 } 253 254 bool isRegOrImmWithFP32InputMods() const { 255 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 256 } 257 258 bool isRegOrImmWithFP64InputMods() const { 259 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 260 } 261 262 bool isVReg() const { 263 return isRegClass(AMDGPU::VGPR_32RegClassID) || 264 isRegClass(AMDGPU::VReg_64RegClassID) || 265 isRegClass(AMDGPU::VReg_96RegClassID) || 266 isRegClass(AMDGPU::VReg_128RegClassID) || 267 isRegClass(AMDGPU::VReg_256RegClassID) || 268 isRegClass(AMDGPU::VReg_512RegClassID); 269 } 270 271 bool isVReg32() const { 272 return isRegClass(AMDGPU::VGPR_32RegClassID); 273 } 274 275 bool isVReg32OrOff() const { 276 return isOff() || isVReg32(); 277 } 278 279 bool isSDWAOperand(MVT type) const; 280 bool isSDWAFP16Operand() const; 281 bool isSDWAFP32Operand() const; 282 bool isSDWAInt16Operand() const; 283 bool isSDWAInt32Operand() const; 284 285 bool isImmTy(ImmTy ImmT) const { 286 return isImm() && Imm.Type == ImmT; 287 } 288 289 bool isImmModifier() const { 290 return isImm() && Imm.Type != ImmTyNone; 291 } 292 293 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 294 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 295 bool isDMask() const { return isImmTy(ImmTyDMask); } 296 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 297 bool isDA() const { return isImmTy(ImmTyDA); } 298 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 299 bool isLWE() const { return isImmTy(ImmTyLWE); } 300 bool isOff() const { return isImmTy(ImmTyOff); } 301 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 302 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 303 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 304 bool isOffen() const { return isImmTy(ImmTyOffen); } 305 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 306 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 307 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 308 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); } 309 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 310 311 bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); } 312 bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); } 313 bool isGDS() const { return isImmTy(ImmTyGDS); } 314 bool isLDS() const { return isImmTy(ImmTyLDS); } 315 bool isGLC() const { return isImmTy(ImmTyGLC); 
} 316 bool isSLC() const { return isImmTy(ImmTySLC); } 317 bool isTFE() const { return isImmTy(ImmTyTFE); } 318 bool isD16() const { return isImmTy(ImmTyD16); } 319 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 320 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 321 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 322 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 323 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 324 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 325 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 326 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 327 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 328 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 329 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 330 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 331 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 332 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 333 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 334 bool isHigh() const { return isImmTy(ImmTyHigh); } 335 336 bool isMod() const { 337 return isClampSI() || isOModSI(); 338 } 339 340 bool isRegOrImm() const { 341 return isReg() || isImm(); 342 } 343 344 bool isRegClass(unsigned RCID) const; 345 346 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 347 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 348 } 349 350 bool isSCSrcB16() const { 351 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 352 } 353 354 bool isSCSrcV2B16() const { 355 return isSCSrcB16(); 356 } 357 358 bool isSCSrcB32() const { 359 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 360 } 361 362 bool isSCSrcB64() const { 363 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 364 } 365 366 bool isSCSrcF16() const { 367 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 368 } 369 370 bool isSCSrcV2F16() const { 371 return isSCSrcF16(); 372 } 373 374 bool isSCSrcF32() const { 375 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 376 } 377 378 bool isSCSrcF64() const { 379 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 380 } 381 382 bool isSSrcB32() const { 383 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 384 } 385 386 bool isSSrcB16() const { 387 return isSCSrcB16() || isLiteralImm(MVT::i16); 388 } 389 390 bool isSSrcV2B16() const { 391 llvm_unreachable("cannot happen"); 392 return isSSrcB16(); 393 } 394 395 bool isSSrcB64() const { 396 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 397 // See isVSrc64(). 
398 return isSCSrcB64() || isLiteralImm(MVT::i64); 399 } 400 401 bool isSSrcF32() const { 402 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 403 } 404 405 bool isSSrcF64() const { 406 return isSCSrcB64() || isLiteralImm(MVT::f64); 407 } 408 409 bool isSSrcF16() const { 410 return isSCSrcB16() || isLiteralImm(MVT::f16); 411 } 412 413 bool isSSrcV2F16() const { 414 llvm_unreachable("cannot happen"); 415 return isSSrcF16(); 416 } 417 418 bool isSSrcOrLdsB32() const { 419 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 420 isLiteralImm(MVT::i32) || isExpr(); 421 } 422 423 bool isVCSrcB32() const { 424 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 425 } 426 427 bool isVCSrcB64() const { 428 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 429 } 430 431 bool isVCSrcB16() const { 432 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 433 } 434 435 bool isVCSrcV2B16() const { 436 return isVCSrcB16(); 437 } 438 439 bool isVCSrcF32() const { 440 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 441 } 442 443 bool isVCSrcF64() const { 444 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 445 } 446 447 bool isVCSrcF16() const { 448 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 449 } 450 451 bool isVCSrcV2F16() const { 452 return isVCSrcF16(); 453 } 454 455 bool isVSrcB32() const { 456 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 457 } 458 459 bool isVSrcB64() const { 460 return isVCSrcF64() || isLiteralImm(MVT::i64); 461 } 462 463 bool isVSrcB16() const { 464 return isVCSrcF16() || isLiteralImm(MVT::i16); 465 } 466 467 bool isVSrcV2B16() const { 468 llvm_unreachable("cannot happen"); 469 return isVSrcB16(); 470 } 471 472 bool isVSrcF32() const { 473 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 474 } 475 476 bool isVSrcF64() const { 477 return isVCSrcF64() || isLiteralImm(MVT::f64); 478 } 479 480 bool isVSrcF16() const { 481 return isVCSrcF16() || isLiteralImm(MVT::f16); 482 } 483 484 bool isVSrcV2F16() const { 485 llvm_unreachable("cannot happen"); 486 return isVSrcF16(); 487 } 488 489 bool isKImmFP32() const { 490 return isLiteralImm(MVT::f32); 491 } 492 493 bool isKImmFP16() const { 494 return isLiteralImm(MVT::f16); 495 } 496 497 bool isMem() const override { 498 return false; 499 } 500 501 bool isExpr() const { 502 return Kind == Expression; 503 } 504 505 bool isSoppBrTarget() const { 506 return isExpr() || isImm(); 507 } 508 509 bool isSWaitCnt() const; 510 bool isHwreg() const; 511 bool isSendMsg() const; 512 bool isSwizzle() const; 513 bool isSMRDOffset8() const; 514 bool isSMRDOffset20() const; 515 bool isSMRDLiteralOffset() const; 516 bool isDPPCtrl() const; 517 bool isGPRIdxMode() const; 518 bool isS16Imm() const; 519 bool isU16Imm() const; 520 521 StringRef getExpressionAsToken() const { 522 assert(isExpr()); 523 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 524 return S->getSymbol().getName(); 525 } 526 527 StringRef getToken() const { 528 assert(isToken()); 529 530 if (Kind == Expression) 531 return getExpressionAsToken(); 532 533 return StringRef(Tok.Data, Tok.Length); 534 } 535 536 int64_t getImm() const { 537 assert(isImm()); 538 return Imm.Val; 539 } 540 541 ImmTy getImmTy() const { 542 assert(isImm()); 543 return Imm.Type; 544 } 545 546 unsigned getReg() const override { 547 return Reg.RegNo; 548 } 549 550 SMLoc getStartLoc() const override { 551 return StartLoc; 552 } 553 554 SMLoc getEndLoc() const override { 555 return 
EndLoc; 556 } 557 558 SMRange getLocRange() const { 559 return SMRange(StartLoc, EndLoc); 560 } 561 562 Modifiers getModifiers() const { 563 assert(isRegKind() || isImmTy(ImmTyNone)); 564 return isRegKind() ? Reg.Mods : Imm.Mods; 565 } 566 567 void setModifiers(Modifiers Mods) { 568 assert(isRegKind() || isImmTy(ImmTyNone)); 569 if (isRegKind()) 570 Reg.Mods = Mods; 571 else 572 Imm.Mods = Mods; 573 } 574 575 bool hasModifiers() const { 576 return getModifiers().hasModifiers(); 577 } 578 579 bool hasFPModifiers() const { 580 return getModifiers().hasFPModifiers(); 581 } 582 583 bool hasIntModifiers() const { 584 return getModifiers().hasIntModifiers(); 585 } 586 587 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 588 589 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 590 591 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 592 593 template <unsigned Bitwidth> 594 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 595 596 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 597 addKImmFPOperands<16>(Inst, N); 598 } 599 600 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 601 addKImmFPOperands<32>(Inst, N); 602 } 603 604 void addRegOperands(MCInst &Inst, unsigned N) const; 605 606 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 607 if (isRegKind()) 608 addRegOperands(Inst, N); 609 else if (isExpr()) 610 Inst.addOperand(MCOperand::createExpr(Expr)); 611 else 612 addImmOperands(Inst, N); 613 } 614 615 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 616 Modifiers Mods = getModifiers(); 617 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 618 if (isRegKind()) { 619 addRegOperands(Inst, N); 620 } else { 621 addImmOperands(Inst, N, false); 622 } 623 } 624 625 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 626 assert(!hasIntModifiers()); 627 addRegOrImmWithInputModsOperands(Inst, N); 628 } 629 630 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 631 assert(!hasFPModifiers()); 632 addRegOrImmWithInputModsOperands(Inst, N); 633 } 634 635 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 636 Modifiers Mods = getModifiers(); 637 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 638 assert(isRegKind()); 639 addRegOperands(Inst, N); 640 } 641 642 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 643 assert(!hasIntModifiers()); 644 addRegWithInputModsOperands(Inst, N); 645 } 646 647 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 648 assert(!hasFPModifiers()); 649 addRegWithInputModsOperands(Inst, N); 650 } 651 652 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 653 if (isImm()) 654 addImmOperands(Inst, N); 655 else { 656 assert(isExpr()); 657 Inst.addOperand(MCOperand::createExpr(Expr)); 658 } 659 } 660 661 static void printImmTy(raw_ostream& OS, ImmTy Type) { 662 switch (Type) { 663 case ImmTyNone: OS << "None"; break; 664 case ImmTyGDS: OS << "GDS"; break; 665 case ImmTyLDS: OS << "LDS"; break; 666 case ImmTyOffen: OS << "Offen"; break; 667 case ImmTyIdxen: OS << "Idxen"; break; 668 case ImmTyAddr64: OS << "Addr64"; break; 669 case ImmTyOffset: OS << "Offset"; break; 670 case ImmTyInstOffset: OS << "InstOffset"; break; 671 case ImmTyOffset0: OS << "Offset0"; break; 672 case ImmTyOffset1: OS << "Offset1"; break; 673 case ImmTyGLC: OS << "GLC"; break; 674 case ImmTySLC: OS << "SLC"; break; 675 case ImmTyTFE: 
OS << "TFE"; break; 676 case ImmTyD16: OS << "D16"; break; 677 case ImmTyFORMAT: OS << "FORMAT"; break; 678 case ImmTyClampSI: OS << "ClampSI"; break; 679 case ImmTyOModSI: OS << "OModSI"; break; 680 case ImmTyDppCtrl: OS << "DppCtrl"; break; 681 case ImmTyDppRowMask: OS << "DppRowMask"; break; 682 case ImmTyDppBankMask: OS << "DppBankMask"; break; 683 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 684 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 685 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 686 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 687 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 688 case ImmTyDMask: OS << "DMask"; break; 689 case ImmTyUNorm: OS << "UNorm"; break; 690 case ImmTyDA: OS << "DA"; break; 691 case ImmTyR128A16: OS << "R128A16"; break; 692 case ImmTyLWE: OS << "LWE"; break; 693 case ImmTyOff: OS << "Off"; break; 694 case ImmTyExpTgt: OS << "ExpTgt"; break; 695 case ImmTyExpCompr: OS << "ExpCompr"; break; 696 case ImmTyExpVM: OS << "ExpVM"; break; 697 case ImmTyHwreg: OS << "Hwreg"; break; 698 case ImmTySendMsg: OS << "SendMsg"; break; 699 case ImmTyInterpSlot: OS << "InterpSlot"; break; 700 case ImmTyInterpAttr: OS << "InterpAttr"; break; 701 case ImmTyAttrChan: OS << "AttrChan"; break; 702 case ImmTyOpSel: OS << "OpSel"; break; 703 case ImmTyOpSelHi: OS << "OpSelHi"; break; 704 case ImmTyNegLo: OS << "NegLo"; break; 705 case ImmTyNegHi: OS << "NegHi"; break; 706 case ImmTySwizzle: OS << "Swizzle"; break; 707 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 708 case ImmTyHigh: OS << "High"; break; 709 } 710 } 711 712 void print(raw_ostream &OS) const override { 713 switch (Kind) { 714 case Register: 715 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 716 break; 717 case Immediate: 718 OS << '<' << getImm(); 719 if (getImmTy() != ImmTyNone) { 720 OS << " type: "; printImmTy(OS, getImmTy()); 721 } 722 OS << " mods: " << Imm.Mods << '>'; 723 break; 724 case Token: 725 OS << '\'' << getToken() << '\''; 726 break; 727 case Expression: 728 OS << "<expr " << *Expr << '>'; 729 break; 730 } 731 } 732 733 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 734 int64_t Val, SMLoc Loc, 735 ImmTy Type = ImmTyNone, 736 bool IsFPImm = false) { 737 auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser); 738 Op->Imm.Val = Val; 739 Op->Imm.IsFPImm = IsFPImm; 740 Op->Imm.Type = Type; 741 Op->Imm.Mods = Modifiers(); 742 Op->StartLoc = Loc; 743 Op->EndLoc = Loc; 744 return Op; 745 } 746 747 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 748 StringRef Str, SMLoc Loc, 749 bool HasExplicitEncodingSize = true) { 750 auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser); 751 Res->Tok.Data = Str.data(); 752 Res->Tok.Length = Str.size(); 753 Res->StartLoc = Loc; 754 Res->EndLoc = Loc; 755 return Res; 756 } 757 758 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 759 unsigned RegNo, SMLoc S, 760 SMLoc E, 761 bool ForceVOP3) { 762 auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser); 763 Op->Reg.RegNo = RegNo; 764 Op->Reg.Mods = Modifiers(); 765 Op->Reg.IsForcedVOP3 = ForceVOP3; 766 Op->StartLoc = S; 767 Op->EndLoc = E; 768 return Op; 769 } 770 771 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 772 const class MCExpr *Expr, SMLoc S) { 773 auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser); 774 Op->Expr = Expr; 775 Op->StartLoc = S; 776 Op->EndLoc = S; 777 return Op; 778 } 779 }; 780 781 raw_ostream &operator 
<<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 782 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 783 return OS; 784 } 785 786 //===----------------------------------------------------------------------===// 787 // AsmParser 788 //===----------------------------------------------------------------------===// 789 790 // Holds info related to the current kernel, e.g. count of SGPRs used. 791 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 792 // .amdgpu_hsa_kernel or at EOF. 793 class KernelScopeInfo { 794 int SgprIndexUnusedMin = -1; 795 int VgprIndexUnusedMin = -1; 796 MCContext *Ctx = nullptr; 797 798 void usesSgprAt(int i) { 799 if (i >= SgprIndexUnusedMin) { 800 SgprIndexUnusedMin = ++i; 801 if (Ctx) { 802 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 803 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 804 } 805 } 806 } 807 808 void usesVgprAt(int i) { 809 if (i >= VgprIndexUnusedMin) { 810 VgprIndexUnusedMin = ++i; 811 if (Ctx) { 812 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 813 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 814 } 815 } 816 } 817 818 public: 819 KernelScopeInfo() = default; 820 821 void initialize(MCContext &Context) { 822 Ctx = &Context; 823 usesSgprAt(SgprIndexUnusedMin = -1); 824 usesVgprAt(VgprIndexUnusedMin = -1); 825 } 826 827 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 828 switch (RegKind) { 829 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 830 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 831 default: break; 832 } 833 } 834 }; 835 836 class AMDGPUAsmParser : public MCTargetAsmParser { 837 MCAsmParser &Parser; 838 839 // Number of extra operands parsed after the first optional operand. 840 // This may be necessary to skip hardcoded mandatory operands. 841 static const unsigned MAX_OPR_LOOKAHEAD = 8; 842 843 unsigned ForcedEncodingSize = 0; 844 bool ForcedDPP = false; 845 bool ForcedSDWA = false; 846 KernelScopeInfo KernelScope; 847 848 /// @name Auto-generated Match Functions 849 /// { 850 851 #define GET_ASSEMBLER_HEADER 852 #include "AMDGPUGenAsmMatcher.inc" 853 854 /// } 855 856 private: 857 bool ParseAsAbsoluteExpression(uint32_t &Ret); 858 bool OutOfRangeError(SMRange Range); 859 /// Calculate VGPR/SGPR blocks required for given target, reserved 860 /// registers, and user-specified NextFreeXGPR values. 861 /// 862 /// \param Features [in] Target features, used for bug corrections. 863 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 864 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 865 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 866 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 867 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 868 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 869 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 870 /// \param VGPRBlocks [out] Result VGPR block count. 871 /// \param SGPRBlocks [out] Result SGPR block count. 
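  /// A rough sketch of the intent rather than the exact arithmetic: the
  /// special SGPRs named by the *Used flags (VCC, FLAT_SCRATCH, XNACK_MASK)
  /// live at the top of the SGPR file, so they are folded into NextFreeSGPR
  /// before the count is rounded up to the target's allocation granularity
  /// and converted to blocks.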
872 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 873 bool FlatScrUsed, bool XNACKUsed, 874 unsigned NextFreeVGPR, SMRange VGPRRange, 875 unsigned NextFreeSGPR, SMRange SGPRRange, 876 unsigned &VGPRBlocks, unsigned &SGPRBlocks); 877 bool ParseDirectiveAMDGCNTarget(); 878 bool ParseDirectiveAMDHSAKernel(); 879 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 880 bool ParseDirectiveHSACodeObjectVersion(); 881 bool ParseDirectiveHSACodeObjectISA(); 882 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 883 bool ParseDirectiveAMDKernelCodeT(); 884 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 885 bool ParseDirectiveAMDGPUHsaKernel(); 886 887 bool ParseDirectiveISAVersion(); 888 bool ParseDirectiveHSAMetadata(); 889 bool ParseDirectivePALMetadata(); 890 891 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 892 RegisterKind RegKind, unsigned Reg1, 893 unsigned RegNum); 894 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, 895 unsigned& RegNum, unsigned& RegWidth, 896 unsigned *DwordRegIndex); 897 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 898 void initializeGprCountSymbol(RegisterKind RegKind); 899 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 900 unsigned RegWidth); 901 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 902 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 903 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 904 bool IsGdsHardcoded); 905 906 public: 907 enum AMDGPUMatchResultTy { 908 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 909 }; 910 911 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 912 913 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 914 const MCInstrInfo &MII, 915 const MCTargetOptions &Options) 916 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 917 MCAsmParserExtension::Initialize(Parser); 918 919 if (getFeatureBits().none()) { 920 // Set default features. 921 copySTI().ToggleFeature("SOUTHERN_ISLANDS"); 922 } 923 924 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 925 926 { 927 // TODO: make those pre-defined variables read-only. 928 // Currently there is none suitable machinery in the core llvm-mc for this. 929 // MCSymbol::isRedefinable is intended for another purpose, and 930 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
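      // For example, on gfx900 (ISA version 9.0.0) the symbols defined below
      // evaluate to 9, 0 and 0 respectively, letting assembly source test the
      // target generation with .if-style directives.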
931 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 932 MCContext &Ctx = getContext(); 933 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 934 MCSymbol *Sym = 935 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 936 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 937 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 938 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 939 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 940 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 941 } else { 942 MCSymbol *Sym = 943 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 944 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 945 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 946 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 947 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 948 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 949 } 950 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 951 initializeGprCountSymbol(IS_VGPR); 952 initializeGprCountSymbol(IS_SGPR); 953 } else 954 KernelScope.initialize(getContext()); 955 } 956 } 957 958 bool hasXNACK() const { 959 return AMDGPU::hasXNACK(getSTI()); 960 } 961 962 bool hasMIMG_R128() const { 963 return AMDGPU::hasMIMG_R128(getSTI()); 964 } 965 966 bool hasPackedD16() const { 967 return AMDGPU::hasPackedD16(getSTI()); 968 } 969 970 bool isSI() const { 971 return AMDGPU::isSI(getSTI()); 972 } 973 974 bool isCI() const { 975 return AMDGPU::isCI(getSTI()); 976 } 977 978 bool isVI() const { 979 return AMDGPU::isVI(getSTI()); 980 } 981 982 bool isGFX9() const { 983 return AMDGPU::isGFX9(getSTI()); 984 } 985 986 bool hasInv2PiInlineImm() const { 987 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 988 } 989 990 bool hasFlatOffsets() const { 991 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 992 } 993 994 bool hasSGPR102_SGPR103() const { 995 return !isVI(); 996 } 997 998 bool hasIntClamp() const { 999 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1000 } 1001 1002 AMDGPUTargetStreamer &getTargetStreamer() { 1003 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1004 return static_cast<AMDGPUTargetStreamer &>(TS); 1005 } 1006 1007 const MCRegisterInfo *getMRI() const { 1008 // We need this const_cast because for some reason getContext() is not const 1009 // in MCAsmParser. 
1010 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1011 } 1012 1013 const MCInstrInfo *getMII() const { 1014 return &MII; 1015 } 1016 1017 const FeatureBitset &getFeatureBits() const { 1018 return getSTI().getFeatureBits(); 1019 } 1020 1021 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1022 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1023 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1024 1025 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1026 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1027 bool isForcedDPP() const { return ForcedDPP; } 1028 bool isForcedSDWA() const { return ForcedSDWA; } 1029 ArrayRef<unsigned> getMatchedVariants() const; 1030 1031 std::unique_ptr<AMDGPUOperand> parseRegister(); 1032 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1033 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1034 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1035 unsigned Kind) override; 1036 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1037 OperandVector &Operands, MCStreamer &Out, 1038 uint64_t &ErrorInfo, 1039 bool MatchingInlineAsm) override; 1040 bool ParseDirective(AsmToken DirectiveID) override; 1041 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic); 1042 StringRef parseMnemonicSuffix(StringRef Name); 1043 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1044 SMLoc NameLoc, OperandVector &Operands) override; 1045 //bool ProcessInstruction(MCInst &Inst); 1046 1047 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1048 1049 OperandMatchResultTy 1050 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1051 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1052 bool (*ConvertResult)(int64_t &) = nullptr); 1053 1054 OperandMatchResultTy parseOperandArrayWithPrefix( 1055 const char *Prefix, 1056 OperandVector &Operands, 1057 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1058 bool (*ConvertResult)(int64_t&) = nullptr); 1059 1060 OperandMatchResultTy 1061 parseNamedBit(const char *Name, OperandVector &Operands, 1062 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1063 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1064 StringRef &Value); 1065 1066 bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false); 1067 OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false); 1068 OperandMatchResultTy parseReg(OperandVector &Operands); 1069 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false); 1070 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1071 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1072 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1073 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1074 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1075 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands); 1076 1077 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1078 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1079 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1080 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1081 1082 bool parseCnt(int64_t 
&IntVal); 1083 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1084 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1085 1086 private: 1087 struct OperandInfoTy { 1088 int64_t Id; 1089 bool IsSymbolic = false; 1090 1091 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1092 }; 1093 1094 bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId); 1095 bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1096 1097 void errorExpTgt(); 1098 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1099 1100 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc); 1101 bool validateSOPLiteral(const MCInst &Inst) const; 1102 bool validateConstantBusLimitations(const MCInst &Inst); 1103 bool validateEarlyClobberLimitations(const MCInst &Inst); 1104 bool validateIntClampSupported(const MCInst &Inst); 1105 bool validateMIMGAtomicDMask(const MCInst &Inst); 1106 bool validateMIMGGatherDMask(const MCInst &Inst); 1107 bool validateMIMGDataSize(const MCInst &Inst); 1108 bool validateMIMGD16(const MCInst &Inst); 1109 bool validateLdsDirect(const MCInst &Inst); 1110 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1111 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1112 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1113 1114 bool trySkipId(const StringRef Id); 1115 bool trySkipToken(const AsmToken::TokenKind Kind); 1116 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1117 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1118 bool parseExpr(int64_t &Imm); 1119 1120 public: 1121 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1122 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1123 1124 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1125 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1126 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1127 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1128 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1129 1130 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1131 const unsigned MinVal, 1132 const unsigned MaxVal, 1133 const StringRef ErrMsg); 1134 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1135 bool parseSwizzleOffset(int64_t &Imm); 1136 bool parseSwizzleMacro(int64_t &Imm); 1137 bool parseSwizzleQuadPerm(int64_t &Imm); 1138 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1139 bool parseSwizzleBroadcast(int64_t &Imm); 1140 bool parseSwizzleSwap(int64_t &Imm); 1141 bool parseSwizzleReverse(int64_t &Imm); 1142 1143 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1144 int64_t parseGPRIdxMacro(); 1145 1146 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1147 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1148 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1149 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1150 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1151 1152 AMDGPUOperand::Ptr defaultGLC() const; 1153 AMDGPUOperand::Ptr defaultSLC() const; 1154 1155 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1156 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1157 
AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1158 AMDGPUOperand::Ptr defaultOffsetU12() const; 1159 AMDGPUOperand::Ptr defaultOffsetS13() const; 1160 1161 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1162 1163 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1164 OptionalImmIndexMap &OptionalIdx); 1165 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1166 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1167 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1168 1169 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1170 1171 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1172 bool IsAtomic = false); 1173 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1174 1175 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1176 AMDGPUOperand::Ptr defaultRowMask() const; 1177 AMDGPUOperand::Ptr defaultBankMask() const; 1178 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1179 void cvtDPP(MCInst &Inst, const OperandVector &Operands); 1180 1181 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1182 AMDGPUOperand::ImmTy Type); 1183 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1184 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1185 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1186 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1187 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1188 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1189 uint64_t BasicInstType, bool skipVcc = false); 1190 }; 1191 1192 struct OptionalOperand { 1193 const char *Name; 1194 AMDGPUOperand::ImmTy Type; 1195 bool IsBit; 1196 bool (*ConvertResult)(int64_t&); 1197 }; 1198 1199 } // end anonymous namespace 1200 1201 // May be called with integer type with equivalent bitwidth. 
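// For example, a caller holding an i32 operand passes Size == 4 and gets
// IEEEsingle back; only the storage width of the operand matters here.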
1202 static const fltSemantics *getFltSemantics(unsigned Size) { 1203 switch (Size) { 1204 case 4: 1205 return &APFloat::IEEEsingle(); 1206 case 8: 1207 return &APFloat::IEEEdouble(); 1208 case 2: 1209 return &APFloat::IEEEhalf(); 1210 default: 1211 llvm_unreachable("unsupported fp type"); 1212 } 1213 } 1214 1215 static const fltSemantics *getFltSemantics(MVT VT) { 1216 return getFltSemantics(VT.getSizeInBits() / 8); 1217 } 1218 1219 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1220 switch (OperandType) { 1221 case AMDGPU::OPERAND_REG_IMM_INT32: 1222 case AMDGPU::OPERAND_REG_IMM_FP32: 1223 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1224 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1225 return &APFloat::IEEEsingle(); 1226 case AMDGPU::OPERAND_REG_IMM_INT64: 1227 case AMDGPU::OPERAND_REG_IMM_FP64: 1228 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1229 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1230 return &APFloat::IEEEdouble(); 1231 case AMDGPU::OPERAND_REG_IMM_INT16: 1232 case AMDGPU::OPERAND_REG_IMM_FP16: 1233 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1234 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1235 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1236 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1237 return &APFloat::IEEEhalf(); 1238 default: 1239 llvm_unreachable("unsupported fp type"); 1240 } 1241 } 1242 1243 //===----------------------------------------------------------------------===// 1244 // Operand 1245 //===----------------------------------------------------------------------===// 1246 1247 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1248 bool Lost; 1249 1250 // Convert literal to single precision 1251 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1252 APFloat::rmNearestTiesToEven, 1253 &Lost); 1254 // We allow precision lost but not overflow or underflow 1255 if (Status != APFloat::opOK && 1256 Lost && 1257 ((Status & APFloat::opOverflow) != 0 || 1258 (Status & APFloat::opUnderflow) != 0)) { 1259 return false; 1260 } 1261 1262 return true; 1263 } 1264 1265 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1266 if (!isImmTy(ImmTyNone)) { 1267 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1268 return false; 1269 } 1270 // TODO: We should avoid using host float here. It would be better to 1271 // check the float bit values which is what a few other places do. 1272 // We've had bot failures before due to weird NaN support on mips hosts. 1273 1274 APInt Literal(64, Imm.Val); 1275 1276 if (Imm.IsFPImm) { // We got fp literal token 1277 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1278 return AMDGPU::isInlinableLiteral64(Imm.Val, 1279 AsmParser->hasInv2PiInlineImm()); 1280 } 1281 1282 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1283 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1284 return false; 1285 1286 if (type.getScalarSizeInBits() == 16) { 1287 return AMDGPU::isInlinableLiteral16( 1288 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1289 AsmParser->hasInv2PiInlineImm()); 1290 } 1291 1292 // Check if single precision literal is inlinable 1293 return AMDGPU::isInlinableLiteral32( 1294 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1295 AsmParser->hasInv2PiInlineImm()); 1296 } 1297 1298 // We got int literal token. 
1299 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1300 return AMDGPU::isInlinableLiteral64(Imm.Val, 1301 AsmParser->hasInv2PiInlineImm()); 1302 } 1303 1304 if (type.getScalarSizeInBits() == 16) { 1305 return AMDGPU::isInlinableLiteral16( 1306 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1307 AsmParser->hasInv2PiInlineImm()); 1308 } 1309 1310 return AMDGPU::isInlinableLiteral32( 1311 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1312 AsmParser->hasInv2PiInlineImm()); 1313 } 1314 1315 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1316 // Check that this immediate can be added as literal 1317 if (!isImmTy(ImmTyNone)) { 1318 return false; 1319 } 1320 1321 if (!Imm.IsFPImm) { 1322 // We got int literal token. 1323 1324 if (type == MVT::f64 && hasFPModifiers()) { 1325 // Cannot apply fp modifiers to int literals preserving the same semantics 1326 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1327 // disable these cases. 1328 return false; 1329 } 1330 1331 unsigned Size = type.getSizeInBits(); 1332 if (Size == 64) 1333 Size = 32; 1334 1335 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1336 // types. 1337 return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val); 1338 } 1339 1340 // We got fp literal token 1341 if (type == MVT::f64) { // Expected 64-bit fp operand 1342 // We would set low 64-bits of literal to zeroes but we accept this literals 1343 return true; 1344 } 1345 1346 if (type == MVT::i64) { // Expected 64-bit int operand 1347 // We don't allow fp literals in 64-bit integer instructions. It is 1348 // unclear how we should encode them. 1349 return false; 1350 } 1351 1352 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1353 return canLosslesslyConvertToFPType(FPLiteral, type); 1354 } 1355 1356 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1357 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1358 } 1359 1360 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1361 if (AsmParser->isVI()) 1362 return isVReg32(); 1363 else if (AsmParser->isGFX9()) 1364 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1365 else 1366 return false; 1367 } 1368 1369 bool AMDGPUOperand::isSDWAFP16Operand() const { 1370 return isSDWAOperand(MVT::f16); 1371 } 1372 1373 bool AMDGPUOperand::isSDWAFP32Operand() const { 1374 return isSDWAOperand(MVT::f32); 1375 } 1376 1377 bool AMDGPUOperand::isSDWAInt16Operand() const { 1378 return isSDWAOperand(MVT::i16); 1379 } 1380 1381 bool AMDGPUOperand::isSDWAInt32Operand() const { 1382 return isSDWAOperand(MVT::i32); 1383 } 1384 1385 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1386 { 1387 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1388 assert(Size == 2 || Size == 4 || Size == 8); 1389 1390 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1391 1392 if (Imm.Mods.Abs) { 1393 Val &= ~FpSignMask; 1394 } 1395 if (Imm.Mods.Neg) { 1396 Val ^= FpSignMask; 1397 } 1398 1399 return Val; 1400 } 1401 1402 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1403 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1404 Inst.getNumOperands())) { 1405 addLiteralImmOperand(Inst, Imm.Val, 1406 ApplyModifiers & 1407 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1408 } else { 1409 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1410 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1411 } 1412 } 
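// Note on encoding: an immediate that matches one of the hardware inline
// constants (integers -16..64 and a small set of floats such as 0.5, 1.0,
// 2.0, 4.0 and their negations) is placed directly in the source operand
// field by addLiteralImmOperand(); anything else becomes a trailing 32-bit
// literal dword. For example:
//   v_add_f32 v0, 1.0, v1   ; 1.0 is an inline constant
//   v_add_f32 v0, 1.1, v1   ; 1.1 needs a 32-bit literal
// See AMDGPU::isInlinableLiteral32() and friends for the exact sets.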
1413 1414 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1415 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1416 auto OpNum = Inst.getNumOperands(); 1417 // Check that this operand accepts literals 1418 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1419 1420 if (ApplyModifiers) { 1421 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1422 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 1423 Val = applyInputFPModifiers(Val, Size); 1424 } 1425 1426 APInt Literal(64, Val); 1427 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1428 1429 if (Imm.IsFPImm) { // We got fp literal token 1430 switch (OpTy) { 1431 case AMDGPU::OPERAND_REG_IMM_INT64: 1432 case AMDGPU::OPERAND_REG_IMM_FP64: 1433 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1434 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1435 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1436 AsmParser->hasInv2PiInlineImm())) { 1437 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1438 return; 1439 } 1440 1441 // Non-inlineable 1442 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1443 // For fp operands we check if low 32 bits are zeros 1444 if (Literal.getLoBits(32) != 0) { 1445 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1446 "Can't encode literal as exact 64-bit floating-point operand. " 1447 "Low 32-bits will be set to zero"); 1448 } 1449 1450 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1451 return; 1452 } 1453 1454 // We don't allow fp literals in 64-bit integer instructions. It is 1455 // unclear how we should encode them. This case should be checked earlier 1456 // in predicate methods (isLiteralImm()) 1457 llvm_unreachable("fp literal in 64-bit integer instruction."); 1458 1459 case AMDGPU::OPERAND_REG_IMM_INT32: 1460 case AMDGPU::OPERAND_REG_IMM_FP32: 1461 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1462 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1463 case AMDGPU::OPERAND_REG_IMM_INT16: 1464 case AMDGPU::OPERAND_REG_IMM_FP16: 1465 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1466 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1467 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1468 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { 1469 bool lost; 1470 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1471 // Convert literal to single precision 1472 FPLiteral.convert(*getOpFltSemantics(OpTy), 1473 APFloat::rmNearestTiesToEven, &lost); 1474 // We allow precision lost but not overflow or underflow. This should be 1475 // checked earlier in isLiteralImm() 1476 1477 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1478 if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 1479 OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { 1480 ImmVal |= (ImmVal << 16); 1481 } 1482 1483 Inst.addOperand(MCOperand::createImm(ImmVal)); 1484 return; 1485 } 1486 default: 1487 llvm_unreachable("invalid operand size"); 1488 } 1489 1490 return; 1491 } 1492 1493 // We got int literal token. 1494 // Only sign extend inline immediates. 
1495 // FIXME: No errors on truncation 1496 switch (OpTy) { 1497 case AMDGPU::OPERAND_REG_IMM_INT32: 1498 case AMDGPU::OPERAND_REG_IMM_FP32: 1499 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1500 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1501 if (isInt<32>(Val) && 1502 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1503 AsmParser->hasInv2PiInlineImm())) { 1504 Inst.addOperand(MCOperand::createImm(Val)); 1505 return; 1506 } 1507 1508 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1509 return; 1510 1511 case AMDGPU::OPERAND_REG_IMM_INT64: 1512 case AMDGPU::OPERAND_REG_IMM_FP64: 1513 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1514 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1515 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1516 Inst.addOperand(MCOperand::createImm(Val)); 1517 return; 1518 } 1519 1520 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1521 return; 1522 1523 case AMDGPU::OPERAND_REG_IMM_INT16: 1524 case AMDGPU::OPERAND_REG_IMM_FP16: 1525 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1526 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1527 if (isInt<16>(Val) && 1528 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1529 AsmParser->hasInv2PiInlineImm())) { 1530 Inst.addOperand(MCOperand::createImm(Val)); 1531 return; 1532 } 1533 1534 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1535 return; 1536 1537 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1538 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { 1539 auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue()); 1540 assert(AMDGPU::isInlinableLiteral16(LiteralVal, 1541 AsmParser->hasInv2PiInlineImm())); 1542 1543 uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 | 1544 static_cast<uint32_t>(LiteralVal); 1545 Inst.addOperand(MCOperand::createImm(ImmVal)); 1546 return; 1547 } 1548 default: 1549 llvm_unreachable("invalid operand size"); 1550 } 1551 } 1552 1553 template <unsigned Bitwidth> 1554 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1555 APInt Literal(64, Imm.Val); 1556 1557 if (!Imm.IsFPImm) { 1558 // We got int literal token. 
1559 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1560 return; 1561 } 1562 1563 bool Lost; 1564 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1565 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1566 APFloat::rmNearestTiesToEven, &Lost); 1567 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1568 } 1569 1570 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1571 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1572 } 1573 1574 //===----------------------------------------------------------------------===// 1575 // AsmParser 1576 //===----------------------------------------------------------------------===// 1577 1578 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1579 if (Is == IS_VGPR) { 1580 switch (RegWidth) { 1581 default: return -1; 1582 case 1: return AMDGPU::VGPR_32RegClassID; 1583 case 2: return AMDGPU::VReg_64RegClassID; 1584 case 3: return AMDGPU::VReg_96RegClassID; 1585 case 4: return AMDGPU::VReg_128RegClassID; 1586 case 8: return AMDGPU::VReg_256RegClassID; 1587 case 16: return AMDGPU::VReg_512RegClassID; 1588 } 1589 } else if (Is == IS_TTMP) { 1590 switch (RegWidth) { 1591 default: return -1; 1592 case 1: return AMDGPU::TTMP_32RegClassID; 1593 case 2: return AMDGPU::TTMP_64RegClassID; 1594 case 4: return AMDGPU::TTMP_128RegClassID; 1595 case 8: return AMDGPU::TTMP_256RegClassID; 1596 case 16: return AMDGPU::TTMP_512RegClassID; 1597 } 1598 } else if (Is == IS_SGPR) { 1599 switch (RegWidth) { 1600 default: return -1; 1601 case 1: return AMDGPU::SGPR_32RegClassID; 1602 case 2: return AMDGPU::SGPR_64RegClassID; 1603 case 4: return AMDGPU::SGPR_128RegClassID; 1604 case 8: return AMDGPU::SGPR_256RegClassID; 1605 case 16: return AMDGPU::SGPR_512RegClassID; 1606 } 1607 } 1608 return -1; 1609 } 1610 1611 static unsigned getSpecialRegForName(StringRef RegName) { 1612 return StringSwitch<unsigned>(RegName) 1613 .Case("exec", AMDGPU::EXEC) 1614 .Case("vcc", AMDGPU::VCC) 1615 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1616 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1617 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1618 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1619 .Case("m0", AMDGPU::M0) 1620 .Case("scc", AMDGPU::SCC) 1621 .Case("tba", AMDGPU::TBA) 1622 .Case("tma", AMDGPU::TMA) 1623 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1624 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1625 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1626 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1627 .Case("vcc_lo", AMDGPU::VCC_LO) 1628 .Case("vcc_hi", AMDGPU::VCC_HI) 1629 .Case("exec_lo", AMDGPU::EXEC_LO) 1630 .Case("exec_hi", AMDGPU::EXEC_HI) 1631 .Case("tma_lo", AMDGPU::TMA_LO) 1632 .Case("tma_hi", AMDGPU::TMA_HI) 1633 .Case("tba_lo", AMDGPU::TBA_LO) 1634 .Case("tba_hi", AMDGPU::TBA_HI) 1635 .Default(0); 1636 } 1637 1638 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1639 SMLoc &EndLoc) { 1640 auto R = parseRegister(); 1641 if (!R) return true; 1642 assert(R->isReg()); 1643 RegNo = R->getReg(); 1644 StartLoc = R->getStartLoc(); 1645 EndLoc = R->getEndLoc(); 1646 return false; 1647 } 1648 1649 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1650 RegisterKind RegKind, unsigned Reg1, 1651 unsigned RegNum) { 1652 switch (RegKind) { 1653 case IS_SPECIAL: 1654 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1655 Reg = AMDGPU::EXEC; 1656 RegWidth = 2; 1657 return true; 1658 } 1659 if (Reg == 
AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1660 Reg = AMDGPU::FLAT_SCR; 1661 RegWidth = 2; 1662 return true; 1663 } 1664 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1665 Reg = AMDGPU::XNACK_MASK; 1666 RegWidth = 2; 1667 return true; 1668 } 1669 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1670 Reg = AMDGPU::VCC; 1671 RegWidth = 2; 1672 return true; 1673 } 1674 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1675 Reg = AMDGPU::TBA; 1676 RegWidth = 2; 1677 return true; 1678 } 1679 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 1680 Reg = AMDGPU::TMA; 1681 RegWidth = 2; 1682 return true; 1683 } 1684 return false; 1685 case IS_VGPR: 1686 case IS_SGPR: 1687 case IS_TTMP: 1688 if (Reg1 != Reg + RegWidth) { 1689 return false; 1690 } 1691 RegWidth++; 1692 return true; 1693 default: 1694 llvm_unreachable("unexpected register kind"); 1695 } 1696 } 1697 1698 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1699 unsigned &RegNum, unsigned &RegWidth, 1700 unsigned *DwordRegIndex) { 1701 if (DwordRegIndex) { *DwordRegIndex = 0; } 1702 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 1703 if (getLexer().is(AsmToken::Identifier)) { 1704 StringRef RegName = Parser.getTok().getString(); 1705 if ((Reg = getSpecialRegForName(RegName))) { 1706 Parser.Lex(); 1707 RegKind = IS_SPECIAL; 1708 } else { 1709 unsigned RegNumIndex = 0; 1710 if (RegName[0] == 'v') { 1711 RegNumIndex = 1; 1712 RegKind = IS_VGPR; 1713 } else if (RegName[0] == 's') { 1714 RegNumIndex = 1; 1715 RegKind = IS_SGPR; 1716 } else if (RegName.startswith("ttmp")) { 1717 RegNumIndex = strlen("ttmp"); 1718 RegKind = IS_TTMP; 1719 } else { 1720 return false; 1721 } 1722 if (RegName.size() > RegNumIndex) { 1723 // Single 32-bit register: vXX. 1724 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 1725 return false; 1726 Parser.Lex(); 1727 RegWidth = 1; 1728 } else { 1729 // Range of registers: v[XX:YY]. ":YY" is optional. 
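        // For example, v[8:11] names a 4-dword VGPR tuple starting at v8,
        // s[0:1] a 64-bit SGPR pair, and v[5] is accepted as another way to
        // write v5.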
1730 Parser.Lex(); 1731 int64_t RegLo, RegHi; 1732 if (getLexer().isNot(AsmToken::LBrac)) 1733 return false; 1734 Parser.Lex(); 1735 1736 if (getParser().parseAbsoluteExpression(RegLo)) 1737 return false; 1738 1739 const bool isRBrace = getLexer().is(AsmToken::RBrac); 1740 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 1741 return false; 1742 Parser.Lex(); 1743 1744 if (isRBrace) { 1745 RegHi = RegLo; 1746 } else { 1747 if (getParser().parseAbsoluteExpression(RegHi)) 1748 return false; 1749 1750 if (getLexer().isNot(AsmToken::RBrac)) 1751 return false; 1752 Parser.Lex(); 1753 } 1754 RegNum = (unsigned) RegLo; 1755 RegWidth = (RegHi - RegLo) + 1; 1756 } 1757 } 1758 } else if (getLexer().is(AsmToken::LBrac)) { 1759 // List of consecutive registers: [s0,s1,s2,s3] 1760 Parser.Lex(); 1761 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 1762 return false; 1763 if (RegWidth != 1) 1764 return false; 1765 RegisterKind RegKind1; 1766 unsigned Reg1, RegNum1, RegWidth1; 1767 do { 1768 if (getLexer().is(AsmToken::Comma)) { 1769 Parser.Lex(); 1770 } else if (getLexer().is(AsmToken::RBrac)) { 1771 Parser.Lex(); 1772 break; 1773 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 1774 if (RegWidth1 != 1) { 1775 return false; 1776 } 1777 if (RegKind1 != RegKind) { 1778 return false; 1779 } 1780 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 1781 return false; 1782 } 1783 } else { 1784 return false; 1785 } 1786 } while (true); 1787 } else { 1788 return false; 1789 } 1790 switch (RegKind) { 1791 case IS_SPECIAL: 1792 RegNum = 0; 1793 RegWidth = 1; 1794 break; 1795 case IS_VGPR: 1796 case IS_SGPR: 1797 case IS_TTMP: 1798 { 1799 unsigned Size = 1; 1800 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 1801 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
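// For example, s[4:7] is accepted because its first register index (4) is a
// multiple of its 4-dword size, whereas a misaligned range like s[3:6] is
// rejected by the check below.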
1802 Size = std::min(RegWidth, 4u); 1803 } 1804 if (RegNum % Size != 0) 1805 return false; 1806 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 1807 RegNum = RegNum / Size; 1808 int RCID = getRegClass(RegKind, RegWidth); 1809 if (RCID == -1) 1810 return false; 1811 const MCRegisterClass RC = TRI->getRegClass(RCID); 1812 if (RegNum >= RC.getNumRegs()) 1813 return false; 1814 Reg = RC.getRegister(RegNum); 1815 break; 1816 } 1817 1818 default: 1819 llvm_unreachable("unexpected register kind"); 1820 } 1821 1822 if (!subtargetHasRegister(*TRI, Reg)) 1823 return false; 1824 return true; 1825 } 1826 1827 Optional<StringRef> 1828 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 1829 switch (RegKind) { 1830 case IS_VGPR: 1831 return StringRef(".amdgcn.next_free_vgpr"); 1832 case IS_SGPR: 1833 return StringRef(".amdgcn.next_free_sgpr"); 1834 default: 1835 return None; 1836 } 1837 } 1838 1839 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 1840 auto SymbolName = getGprCountSymbolName(RegKind); 1841 assert(SymbolName && "initializing invalid register kind"); 1842 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1843 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 1844 } 1845 1846 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 1847 unsigned DwordRegIndex, 1848 unsigned RegWidth) { 1849 // Symbols are only defined for GCN targets 1850 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 1851 return true; 1852 1853 auto SymbolName = getGprCountSymbolName(RegKind); 1854 if (!SymbolName) 1855 return true; 1856 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1857 1858 int64_t NewMax = DwordRegIndex + RegWidth - 1; 1859 int64_t OldCount; 1860 1861 if (!Sym->isVariable()) 1862 return !Error(getParser().getTok().getLoc(), 1863 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 1864 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 1865 return !Error( 1866 getParser().getTok().getLoc(), 1867 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 1868 1869 if (OldCount <= NewMax) 1870 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 1871 1872 return true; 1873 } 1874 1875 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 1876 const auto &Tok = Parser.getTok(); 1877 SMLoc StartLoc = Tok.getLoc(); 1878 SMLoc EndLoc = Tok.getEndLoc(); 1879 RegisterKind RegKind; 1880 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 1881 1882 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 1883 return nullptr; 1884 } 1885 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1886 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 1887 return nullptr; 1888 } else 1889 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 1890 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false); 1891 } 1892 1893 bool 1894 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) { 1895 if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) && 1896 (getLexer().getKind() == AsmToken::Integer || 1897 getLexer().getKind() == AsmToken::Real)) { 1898 // This is a workaround for handling operands like these: 1899 // |1.0| 1900 // |-1| 1901 // This syntax is not compatible with syntax of standard 1902 // MC expressions (due to the trailing '|'). 
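// Use parsePrimaryExpr() so that only the leading number is consumed and the
// trailing '|' remains in the token stream to be recognized later as the
// closing bar of the modifier.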
1903 1904 SMLoc EndLoc; 1905 const MCExpr *Expr; 1906 1907 if (getParser().parsePrimaryExpr(Expr, EndLoc)) { 1908 return true; 1909 } 1910 1911 return !Expr->evaluateAsAbsolute(Val); 1912 } 1913 1914 return getParser().parseAbsoluteExpression(Val); 1915 } 1916 1917 OperandMatchResultTy 1918 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) { 1919 // TODO: add syntactic sugar for 1/(2*PI) 1920 bool Minus = false; 1921 if (getLexer().getKind() == AsmToken::Minus) { 1922 const AsmToken NextToken = getLexer().peekTok(); 1923 if (!NextToken.is(AsmToken::Integer) && 1924 !NextToken.is(AsmToken::Real)) { 1925 return MatchOperand_NoMatch; 1926 } 1927 Minus = true; 1928 Parser.Lex(); 1929 } 1930 1931 SMLoc S = Parser.getTok().getLoc(); 1932 switch(getLexer().getKind()) { 1933 case AsmToken::Integer: { 1934 int64_t IntVal; 1935 if (parseAbsoluteExpr(IntVal, AbsMod)) 1936 return MatchOperand_ParseFail; 1937 if (Minus) 1938 IntVal *= -1; 1939 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 1940 return MatchOperand_Success; 1941 } 1942 case AsmToken::Real: { 1943 int64_t IntVal; 1944 if (parseAbsoluteExpr(IntVal, AbsMod)) 1945 return MatchOperand_ParseFail; 1946 1947 APFloat F(BitsToDouble(IntVal)); 1948 if (Minus) 1949 F.changeSign(); 1950 Operands.push_back( 1951 AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S, 1952 AMDGPUOperand::ImmTyNone, true)); 1953 return MatchOperand_Success; 1954 } 1955 default: 1956 return MatchOperand_NoMatch; 1957 } 1958 } 1959 1960 OperandMatchResultTy 1961 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 1962 if (auto R = parseRegister()) { 1963 assert(R->isReg()); 1964 R->Reg.IsForcedVOP3 = isForcedVOP3(); 1965 Operands.push_back(std::move(R)); 1966 return MatchOperand_Success; 1967 } 1968 return MatchOperand_NoMatch; 1969 } 1970 1971 OperandMatchResultTy 1972 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) { 1973 auto res = parseImm(Operands, AbsMod); 1974 if (res != MatchOperand_NoMatch) { 1975 return res; 1976 } 1977 1978 return parseReg(Operands); 1979 } 1980 1981 OperandMatchResultTy 1982 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 1983 bool AllowImm) { 1984 bool Negate = false, Negate2 = false, Abs = false, Abs2 = false; 1985 1986 if (getLexer().getKind()== AsmToken::Minus) { 1987 const AsmToken NextToken = getLexer().peekTok(); 1988 1989 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 1990 if (NextToken.is(AsmToken::Minus)) { 1991 Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier"); 1992 return MatchOperand_ParseFail; 1993 } 1994 1995 // '-' followed by an integer literal N should be interpreted as integer 1996 // negation rather than a floating-point NEG modifier applied to N. 
1997 // Besides being counter-intuitive, such use of floating-point NEG modifier 1998 // results in different meanings of integer literals used with VOP1/2/C 1999 // and VOP3, for example: 2000 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2001 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2002 // Negative fp literals should be handled likewise for uniformity 2003 if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) { 2004 Parser.Lex(); 2005 Negate = true; 2006 } 2007 } 2008 2009 if (getLexer().getKind() == AsmToken::Identifier && 2010 Parser.getTok().getString() == "neg") { 2011 if (Negate) { 2012 Error(Parser.getTok().getLoc(), "expected register or immediate"); 2013 return MatchOperand_ParseFail; 2014 } 2015 Parser.Lex(); 2016 Negate2 = true; 2017 if (getLexer().isNot(AsmToken::LParen)) { 2018 Error(Parser.getTok().getLoc(), "expected left paren after neg"); 2019 return MatchOperand_ParseFail; 2020 } 2021 Parser.Lex(); 2022 } 2023 2024 if (getLexer().getKind() == AsmToken::Identifier && 2025 Parser.getTok().getString() == "abs") { 2026 Parser.Lex(); 2027 Abs2 = true; 2028 if (getLexer().isNot(AsmToken::LParen)) { 2029 Error(Parser.getTok().getLoc(), "expected left paren after abs"); 2030 return MatchOperand_ParseFail; 2031 } 2032 Parser.Lex(); 2033 } 2034 2035 if (getLexer().getKind() == AsmToken::Pipe) { 2036 if (Abs2) { 2037 Error(Parser.getTok().getLoc(), "expected register or immediate"); 2038 return MatchOperand_ParseFail; 2039 } 2040 Parser.Lex(); 2041 Abs = true; 2042 } 2043 2044 OperandMatchResultTy Res; 2045 if (AllowImm) { 2046 Res = parseRegOrImm(Operands, Abs); 2047 } else { 2048 Res = parseReg(Operands); 2049 } 2050 if (Res != MatchOperand_Success) { 2051 return Res; 2052 } 2053 2054 AMDGPUOperand::Modifiers Mods; 2055 if (Abs) { 2056 if (getLexer().getKind() != AsmToken::Pipe) { 2057 Error(Parser.getTok().getLoc(), "expected vertical bar"); 2058 return MatchOperand_ParseFail; 2059 } 2060 Parser.Lex(); 2061 Mods.Abs = true; 2062 } 2063 if (Abs2) { 2064 if (getLexer().isNot(AsmToken::RParen)) { 2065 Error(Parser.getTok().getLoc(), "expected closing parentheses"); 2066 return MatchOperand_ParseFail; 2067 } 2068 Parser.Lex(); 2069 Mods.Abs = true; 2070 } 2071 2072 if (Negate) { 2073 Mods.Neg = true; 2074 } else if (Negate2) { 2075 if (getLexer().isNot(AsmToken::RParen)) { 2076 Error(Parser.getTok().getLoc(), "expected closing parentheses"); 2077 return MatchOperand_ParseFail; 2078 } 2079 Parser.Lex(); 2080 Mods.Neg = true; 2081 } 2082 2083 if (Mods.hasFPModifiers()) { 2084 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2085 Op.setModifiers(Mods); 2086 } 2087 return MatchOperand_Success; 2088 } 2089 2090 OperandMatchResultTy 2091 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2092 bool AllowImm) { 2093 bool Sext = false; 2094 2095 if (getLexer().getKind() == AsmToken::Identifier && 2096 Parser.getTok().getString() == "sext") { 2097 Parser.Lex(); 2098 Sext = true; 2099 if (getLexer().isNot(AsmToken::LParen)) { 2100 Error(Parser.getTok().getLoc(), "expected left paren after sext"); 2101 return MatchOperand_ParseFail; 2102 } 2103 Parser.Lex(); 2104 } 2105 2106 OperandMatchResultTy Res; 2107 if (AllowImm) { 2108 Res = parseRegOrImm(Operands); 2109 } else { 2110 Res = parseReg(Operands); 2111 } 2112 if (Res != MatchOperand_Success) { 2113 return Res; 2114 } 2115 2116 AMDGPUOperand::Modifiers Mods; 2117 if (Sext) { 2118 if (getLexer().isNot(AsmToken::RParen)) { 2119 Error(Parser.getTok().getLoc(), "expected closing
parentheses"); 2120 return MatchOperand_ParseFail; 2121 } 2122 Parser.Lex(); 2123 Mods.Sext = true; 2124 } 2125 2126 if (Mods.hasIntModifiers()) { 2127 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2128 Op.setModifiers(Mods); 2129 } 2130 2131 return MatchOperand_Success; 2132 } 2133 2134 OperandMatchResultTy 2135 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2136 return parseRegOrImmWithFPInputMods(Operands, false); 2137 } 2138 2139 OperandMatchResultTy 2140 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2141 return parseRegOrImmWithIntInputMods(Operands, false); 2142 } 2143 2144 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2145 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2146 if (Reg) { 2147 Operands.push_back(std::move(Reg)); 2148 return MatchOperand_Success; 2149 } 2150 2151 const AsmToken &Tok = Parser.getTok(); 2152 if (Tok.getString() == "off") { 2153 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(), 2154 AMDGPUOperand::ImmTyOff, false)); 2155 Parser.Lex(); 2156 return MatchOperand_Success; 2157 } 2158 2159 return MatchOperand_NoMatch; 2160 } 2161 2162 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2163 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2164 2165 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2166 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2167 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2168 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2169 return Match_InvalidOperand; 2170 2171 if ((TSFlags & SIInstrFlags::VOP3) && 2172 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2173 getForcedEncodingSize() != 64) 2174 return Match_PreferE32; 2175 2176 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2177 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2178 // v_mac_f32/16 allow only dst_sel == DWORD; 2179 auto OpNum = 2180 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2181 const auto &Op = Inst.getOperand(OpNum); 2182 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2183 return Match_InvalidOperand; 2184 } 2185 } 2186 2187 if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) { 2188 // FIXME: Produces error without correct column reported. 
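// On subtargets without flat instruction offsets, only an immediate offset
// of 0 is accepted for FLAT instructions.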
2189 auto OpNum = 2190 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset); 2191 const auto &Op = Inst.getOperand(OpNum); 2192 if (Op.getImm() != 0) 2193 return Match_InvalidOperand; 2194 } 2195 2196 return Match_Success; 2197 } 2198 2199 // What asm variants we should check 2200 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2201 if (getForcedEncodingSize() == 32) { 2202 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2203 return makeArrayRef(Variants); 2204 } 2205 2206 if (isForcedVOP3()) { 2207 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2208 return makeArrayRef(Variants); 2209 } 2210 2211 if (isForcedSDWA()) { 2212 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2213 AMDGPUAsmVariants::SDWA9}; 2214 return makeArrayRef(Variants); 2215 } 2216 2217 if (isForcedDPP()) { 2218 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2219 return makeArrayRef(Variants); 2220 } 2221 2222 static const unsigned Variants[] = { 2223 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2224 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2225 }; 2226 2227 return makeArrayRef(Variants); 2228 } 2229 2230 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2231 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2232 const unsigned Num = Desc.getNumImplicitUses(); 2233 for (unsigned i = 0; i < Num; ++i) { 2234 unsigned Reg = Desc.ImplicitUses[i]; 2235 switch (Reg) { 2236 case AMDGPU::FLAT_SCR: 2237 case AMDGPU::VCC: 2238 case AMDGPU::M0: 2239 return Reg; 2240 default: 2241 break; 2242 } 2243 } 2244 return AMDGPU::NoRegister; 2245 } 2246 2247 // NB: This code is correct only when used to check constant 2248 // bus limitations because GFX7 supports no f16 inline constants. 2249 // Note that there are no cases when a GFX7 opcode violates 2250 // constant bus limitations due to the use of an f16 constant.
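// For example, an f16 operand value of 0x3800 (0.5) is an inline constant,
// while most other 16-bit values must be encoded as a literal and therefore
// occupy the constant bus.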
2251 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2252 unsigned OpIdx) const { 2253 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2254 2255 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2256 return false; 2257 } 2258 2259 const MCOperand &MO = Inst.getOperand(OpIdx); 2260 2261 int64_t Val = MO.getImm(); 2262 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2263 2264 switch (OpSize) { // expected operand size 2265 case 8: 2266 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2267 case 4: 2268 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2269 case 2: { 2270 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2271 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2272 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { 2273 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2274 } else { 2275 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2276 } 2277 } 2278 default: 2279 llvm_unreachable("invalid operand size"); 2280 } 2281 } 2282 2283 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2284 const MCOperand &MO = Inst.getOperand(OpIdx); 2285 if (MO.isImm()) { 2286 return !isInlineConstant(Inst, OpIdx); 2287 } 2288 return !MO.isReg() || 2289 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2290 } 2291 2292 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2293 const unsigned Opcode = Inst.getOpcode(); 2294 const MCInstrDesc &Desc = MII.get(Opcode); 2295 unsigned ConstantBusUseCount = 0; 2296 2297 if (Desc.TSFlags & 2298 (SIInstrFlags::VOPC | 2299 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2300 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2301 SIInstrFlags::SDWA)) { 2302 // Check special imm operands (used by madmk, etc) 2303 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2304 ++ConstantBusUseCount; 2305 } 2306 2307 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2308 if (SGPRUsed != AMDGPU::NoRegister) { 2309 ++ConstantBusUseCount; 2310 } 2311 2312 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2313 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2314 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2315 2316 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2317 2318 for (int OpIdx : OpIndices) { 2319 if (OpIdx == -1) break; 2320 2321 const MCOperand &MO = Inst.getOperand(OpIdx); 2322 if (usesConstantBus(Inst, OpIdx)) { 2323 if (MO.isReg()) { 2324 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2325 // Pairs of registers with a partial intersection like these 2326 // s0, s[0:1] 2327 // flat_scratch_lo, flat_scratch 2328 // flat_scratch_lo, flat_scratch_hi 2329 // are theoretically valid but they are disabled anyway.
2330 // Note that this code mimics SIInstrInfo::verifyInstruction 2331 if (Reg != SGPRUsed) { 2332 ++ConstantBusUseCount; 2333 } 2334 SGPRUsed = Reg; 2335 } else { // Expression or a literal 2336 ++ConstantBusUseCount; 2337 } 2338 } 2339 } 2340 } 2341 2342 return ConstantBusUseCount <= 1; 2343 } 2344 2345 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2346 const unsigned Opcode = Inst.getOpcode(); 2347 const MCInstrDesc &Desc = MII.get(Opcode); 2348 2349 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2350 if (DstIdx == -1 || 2351 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2352 return true; 2353 } 2354 2355 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2356 2357 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2358 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2359 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2360 2361 assert(DstIdx != -1); 2362 const MCOperand &Dst = Inst.getOperand(DstIdx); 2363 assert(Dst.isReg()); 2364 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2365 2366 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2367 2368 for (int SrcIdx : SrcIndices) { 2369 if (SrcIdx == -1) break; 2370 const MCOperand &Src = Inst.getOperand(SrcIdx); 2371 if (Src.isReg()) { 2372 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2373 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2374 return false; 2375 } 2376 } 2377 } 2378 2379 return true; 2380 } 2381 2382 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2383 2384 const unsigned Opc = Inst.getOpcode(); 2385 const MCInstrDesc &Desc = MII.get(Opc); 2386 2387 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2388 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2389 assert(ClampIdx != -1); 2390 return Inst.getOperand(ClampIdx).getImm() == 0; 2391 } 2392 2393 return true; 2394 } 2395 2396 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2397 2398 const unsigned Opc = Inst.getOpcode(); 2399 const MCInstrDesc &Desc = MII.get(Opc); 2400 2401 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2402 return true; 2403 2404 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2405 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2406 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2407 2408 assert(VDataIdx != -1); 2409 assert(DMaskIdx != -1); 2410 assert(TFEIdx != -1); 2411 2412 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2413 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2414 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2415 if (DMask == 0) 2416 DMask = 1; 2417 2418 unsigned DataSize = 2419 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 2420 if (hasPackedD16()) { 2421 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2422 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2423 DataSize = (DataSize + 1) / 2; 2424 } 2425 2426 return (VDataSize / 4) == DataSize + TFESize; 2427 } 2428 2429 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2430 2431 const unsigned Opc = Inst.getOpcode(); 2432 const MCInstrDesc &Desc = MII.get(Opc); 2433 2434 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2435 return true; 2436 if (!Desc.mayLoad() || !Desc.mayStore()) 2437 return true; // Not atomic 2438 2439 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2440 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2441 2442 // This is an incomplete check because image_atomic_cmpswap 2443 // may only use 0x3 and 0xf while other atomic operations 2444 // may use 0x1 and 0x3. However these limitations are 2445 // verified when we check that dmask matches dst size. 2446 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2447 } 2448 2449 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2450 2451 const unsigned Opc = Inst.getOpcode(); 2452 const MCInstrDesc &Desc = MII.get(Opc); 2453 2454 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2455 return true; 2456 2457 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2458 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2459 2460 // GATHER4 instructions use dmask in a different fashion compared to 2461 // other MIMG instructions. The only useful DMASK values are 2462 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2463 // (red,red,red,red) etc.) The ISA document doesn't mention 2464 // this. 2465 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2466 } 2467 2468 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2469 2470 const unsigned Opc = Inst.getOpcode(); 2471 const MCInstrDesc &Desc = MII.get(Opc); 2472 2473 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2474 return true; 2475 2476 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2477 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2478 if (isCI() || isSI()) 2479 return false; 2480 } 2481 2482 return true; 2483 } 2484 2485 static bool IsRevOpcode(const unsigned Opcode) 2486 { 2487 switch (Opcode) { 2488 case AMDGPU::V_SUBREV_F32_e32: 2489 case AMDGPU::V_SUBREV_F32_e64: 2490 case AMDGPU::V_SUBREV_F32_e32_si: 2491 case AMDGPU::V_SUBREV_F32_e32_vi: 2492 case AMDGPU::V_SUBREV_F32_e64_si: 2493 case AMDGPU::V_SUBREV_F32_e64_vi: 2494 case AMDGPU::V_SUBREV_I32_e32: 2495 case AMDGPU::V_SUBREV_I32_e64: 2496 case AMDGPU::V_SUBREV_I32_e32_si: 2497 case AMDGPU::V_SUBREV_I32_e64_si: 2498 case AMDGPU::V_SUBBREV_U32_e32: 2499 case AMDGPU::V_SUBBREV_U32_e64: 2500 case AMDGPU::V_SUBBREV_U32_e32_si: 2501 case AMDGPU::V_SUBBREV_U32_e32_vi: 2502 case AMDGPU::V_SUBBREV_U32_e64_si: 2503 case AMDGPU::V_SUBBREV_U32_e64_vi: 2504 case AMDGPU::V_SUBREV_U32_e32: 2505 case AMDGPU::V_SUBREV_U32_e64: 2506 case AMDGPU::V_SUBREV_U32_e32_gfx9: 2507 case AMDGPU::V_SUBREV_U32_e32_vi: 2508 case AMDGPU::V_SUBREV_U32_e64_gfx9: 2509 case AMDGPU::V_SUBREV_U32_e64_vi: 2510 case AMDGPU::V_SUBREV_F16_e32: 2511 case AMDGPU::V_SUBREV_F16_e64: 2512 case AMDGPU::V_SUBREV_F16_e32_vi: 2513 case AMDGPU::V_SUBREV_F16_e64_vi: 2514 case AMDGPU::V_SUBREV_U16_e32: 2515 case AMDGPU::V_SUBREV_U16_e64: 2516 case AMDGPU::V_SUBREV_U16_e32_vi: 2517 case AMDGPU::V_SUBREV_U16_e64_vi: 2518 case 
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 2519 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 2520 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 2521 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 2522 case AMDGPU::V_LSHLREV_B32_e32_si: 2523 case AMDGPU::V_LSHLREV_B32_e64_si: 2524 case AMDGPU::V_LSHLREV_B16_e32_vi: 2525 case AMDGPU::V_LSHLREV_B16_e64_vi: 2526 case AMDGPU::V_LSHLREV_B32_e32_vi: 2527 case AMDGPU::V_LSHLREV_B32_e64_vi: 2528 case AMDGPU::V_LSHLREV_B64_vi: 2529 case AMDGPU::V_LSHRREV_B32_e32_si: 2530 case AMDGPU::V_LSHRREV_B32_e64_si: 2531 case AMDGPU::V_LSHRREV_B16_e32_vi: 2532 case AMDGPU::V_LSHRREV_B16_e64_vi: 2533 case AMDGPU::V_LSHRREV_B32_e32_vi: 2534 case AMDGPU::V_LSHRREV_B32_e64_vi: 2535 case AMDGPU::V_LSHRREV_B64_vi: 2536 case AMDGPU::V_ASHRREV_I32_e64_si: 2537 case AMDGPU::V_ASHRREV_I32_e32_si: 2538 case AMDGPU::V_ASHRREV_I16_e32_vi: 2539 case AMDGPU::V_ASHRREV_I16_e64_vi: 2540 case AMDGPU::V_ASHRREV_I32_e32_vi: 2541 case AMDGPU::V_ASHRREV_I32_e64_vi: 2542 case AMDGPU::V_ASHRREV_I64_vi: 2543 case AMDGPU::V_PK_LSHLREV_B16_vi: 2544 case AMDGPU::V_PK_LSHRREV_B16_vi: 2545 case AMDGPU::V_PK_ASHRREV_I16_vi: 2546 return true; 2547 default: 2548 return false; 2549 } 2550 } 2551 2552 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 2553 2554 using namespace SIInstrFlags; 2555 const unsigned Opcode = Inst.getOpcode(); 2556 const MCInstrDesc &Desc = MII.get(Opcode); 2557 2558 // lds_direct register is defined so that it can be used 2559 // with 9-bit operands only. Ignore encodings which do not accept these. 2560 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 2561 return true; 2562 2563 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2564 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2565 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2566 2567 const int SrcIndices[] = { Src1Idx, Src2Idx }; 2568 2569 // lds_direct cannot be specified as either src1 or src2. 2570 for (int SrcIdx : SrcIndices) { 2571 if (SrcIdx == -1) break; 2572 const MCOperand &Src = Inst.getOperand(SrcIdx); 2573 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 2574 return false; 2575 } 2576 } 2577 2578 if (Src0Idx == -1) 2579 return true; 2580 2581 const MCOperand &Src = Inst.getOperand(Src0Idx); 2582 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 2583 return true; 2584 2585 // lds_direct is specified as src0. Check additional limitations. 
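// lds_direct is rejected for SDWA encodings and for *REV opcodes, whose
// source operands are swapped relative to the underlying operation.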
2586 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 2587 } 2588 2589 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 2590 unsigned Opcode = Inst.getOpcode(); 2591 const MCInstrDesc &Desc = MII.get(Opcode); 2592 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 2593 return true; 2594 2595 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2596 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2597 2598 const int OpIndices[] = { Src0Idx, Src1Idx }; 2599 2600 unsigned NumLiterals = 0; 2601 uint32_t LiteralValue; 2602 2603 for (int OpIdx : OpIndices) { 2604 if (OpIdx == -1) break; 2605 2606 const MCOperand &MO = Inst.getOperand(OpIdx); 2607 if (MO.isImm() && 2608 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 2609 AMDGPU::isSISrcOperand(Desc, OpIdx) && 2610 !isInlineConstant(Inst, OpIdx)) { 2611 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 2612 if (NumLiterals == 0 || LiteralValue != Value) { 2613 LiteralValue = Value; 2614 ++NumLiterals; 2615 } 2616 } 2617 } 2618 2619 return NumLiterals <= 1; 2620 } 2621 2622 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 2623 const SMLoc &IDLoc) { 2624 if (!validateLdsDirect(Inst)) { 2625 Error(IDLoc, 2626 "invalid use of lds_direct"); 2627 return false; 2628 } 2629 if (!validateSOPLiteral(Inst)) { 2630 Error(IDLoc, 2631 "only one literal operand is allowed"); 2632 return false; 2633 } 2634 if (!validateConstantBusLimitations(Inst)) { 2635 Error(IDLoc, 2636 "invalid operand (violates constant bus restrictions)"); 2637 return false; 2638 } 2639 if (!validateEarlyClobberLimitations(Inst)) { 2640 Error(IDLoc, 2641 "destination must be different than all sources"); 2642 return false; 2643 } 2644 if (!validateIntClampSupported(Inst)) { 2645 Error(IDLoc, 2646 "integer clamping is not supported on this GPU"); 2647 return false; 2648 } 2649 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 2650 if (!validateMIMGD16(Inst)) { 2651 Error(IDLoc, 2652 "d16 modifier is not supported on this GPU"); 2653 return false; 2654 } 2655 if (!validateMIMGDataSize(Inst)) { 2656 Error(IDLoc, 2657 "image data size does not match dmask and tfe"); 2658 return false; 2659 } 2660 if (!validateMIMGAtomicDMask(Inst)) { 2661 Error(IDLoc, 2662 "invalid atomic image dmask"); 2663 return false; 2664 } 2665 if (!validateMIMGGatherDMask(Inst)) { 2666 Error(IDLoc, 2667 "invalid image_gather dmask: only one bit must be set"); 2668 return false; 2669 } 2670 2671 return true; 2672 } 2673 2674 static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS, 2675 unsigned VariantID = 0); 2676 2677 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 2678 OperandVector &Operands, 2679 MCStreamer &Out, 2680 uint64_t &ErrorInfo, 2681 bool MatchingInlineAsm) { 2682 MCInst Inst; 2683 unsigned Result = Match_Success; 2684 for (auto Variant : getMatchedVariants()) { 2685 uint64_t EI; 2686 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 2687 Variant); 2688 // We order match statuses from least to most specific. 
We use most specific 2689 // status as resulting 2690 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 2691 if ((R == Match_Success) || 2692 (R == Match_PreferE32) || 2693 (R == Match_MissingFeature && Result != Match_PreferE32) || 2694 (R == Match_InvalidOperand && Result != Match_MissingFeature 2695 && Result != Match_PreferE32) || 2696 (R == Match_MnemonicFail && Result != Match_InvalidOperand 2697 && Result != Match_MissingFeature 2698 && Result != Match_PreferE32)) { 2699 Result = R; 2700 ErrorInfo = EI; 2701 } 2702 if (R == Match_Success) 2703 break; 2704 } 2705 2706 switch (Result) { 2707 default: break; 2708 case Match_Success: 2709 if (!validateInstruction(Inst, IDLoc)) { 2710 return true; 2711 } 2712 Inst.setLoc(IDLoc); 2713 Out.EmitInstruction(Inst, getSTI()); 2714 return false; 2715 2716 case Match_MissingFeature: 2717 return Error(IDLoc, "instruction not supported on this GPU"); 2718 2719 case Match_MnemonicFail: { 2720 uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 2721 std::string Suggestion = AMDGPUMnemonicSpellCheck( 2722 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 2723 return Error(IDLoc, "invalid instruction" + Suggestion, 2724 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 2725 } 2726 2727 case Match_InvalidOperand: { 2728 SMLoc ErrorLoc = IDLoc; 2729 if (ErrorInfo != ~0ULL) { 2730 if (ErrorInfo >= Operands.size()) { 2731 return Error(IDLoc, "too few operands for instruction"); 2732 } 2733 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 2734 if (ErrorLoc == SMLoc()) 2735 ErrorLoc = IDLoc; 2736 } 2737 return Error(ErrorLoc, "invalid operand for instruction"); 2738 } 2739 2740 case Match_PreferE32: 2741 return Error(IDLoc, "internal error: instruction without _e64 suffix " 2742 "should be encoded as e32"); 2743 } 2744 llvm_unreachable("Implement any new match types added!"); 2745 } 2746 2747 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 2748 int64_t Tmp = -1; 2749 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 2750 return true; 2751 } 2752 if (getParser().parseAbsoluteExpression(Tmp)) { 2753 return true; 2754 } 2755 Ret = static_cast<uint32_t>(Tmp); 2756 return false; 2757 } 2758 2759 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 2760 uint32_t &Minor) { 2761 if (ParseAsAbsoluteExpression(Major)) 2762 return TokError("invalid major version"); 2763 2764 if (getLexer().isNot(AsmToken::Comma)) 2765 return TokError("minor version number required, comma expected"); 2766 Lex(); 2767 2768 if (ParseAsAbsoluteExpression(Minor)) 2769 return TokError("invalid minor version"); 2770 2771 return false; 2772 } 2773 2774 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 2775 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2776 return TokError("directive only supported for amdgcn architecture"); 2777 2778 std::string Target; 2779 2780 SMLoc TargetStart = getTok().getLoc(); 2781 if (getParser().parseEscapedString(Target)) 2782 return true; 2783 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 2784 2785 std::string ExpectedTarget; 2786 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 2787 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 2788 2789 if (Target != ExpectedTargetOS.str()) 2790 return getParser().Error(TargetRange.Start, "target must match options", 2791 TargetRange); 2792 2793 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 2794 return false; 2795 } 2796 2797 bool 
AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 2798 return getParser().Error(Range.Start, "value out of range", Range); 2799 } 2800 2801 bool AMDGPUAsmParser::calculateGPRBlocks( 2802 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 2803 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange, 2804 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks, 2805 unsigned &SGPRBlocks) { 2806 // TODO(scott.linder): These calculations are duplicated from 2807 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 2808 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 2809 2810 unsigned NumVGPRs = NextFreeVGPR; 2811 unsigned NumSGPRs = NextFreeSGPR; 2812 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI()); 2813 2814 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 2815 NumSGPRs > MaxAddressableNumSGPRs) 2816 return OutOfRangeError(SGPRRange); 2817 2818 NumSGPRs += 2819 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 2820 2821 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 2822 NumSGPRs > MaxAddressableNumSGPRs) 2823 return OutOfRangeError(SGPRRange); 2824 2825 if (Features.test(FeatureSGPRInitBug)) 2826 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 2827 2828 VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs); 2829 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 2830 2831 return false; 2832 } 2833 2834 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 2835 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2836 return TokError("directive only supported for amdgcn architecture"); 2837 2838 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 2839 return TokError("directive only supported for amdhsa OS"); 2840 2841 StringRef KernelName; 2842 if (getParser().parseIdentifier(KernelName)) 2843 return true; 2844 2845 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(); 2846 2847 StringSet<> Seen; 2848 2849 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 2850 2851 SMRange VGPRRange; 2852 uint64_t NextFreeVGPR = 0; 2853 SMRange SGPRRange; 2854 uint64_t NextFreeSGPR = 0; 2855 unsigned UserSGPRCount = 0; 2856 bool ReserveVCC = true; 2857 bool ReserveFlatScr = true; 2858 bool ReserveXNACK = hasXNACK(); 2859 2860 while (true) { 2861 while (getLexer().is(AsmToken::EndOfStatement)) 2862 Lex(); 2863 2864 if (getLexer().isNot(AsmToken::Identifier)) 2865 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 2866 2867 StringRef ID = getTok().getIdentifier(); 2868 SMRange IDRange = getTok().getLocRange(); 2869 Lex(); 2870 2871 if (ID == ".end_amdhsa_kernel") 2872 break; 2873 2874 if (Seen.find(ID) != Seen.end()) 2875 return TokError(".amdhsa_ directives cannot be repeated"); 2876 Seen.insert(ID); 2877 2878 SMLoc ValStart = getTok().getLoc(); 2879 int64_t IVal; 2880 if (getParser().parseAbsoluteExpression(IVal)) 2881 return true; 2882 SMLoc ValEnd = getTok().getLoc(); 2883 SMRange ValRange = SMRange(ValStart, ValEnd); 2884 2885 if (IVal < 0) 2886 return OutOfRangeError(ValRange); 2887 2888 uint64_t Val = IVal; 2889 2890 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 2891 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 2892 return OutOfRangeError(RANGE); \ 2893 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 2894 2895 if (ID == ".amdhsa_group_segment_fixed_size") { 2896 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 2897 return OutOfRangeError(ValRange); 2898 KD.group_segment_fixed_size = Val; 2899 } else if (ID == 
".amdhsa_private_segment_fixed_size") { 2900 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 2901 return OutOfRangeError(ValRange); 2902 KD.private_segment_fixed_size = Val; 2903 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 2904 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2905 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 2906 Val, ValRange); 2907 UserSGPRCount++; 2908 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 2909 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2910 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 2911 ValRange); 2912 UserSGPRCount++; 2913 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 2914 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2915 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 2916 ValRange); 2917 UserSGPRCount++; 2918 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 2919 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2920 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 2921 Val, ValRange); 2922 UserSGPRCount++; 2923 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 2924 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2925 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 2926 ValRange); 2927 UserSGPRCount++; 2928 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 2929 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2930 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 2931 ValRange); 2932 UserSGPRCount++; 2933 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 2934 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2935 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 2936 Val, ValRange); 2937 UserSGPRCount++; 2938 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 2939 PARSE_BITS_ENTRY( 2940 KD.compute_pgm_rsrc2, 2941 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 2942 ValRange); 2943 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 2944 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2945 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 2946 ValRange); 2947 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 2948 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2949 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 2950 ValRange); 2951 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 2952 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2953 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 2954 ValRange); 2955 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 2956 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2957 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 2958 ValRange); 2959 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 2960 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2961 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 2962 ValRange); 2963 } else if (ID == ".amdhsa_next_free_vgpr") { 2964 VGPRRange = ValRange; 2965 NextFreeVGPR = Val; 2966 } else if (ID == ".amdhsa_next_free_sgpr") { 2967 SGPRRange = ValRange; 2968 NextFreeSGPR = Val; 2969 } else if (ID == ".amdhsa_reserve_vcc") { 2970 if (!isUInt<1>(Val)) 2971 return OutOfRangeError(ValRange); 2972 ReserveVCC = Val; 2973 } else if (ID == ".amdhsa_reserve_flat_scratch") { 2974 if (IVersion.Major < 7) 2975 return getParser().Error(IDRange.Start, "directive requires gfx7+", 2976 IDRange); 2977 if (!isUInt<1>(Val)) 2978 return OutOfRangeError(ValRange); 2979 ReserveFlatScr = Val; 2980 } else if (ID == ".amdhsa_reserve_xnack_mask") { 2981 if (IVersion.Major < 8) 2982 return getParser().Error(IDRange.Start, "directive requires gfx8+", 2983 IDRange); 2984 if 
(!isUInt<1>(Val)) 2985 return OutOfRangeError(ValRange); 2986 ReserveXNACK = Val; 2987 } else if (ID == ".amdhsa_float_round_mode_32") { 2988 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2989 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 2990 } else if (ID == ".amdhsa_float_round_mode_16_64") { 2991 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2992 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 2993 } else if (ID == ".amdhsa_float_denorm_mode_32") { 2994 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2995 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 2996 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 2997 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2998 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 2999 ValRange); 3000 } else if (ID == ".amdhsa_dx10_clamp") { 3001 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3002 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3003 } else if (ID == ".amdhsa_ieee_mode") { 3004 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3005 Val, ValRange); 3006 } else if (ID == ".amdhsa_fp16_overflow") { 3007 if (IVersion.Major < 9) 3008 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3009 IDRange); 3010 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3011 ValRange); 3012 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3013 PARSE_BITS_ENTRY( 3014 KD.compute_pgm_rsrc2, 3015 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3016 ValRange); 3017 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3018 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3019 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3020 Val, ValRange); 3021 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3022 PARSE_BITS_ENTRY( 3023 KD.compute_pgm_rsrc2, 3024 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3025 ValRange); 3026 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3027 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3028 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3029 Val, ValRange); 3030 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 3031 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3032 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3033 Val, ValRange); 3034 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3035 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3036 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3037 Val, ValRange); 3038 } else if (ID == ".amdhsa_exception_int_div_zero") { 3039 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3040 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3041 Val, ValRange); 3042 } else { 3043 return getParser().Error(IDRange.Start, 3044 "unknown .amdhsa_kernel directive", IDRange); 3045 } 3046 3047 #undef PARSE_BITS_ENTRY 3048 } 3049 3050 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3051 return TokError(".amdhsa_next_free_vgpr directive is required"); 3052 3053 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3054 return TokError(".amdhsa_next_free_sgpr directive is required"); 3055 3056 unsigned VGPRBlocks; 3057 unsigned SGPRBlocks; 3058 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3059 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR, 3060 SGPRRange, VGPRBlocks, SGPRBlocks)) 3061 return true; 3062 3063 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3064 VGPRBlocks)) 3065 return OutOfRangeError(VGPRRange); 3066 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3067 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, 
VGPRBlocks); 3068 3069 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3070 SGPRBlocks)) 3071 return OutOfRangeError(SGPRRange); 3072 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3073 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3074 SGPRBlocks); 3075 3076 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3077 return TokError("too many user SGPRs enabled"); 3078 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3079 UserSGPRCount); 3080 3081 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3082 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3083 ReserveFlatScr, ReserveXNACK); 3084 return false; 3085 } 3086 3087 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3088 uint32_t Major; 3089 uint32_t Minor; 3090 3091 if (ParseDirectiveMajorMinor(Major, Minor)) 3092 return true; 3093 3094 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3095 return false; 3096 } 3097 3098 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3099 uint32_t Major; 3100 uint32_t Minor; 3101 uint32_t Stepping; 3102 StringRef VendorName; 3103 StringRef ArchName; 3104 3105 // If this directive has no arguments, then use the ISA version for the 3106 // targeted GPU. 3107 if (getLexer().is(AsmToken::EndOfStatement)) { 3108 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3109 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3110 ISA.Stepping, 3111 "AMD", "AMDGPU"); 3112 return false; 3113 } 3114 3115 if (ParseDirectiveMajorMinor(Major, Minor)) 3116 return true; 3117 3118 if (getLexer().isNot(AsmToken::Comma)) 3119 return TokError("stepping version number required, comma expected"); 3120 Lex(); 3121 3122 if (ParseAsAbsoluteExpression(Stepping)) 3123 return TokError("invalid stepping version"); 3124 3125 if (getLexer().isNot(AsmToken::Comma)) 3126 return TokError("vendor name required, comma expected"); 3127 Lex(); 3128 3129 if (getLexer().isNot(AsmToken::String)) 3130 return TokError("invalid vendor name"); 3131 3132 VendorName = getLexer().getTok().getStringContents(); 3133 Lex(); 3134 3135 if (getLexer().isNot(AsmToken::Comma)) 3136 return TokError("arch name required, comma expected"); 3137 Lex(); 3138 3139 if (getLexer().isNot(AsmToken::String)) 3140 return TokError("invalid arch name"); 3141 3142 ArchName = getLexer().getTok().getStringContents(); 3143 Lex(); 3144 3145 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3146 VendorName, ArchName); 3147 return false; 3148 } 3149 3150 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3151 amd_kernel_code_t &Header) { 3152 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3153 // assembly for backwards compatibility. 3154 if (ID == "max_scratch_backing_memory_byte_size") { 3155 Parser.eatToEndOfStatement(); 3156 return false; 3157 } 3158 3159 SmallString<40> ErrStr; 3160 raw_svector_ostream Err(ErrStr); 3161 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 3162 return TokError(Err.str()); 3163 } 3164 Lex(); 3165 return false; 3166 } 3167 3168 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3169 amd_kernel_code_t Header; 3170 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3171 3172 while (true) { 3173 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3174 // will set the current token to EndOfStatement. 
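// The directive body is a sequence of '<field> = <value>' records (e.g.
// 'wavefront_size = 6') terminated by .end_amd_kernel_code_t.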
3175 while(getLexer().is(AsmToken::EndOfStatement)) 3176 Lex(); 3177 3178 if (getLexer().isNot(AsmToken::Identifier)) 3179 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3180 3181 StringRef ID = getLexer().getTok().getIdentifier(); 3182 Lex(); 3183 3184 if (ID == ".end_amd_kernel_code_t") 3185 break; 3186 3187 if (ParseAMDKernelCodeTValue(ID, Header)) 3188 return true; 3189 } 3190 3191 getTargetStreamer().EmitAMDKernelCodeT(Header); 3192 3193 return false; 3194 } 3195 3196 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 3197 if (getLexer().isNot(AsmToken::Identifier)) 3198 return TokError("expected symbol name"); 3199 3200 StringRef KernelName = Parser.getTok().getString(); 3201 3202 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 3203 ELF::STT_AMDGPU_HSA_KERNEL); 3204 Lex(); 3205 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 3206 KernelScope.initialize(getContext()); 3207 return false; 3208 } 3209 3210 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 3211 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 3212 return Error(getParser().getTok().getLoc(), 3213 ".amd_amdgpu_isa directive is not available on non-amdgcn " 3214 "architectures"); 3215 } 3216 3217 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 3218 3219 std::string ISAVersionStringFromSTI; 3220 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3221 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3222 3223 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3224 return Error(getParser().getTok().getLoc(), 3225 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3226 "arguments specified through the command line"); 3227 } 3228 3229 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 3230 Lex(); 3231 3232 return false; 3233 } 3234 3235 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 3236 const char *AssemblerDirectiveBegin; 3237 const char *AssemblerDirectiveEnd; 3238 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 3239 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 3240 ? 
std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 3241 HSAMD::V3::AssemblerDirectiveEnd) 3242 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 3243 HSAMD::AssemblerDirectiveEnd); 3244 3245 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 3246 return Error(getParser().getTok().getLoc(), 3247 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 3248 "not available on non-amdhsa OSes")).str()); 3249 } 3250 3251 std::string HSAMetadataString; 3252 raw_string_ostream YamlStream(HSAMetadataString); 3253 3254 getLexer().setSkipSpace(false); 3255 3256 bool FoundEnd = false; 3257 while (!getLexer().is(AsmToken::Eof)) { 3258 while (getLexer().is(AsmToken::Space)) { 3259 YamlStream << getLexer().getTok().getString(); 3260 Lex(); 3261 } 3262 3263 if (getLexer().is(AsmToken::Identifier)) { 3264 StringRef ID = getLexer().getTok().getIdentifier(); 3265 if (ID == AssemblerDirectiveEnd) { 3266 Lex(); 3267 FoundEnd = true; 3268 break; 3269 } 3270 } 3271 3272 YamlStream << Parser.parseStringToEndOfStatement() 3273 << getContext().getAsmInfo()->getSeparatorString(); 3274 3275 Parser.eatToEndOfStatement(); 3276 } 3277 3278 getLexer().setSkipSpace(true); 3279 3280 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 3281 return TokError(Twine("expected directive ") + 3282 Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found")); 3283 } 3284 3285 YamlStream.flush(); 3286 3287 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 3288 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 3289 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3290 } else { 3291 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 3292 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3293 } 3294 3295 return false; 3296 } 3297 3298 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 3299 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 3300 return Error(getParser().getTok().getLoc(), 3301 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 3302 "not available on non-amdpal OSes")).str()); 3303 } 3304 3305 PALMD::Metadata PALMetadata; 3306 for (;;) { 3307 uint32_t Value; 3308 if (ParseAsAbsoluteExpression(Value)) { 3309 return TokError(Twine("invalid value in ") + 3310 Twine(PALMD::AssemblerDirective)); 3311 } 3312 PALMetadata.push_back(Value); 3313 if (getLexer().isNot(AsmToken::Comma)) 3314 break; 3315 Lex(); 3316 } 3317 getTargetStreamer().EmitPALMetadata(PALMetadata); 3318 return false; 3319 } 3320 3321 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 3322 StringRef IDVal = DirectiveID.getString(); 3323 3324 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 3325 if (IDVal == ".amdgcn_target") 3326 return ParseDirectiveAMDGCNTarget(); 3327 3328 if (IDVal == ".amdhsa_kernel") 3329 return ParseDirectiveAMDHSAKernel(); 3330 3331 // TODO: Restructure/combine with PAL metadata directive. 
3332 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 3333 return ParseDirectiveHSAMetadata(); 3334 } else { 3335 if (IDVal == ".hsa_code_object_version") 3336 return ParseDirectiveHSACodeObjectVersion(); 3337 3338 if (IDVal == ".hsa_code_object_isa") 3339 return ParseDirectiveHSACodeObjectISA(); 3340 3341 if (IDVal == ".amd_kernel_code_t") 3342 return ParseDirectiveAMDKernelCodeT(); 3343 3344 if (IDVal == ".amdgpu_hsa_kernel") 3345 return ParseDirectiveAMDGPUHsaKernel(); 3346 3347 if (IDVal == ".amd_amdgpu_isa") 3348 return ParseDirectiveISAVersion(); 3349 3350 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 3351 return ParseDirectiveHSAMetadata(); 3352 } 3353 3354 if (IDVal == PALMD::AssemblerDirective) 3355 return ParseDirectivePALMetadata(); 3356 3357 return true; 3358 } 3359 3360 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 3361 unsigned RegNo) const { 3362 3363 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 3364 R.isValid(); ++R) { 3365 if (*R == RegNo) 3366 return isGFX9(); 3367 } 3368 3369 switch (RegNo) { 3370 case AMDGPU::TBA: 3371 case AMDGPU::TBA_LO: 3372 case AMDGPU::TBA_HI: 3373 case AMDGPU::TMA: 3374 case AMDGPU::TMA_LO: 3375 case AMDGPU::TMA_HI: 3376 return !isGFX9(); 3377 case AMDGPU::XNACK_MASK: 3378 case AMDGPU::XNACK_MASK_LO: 3379 case AMDGPU::XNACK_MASK_HI: 3380 return !isCI() && !isSI() && hasXNACK(); 3381 default: 3382 break; 3383 } 3384 3385 if (isCI()) 3386 return true; 3387 3388 if (isSI()) { 3389 // No flat_scr 3390 switch (RegNo) { 3391 case AMDGPU::FLAT_SCR: 3392 case AMDGPU::FLAT_SCR_LO: 3393 case AMDGPU::FLAT_SCR_HI: 3394 return false; 3395 default: 3396 return true; 3397 } 3398 } 3399 3400 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 3401 // SI/CI have. 3402 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 3403 R.isValid(); ++R) { 3404 if (*R == RegNo) 3405 return false; 3406 } 3407 3408 return true; 3409 } 3410 3411 OperandMatchResultTy 3412 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { 3413 // Try to parse with a custom parser 3414 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 3415 3416 // If we successfully parsed the operand or if there as an error parsing, 3417 // we are done. 3418 // 3419 // If we are parsing after we reach EndOfStatement then this means we 3420 // are appending default values to the Operands list. This is only done 3421 // by custom parser, so we shouldn't continue on to the generic parsing. 3422 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 3423 getLexer().is(AsmToken::EndOfStatement)) 3424 return ResTy; 3425 3426 ResTy = parseRegOrImm(Operands); 3427 3428 if (ResTy == MatchOperand_Success) 3429 return ResTy; 3430 3431 const auto &Tok = Parser.getTok(); 3432 SMLoc S = Tok.getLoc(); 3433 3434 const MCExpr *Expr = nullptr; 3435 if (!Parser.parseExpression(Expr)) { 3436 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3437 return MatchOperand_Success; 3438 } 3439 3440 // Possibly this is an instruction flag like 'gds'. 3441 if (Tok.getKind() == AsmToken::Identifier) { 3442 Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S)); 3443 Parser.Lex(); 3444 return MatchOperand_Success; 3445 } 3446 3447 return MatchOperand_NoMatch; 3448 } 3449 3450 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 3451 // Clear any forced encodings from the previous instruction. 
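// Recognized suffixes: _e64 forces the 64-bit (VOP3) encoding, _e32 the
// 32-bit encoding, _dpp the DPP form, and _sdwa the SDWA form.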
3452 setForcedEncodingSize(0); 3453 setForcedDPP(false); 3454 setForcedSDWA(false); 3455 3456 if (Name.endswith("_e64")) { 3457 setForcedEncodingSize(64); 3458 return Name.substr(0, Name.size() - 4); 3459 } else if (Name.endswith("_e32")) { 3460 setForcedEncodingSize(32); 3461 return Name.substr(0, Name.size() - 4); 3462 } else if (Name.endswith("_dpp")) { 3463 setForcedDPP(true); 3464 return Name.substr(0, Name.size() - 4); 3465 } else if (Name.endswith("_sdwa")) { 3466 setForcedSDWA(true); 3467 return Name.substr(0, Name.size() - 5); 3468 } 3469 return Name; 3470 } 3471 3472 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 3473 StringRef Name, 3474 SMLoc NameLoc, OperandVector &Operands) { 3475 // Add the instruction mnemonic 3476 Name = parseMnemonicSuffix(Name); 3477 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 3478 3479 while (!getLexer().is(AsmToken::EndOfStatement)) { 3480 OperandMatchResultTy Res = parseOperand(Operands, Name); 3481 3482 // Eat the comma or space if there is one. 3483 if (getLexer().is(AsmToken::Comma)) 3484 Parser.Lex(); 3485 3486 switch (Res) { 3487 case MatchOperand_Success: break; 3488 case MatchOperand_ParseFail: 3489 Error(getLexer().getLoc(), "failed parsing operand."); 3490 while (!getLexer().is(AsmToken::EndOfStatement)) { 3491 Parser.Lex(); 3492 } 3493 return true; 3494 case MatchOperand_NoMatch: 3495 Error(getLexer().getLoc(), "not a valid operand."); 3496 while (!getLexer().is(AsmToken::EndOfStatement)) { 3497 Parser.Lex(); 3498 } 3499 return true; 3500 } 3501 } 3502 3503 return false; 3504 } 3505 3506 //===----------------------------------------------------------------------===// 3507 // Utility functions 3508 //===----------------------------------------------------------------------===// 3509 3510 OperandMatchResultTy 3511 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { 3512 switch(getLexer().getKind()) { 3513 default: return MatchOperand_NoMatch; 3514 case AsmToken::Identifier: { 3515 StringRef Name = Parser.getTok().getString(); 3516 if (!Name.equals(Prefix)) { 3517 return MatchOperand_NoMatch; 3518 } 3519 3520 Parser.Lex(); 3521 if (getLexer().isNot(AsmToken::Colon)) 3522 return MatchOperand_ParseFail; 3523 3524 Parser.Lex(); 3525 3526 bool IsMinus = false; 3527 if (getLexer().getKind() == AsmToken::Minus) { 3528 Parser.Lex(); 3529 IsMinus = true; 3530 } 3531 3532 if (getLexer().isNot(AsmToken::Integer)) 3533 return MatchOperand_ParseFail; 3534 3535 if (getParser().parseAbsoluteExpression(Int)) 3536 return MatchOperand_ParseFail; 3537 3538 if (IsMinus) 3539 Int = -Int; 3540 break; 3541 } 3542 } 3543 return MatchOperand_Success; 3544 } 3545 3546 OperandMatchResultTy 3547 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 3548 AMDGPUOperand::ImmTy ImmTy, 3549 bool (*ConvertResult)(int64_t&)) { 3550 SMLoc S = Parser.getTok().getLoc(); 3551 int64_t Value = 0; 3552 3553 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 3554 if (Res != MatchOperand_Success) 3555 return Res; 3556 3557 if (ConvertResult && !ConvertResult(Value)) { 3558 return MatchOperand_ParseFail; 3559 } 3560 3561 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 3562 return MatchOperand_Success; 3563 } 3564 3565 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix( 3566 const char *Prefix, 3567 OperandVector &Operands, 3568 AMDGPUOperand::ImmTy ImmTy, 3569 bool (*ConvertResult)(int64_t&)) { 3570 StringRef Name = Parser.getTok().getString(); 3571 
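// This parses operands of the form "<prefix>:[x,y,...]" with up to four
// comma-separated elements, each restricted to 0 or 1, e.g. (illustrative)
// "op_sel:[0,1]".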
if (!Name.equals(Prefix)) 3572 return MatchOperand_NoMatch; 3573 3574 Parser.Lex(); 3575 if (getLexer().isNot(AsmToken::Colon)) 3576 return MatchOperand_ParseFail; 3577 3578 Parser.Lex(); 3579 if (getLexer().isNot(AsmToken::LBrac)) 3580 return MatchOperand_ParseFail; 3581 Parser.Lex(); 3582 3583 unsigned Val = 0; 3584 SMLoc S = Parser.getTok().getLoc(); 3585 3586 // FIXME: How to verify the number of elements matches the number of src 3587 // operands? 3588 for (int I = 0; I < 4; ++I) { 3589 if (I != 0) { 3590 if (getLexer().is(AsmToken::RBrac)) 3591 break; 3592 3593 if (getLexer().isNot(AsmToken::Comma)) 3594 return MatchOperand_ParseFail; 3595 Parser.Lex(); 3596 } 3597 3598 if (getLexer().isNot(AsmToken::Integer)) 3599 return MatchOperand_ParseFail; 3600 3601 int64_t Op; 3602 if (getParser().parseAbsoluteExpression(Op)) 3603 return MatchOperand_ParseFail; 3604 3605 if (Op != 0 && Op != 1) 3606 return MatchOperand_ParseFail; 3607 Val |= (Op << I); 3608 } 3609 3610 Parser.Lex(); 3611 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 3612 return MatchOperand_Success; 3613 } 3614 3615 OperandMatchResultTy 3616 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 3617 AMDGPUOperand::ImmTy ImmTy) { 3618 int64_t Bit = 0; 3619 SMLoc S = Parser.getTok().getLoc(); 3620 3621 // We are at the end of the statement, and this is a default argument, so 3622 // use a default value. 3623 if (getLexer().isNot(AsmToken::EndOfStatement)) { 3624 switch(getLexer().getKind()) { 3625 case AsmToken::Identifier: { 3626 StringRef Tok = Parser.getTok().getString(); 3627 if (Tok == Name) { 3628 if (Tok == "r128" && isGFX9()) 3629 Error(S, "r128 modifier is not supported on this GPU"); 3630 if (Tok == "a16" && !isGFX9()) 3631 Error(S, "a16 modifier is not supported on this GPU"); 3632 Bit = 1; 3633 Parser.Lex(); 3634 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 3635 Bit = 0; 3636 Parser.Lex(); 3637 } else { 3638 return MatchOperand_NoMatch; 3639 } 3640 break; 3641 } 3642 default: 3643 return MatchOperand_NoMatch; 3644 } 3645 } 3646 3647 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 3648 return MatchOperand_Success; 3649 } 3650 3651 static void addOptionalImmOperand( 3652 MCInst& Inst, const OperandVector& Operands, 3653 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 3654 AMDGPUOperand::ImmTy ImmT, 3655 int64_t Default = 0) { 3656 auto i = OptionalIdx.find(ImmT); 3657 if (i != OptionalIdx.end()) { 3658 unsigned Idx = i->second; 3659 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 3660 } else { 3661 Inst.addOperand(MCOperand::createImm(Default)); 3662 } 3663 } 3664 3665 OperandMatchResultTy 3666 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 3667 if (getLexer().isNot(AsmToken::Identifier)) { 3668 return MatchOperand_NoMatch; 3669 } 3670 StringRef Tok = Parser.getTok().getString(); 3671 if (Tok != Prefix) { 3672 return MatchOperand_NoMatch; 3673 } 3674 3675 Parser.Lex(); 3676 if (getLexer().isNot(AsmToken::Colon)) { 3677 return MatchOperand_ParseFail; 3678 } 3679 3680 Parser.Lex(); 3681 if (getLexer().isNot(AsmToken::Identifier)) { 3682 return MatchOperand_ParseFail; 3683 } 3684 3685 Value = Parser.getTok().getString(); 3686 return MatchOperand_Success; 3687 } 3688 3689 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 3690 // values to live in a joint format operand in the MCInst encoding. 
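// For example (illustrative operand text): "dfmt:1" and/or "nfmt:2" may appear
// in either order, and each is optional; both values are packed into a single
// immediate below.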
3691 OperandMatchResultTy 3692 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 3693 SMLoc S = Parser.getTok().getLoc(); 3694 int64_t Dfmt = 0, Nfmt = 0; 3695 // dfmt and nfmt can appear in either order, and each is optional. 3696 bool GotDfmt = false, GotNfmt = false; 3697 while (!GotDfmt || !GotNfmt) { 3698 if (!GotDfmt) { 3699 auto Res = parseIntWithPrefix("dfmt", Dfmt); 3700 if (Res != MatchOperand_NoMatch) { 3701 if (Res != MatchOperand_Success) 3702 return Res; 3703 if (Dfmt >= 16) { 3704 Error(Parser.getTok().getLoc(), "out of range dfmt"); 3705 return MatchOperand_ParseFail; 3706 } 3707 GotDfmt = true; 3708 Parser.Lex(); 3709 continue; 3710 } 3711 } 3712 if (!GotNfmt) { 3713 auto Res = parseIntWithPrefix("nfmt", Nfmt); 3714 if (Res != MatchOperand_NoMatch) { 3715 if (Res != MatchOperand_Success) 3716 return Res; 3717 if (Nfmt >= 8) { 3718 Error(Parser.getTok().getLoc(), "out of range nfmt"); 3719 return MatchOperand_ParseFail; 3720 } 3721 GotNfmt = true; 3722 Parser.Lex(); 3723 continue; 3724 } 3725 } 3726 break; 3727 } 3728 if (!GotDfmt && !GotNfmt) 3729 return MatchOperand_NoMatch; 3730 auto Format = Dfmt | Nfmt << 4; 3731 Operands.push_back( 3732 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 3733 return MatchOperand_Success; 3734 } 3735 3736 //===----------------------------------------------------------------------===// 3737 // ds 3738 //===----------------------------------------------------------------------===// 3739 3740 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 3741 const OperandVector &Operands) { 3742 OptionalImmIndexMap OptionalIdx; 3743 3744 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3745 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3746 3747 // Add the register arguments 3748 if (Op.isReg()) { 3749 Op.addRegOperands(Inst, 1); 3750 continue; 3751 } 3752 3753 // Handle optional arguments 3754 OptionalIdx[Op.getImmTy()] = i; 3755 } 3756 3757 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 3758 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 3759 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3760 3761 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3762 } 3763 3764 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 3765 bool IsGdsHardcoded) { 3766 OptionalImmIndexMap OptionalIdx; 3767 3768 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3769 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3770 3771 // Add the register arguments 3772 if (Op.isReg()) { 3773 Op.addRegOperands(Inst, 1); 3774 continue; 3775 } 3776 3777 if (Op.isToken() && Op.getToken() == "gds") { 3778 IsGdsHardcoded = true; 3779 continue; 3780 } 3781 3782 // Handle optional arguments 3783 OptionalIdx[Op.getImmTy()] = i; 3784 } 3785 3786 AMDGPUOperand::ImmTy OffsetType = 3787 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si || 3788 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 3789 AMDGPUOperand::ImmTyOffset; 3790 3791 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 3792 3793 if (!IsGdsHardcoded) { 3794 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3795 } 3796 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3797 } 3798 3799 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 3800 OptionalImmIndexMap OptionalIdx; 3801 3802 unsigned OperandIdx[4]; 3803 unsigned EnMask = 0; 3804 int SrcIdx = 0; 3805 3806 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3807 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3808 3809 // Add the register arguments 3810 if (Op.isReg()) { 3811 assert(SrcIdx < 4); 3812 OperandIdx[SrcIdx] = Inst.size(); 3813 Op.addRegOperands(Inst, 1); 3814 ++SrcIdx; 3815 continue; 3816 } 3817 3818 if (Op.isOff()) { 3819 assert(SrcIdx < 4); 3820 OperandIdx[SrcIdx] = Inst.size(); 3821 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 3822 ++SrcIdx; 3823 continue; 3824 } 3825 3826 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 3827 Op.addImmOperands(Inst, 1); 3828 continue; 3829 } 3830 3831 if (Op.isToken() && Op.getToken() == "done") 3832 continue; 3833 3834 // Handle optional arguments 3835 OptionalIdx[Op.getImmTy()] = i; 3836 } 3837 3838 assert(SrcIdx == 4); 3839 3840 bool Compr = false; 3841 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 3842 Compr = true; 3843 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 3844 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 3845 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 3846 } 3847 3848 for (auto i = 0; i < SrcIdx; ++i) { 3849 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 3850 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 3851 } 3852 } 3853 3854 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 3855 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 3856 3857 Inst.addOperand(MCOperand::createImm(EnMask)); 3858 } 3859 3860 //===----------------------------------------------------------------------===// 3861 // s_waitcnt 3862 //===----------------------------------------------------------------------===// 3863 3864 static bool 3865 encodeCnt( 3866 const AMDGPU::IsaVersion ISA, 3867 int64_t &IntVal, 3868 int64_t CntVal, 3869 bool Saturate, 3870 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 3871 unsigned (*decode)(const IsaVersion &Version, unsigned)) 3872 { 3873 bool Failed = false; 3874 3875 IntVal = encode(ISA, IntVal, CntVal); 3876 if (CntVal != decode(ISA, IntVal)) { 3877 if (Saturate) { 3878 IntVal = encode(ISA, IntVal, -1); 3879 } else { 3880 Failed = true; 3881 } 3882 } 3883 return Failed; 3884 } 3885 3886 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 3887 StringRef CntName = Parser.getTok().getString(); 3888 int64_t CntVal; 3889 3890 Parser.Lex(); 3891 if (getLexer().isNot(AsmToken::LParen)) 3892 return true; 3893 3894 Parser.Lex(); 3895 if (getLexer().isNot(AsmToken::Integer)) 3896 return true; 3897 3898 SMLoc ValLoc = Parser.getTok().getLoc(); 3899 if (getParser().parseAbsoluteExpression(CntVal)) 3900 return true; 3901 3902 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3903 3904 bool Failed = true; 3905 bool Sat = CntName.endswith("_sat"); 3906 3907 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 3908 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 3909 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 3910 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 3911 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 3912 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 3913 } 3914 3915 if (Failed) { 3916 Error(ValLoc, "too large value for " + CntName); 3917 return true; 3918 } 3919 3920 if (getLexer().isNot(AsmToken::RParen)) { 3921 return true; 3922 } 3923 3924 Parser.Lex(); 3925 if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { 3926 const AsmToken NextToken = getLexer().peekTok(); 3927 if (NextToken.is(AsmToken::Identifier)) { 3928 Parser.Lex(); 3929 } 3930 } 3931 3932 return false; 3933 } 3934 3935 OperandMatchResultTy 3936 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 3937 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3938 int64_t Waitcnt = getWaitcntBitMask(ISA); 3939 SMLoc S = Parser.getTok().getLoc(); 3940 3941 switch(getLexer().getKind()) { 3942 default: return MatchOperand_ParseFail; 3943 case AsmToken::Integer: 3944 // The operand can be an integer value. 
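// (a raw encoded waitcnt value). The symbolic form, e.g. (illustrative)
// "s_waitcnt vmcnt(0) lgkmcnt(0)", is handled by parseCnt in the Identifier
// case below.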
3945 if (getParser().parseAbsoluteExpression(Waitcnt)) 3946 return MatchOperand_ParseFail; 3947 break; 3948 3949 case AsmToken::Identifier: 3950 do { 3951 if (parseCnt(Waitcnt)) 3952 return MatchOperand_ParseFail; 3953 } while(getLexer().isNot(AsmToken::EndOfStatement)); 3954 break; 3955 } 3956 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 3957 return MatchOperand_Success; 3958 } 3959 3960 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, 3961 int64_t &Width) { 3962 using namespace llvm::AMDGPU::Hwreg; 3963 3964 if (Parser.getTok().getString() != "hwreg") 3965 return true; 3966 Parser.Lex(); 3967 3968 if (getLexer().isNot(AsmToken::LParen)) 3969 return true; 3970 Parser.Lex(); 3971 3972 if (getLexer().is(AsmToken::Identifier)) { 3973 HwReg.IsSymbolic = true; 3974 HwReg.Id = ID_UNKNOWN_; 3975 const StringRef tok = Parser.getTok().getString(); 3976 int Last = ID_SYMBOLIC_LAST_; 3977 if (isSI() || isCI() || isVI()) 3978 Last = ID_SYMBOLIC_FIRST_GFX9_; 3979 for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) { 3980 if (tok == IdSymbolic[i]) { 3981 HwReg.Id = i; 3982 break; 3983 } 3984 } 3985 Parser.Lex(); 3986 } else { 3987 HwReg.IsSymbolic = false; 3988 if (getLexer().isNot(AsmToken::Integer)) 3989 return true; 3990 if (getParser().parseAbsoluteExpression(HwReg.Id)) 3991 return true; 3992 } 3993 3994 if (getLexer().is(AsmToken::RParen)) { 3995 Parser.Lex(); 3996 return false; 3997 } 3998 3999 // optional params 4000 if (getLexer().isNot(AsmToken::Comma)) 4001 return true; 4002 Parser.Lex(); 4003 4004 if (getLexer().isNot(AsmToken::Integer)) 4005 return true; 4006 if (getParser().parseAbsoluteExpression(Offset)) 4007 return true; 4008 4009 if (getLexer().isNot(AsmToken::Comma)) 4010 return true; 4011 Parser.Lex(); 4012 4013 if (getLexer().isNot(AsmToken::Integer)) 4014 return true; 4015 if (getParser().parseAbsoluteExpression(Width)) 4016 return true; 4017 4018 if (getLexer().isNot(AsmToken::RParen)) 4019 return true; 4020 Parser.Lex(); 4021 4022 return false; 4023 } 4024 4025 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4026 using namespace llvm::AMDGPU::Hwreg; 4027 4028 int64_t Imm16Val = 0; 4029 SMLoc S = Parser.getTok().getLoc(); 4030 4031 switch(getLexer().getKind()) { 4032 default: return MatchOperand_NoMatch; 4033 case AsmToken::Integer: 4034 // The operand can be an integer value. 4035 if (getParser().parseAbsoluteExpression(Imm16Val)) 4036 return MatchOperand_NoMatch; 4037 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4038 Error(S, "invalid immediate: only 16-bit values are legal"); 4039 // Do not return error code, but create an imm operand anyway and proceed 4040 // to the next operand, if any. That avoids unneccessary error messages. 
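// The symbolic form handled by parseHwregConstruct in the Identifier case below
// looks like, e.g. (illustrative), "hwreg(HW_REG_TRAPSTS)" or "hwreg(6, 0, 32)",
// where the bit offset and width are optional.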
4041 } 4042 break; 4043 4044 case AsmToken::Identifier: { 4045 OperandInfoTy HwReg(ID_UNKNOWN_); 4046 int64_t Offset = OFFSET_DEFAULT_; 4047 int64_t Width = WIDTH_M1_DEFAULT_ + 1; 4048 if (parseHwregConstruct(HwReg, Offset, Width)) 4049 return MatchOperand_ParseFail; 4050 if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) { 4051 if (HwReg.IsSymbolic) 4052 Error(S, "invalid symbolic name of hardware register"); 4053 else 4054 Error(S, "invalid code of hardware register: only 6-bit values are legal"); 4055 } 4056 if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset)) 4057 Error(S, "invalid bit offset: only 5-bit values are legal"); 4058 if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1)) 4059 Error(S, "invalid bitfield width: only values from 1 to 32 are legal"); 4060 Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_); 4061 } 4062 break; 4063 } 4064 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg)); 4065 return MatchOperand_Success; 4066 } 4067 4068 bool AMDGPUOperand::isSWaitCnt() const { 4069 return isImm(); 4070 } 4071 4072 bool AMDGPUOperand::isHwreg() const { 4073 return isImmTy(ImmTyHwreg); 4074 } 4075 4076 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) { 4077 using namespace llvm::AMDGPU::SendMsg; 4078 4079 if (Parser.getTok().getString() != "sendmsg") 4080 return true; 4081 Parser.Lex(); 4082 4083 if (getLexer().isNot(AsmToken::LParen)) 4084 return true; 4085 Parser.Lex(); 4086 4087 if (getLexer().is(AsmToken::Identifier)) { 4088 Msg.IsSymbolic = true; 4089 Msg.Id = ID_UNKNOWN_; 4090 const std::string tok = Parser.getTok().getString(); 4091 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { 4092 switch(i) { 4093 default: continue; // Omit gaps. 4094 case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break; 4095 } 4096 if (tok == IdSymbolic[i]) { 4097 Msg.Id = i; 4098 break; 4099 } 4100 } 4101 Parser.Lex(); 4102 } else { 4103 Msg.IsSymbolic = false; 4104 if (getLexer().isNot(AsmToken::Integer)) 4105 return true; 4106 if (getParser().parseAbsoluteExpression(Msg.Id)) 4107 return true; 4108 if (getLexer().is(AsmToken::Integer)) 4109 if (getParser().parseAbsoluteExpression(Msg.Id)) 4110 Msg.Id = ID_UNKNOWN_; 4111 } 4112 if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest. 4113 return false; 4114 4115 if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) { 4116 if (getLexer().isNot(AsmToken::RParen)) 4117 return true; 4118 Parser.Lex(); 4119 return false; 4120 } 4121 4122 if (getLexer().isNot(AsmToken::Comma)) 4123 return true; 4124 Parser.Lex(); 4125 4126 assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG); 4127 Operation.Id = ID_UNKNOWN_; 4128 if (getLexer().is(AsmToken::Identifier)) { 4129 Operation.IsSymbolic = true; 4130 const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; 4131 const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; 4132 const int L = (Msg.Id == ID_SYSMSG) ? 
OP_SYS_LAST_ : OP_GS_LAST_; 4133 const StringRef Tok = Parser.getTok().getString(); 4134 for (int i = F; i < L; ++i) { 4135 if (Tok == S[i]) { 4136 Operation.Id = i; 4137 break; 4138 } 4139 } 4140 Parser.Lex(); 4141 } else { 4142 Operation.IsSymbolic = false; 4143 if (getLexer().isNot(AsmToken::Integer)) 4144 return true; 4145 if (getParser().parseAbsoluteExpression(Operation.Id)) 4146 return true; 4147 } 4148 4149 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4150 // Stream id is optional. 4151 if (getLexer().is(AsmToken::RParen)) { 4152 Parser.Lex(); 4153 return false; 4154 } 4155 4156 if (getLexer().isNot(AsmToken::Comma)) 4157 return true; 4158 Parser.Lex(); 4159 4160 if (getLexer().isNot(AsmToken::Integer)) 4161 return true; 4162 if (getParser().parseAbsoluteExpression(StreamId)) 4163 return true; 4164 } 4165 4166 if (getLexer().isNot(AsmToken::RParen)) 4167 return true; 4168 Parser.Lex(); 4169 return false; 4170 } 4171 4172 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 4173 if (getLexer().getKind() != AsmToken::Identifier) 4174 return MatchOperand_NoMatch; 4175 4176 StringRef Str = Parser.getTok().getString(); 4177 int Slot = StringSwitch<int>(Str) 4178 .Case("p10", 0) 4179 .Case("p20", 1) 4180 .Case("p0", 2) 4181 .Default(-1); 4182 4183 SMLoc S = Parser.getTok().getLoc(); 4184 if (Slot == -1) 4185 return MatchOperand_ParseFail; 4186 4187 Parser.Lex(); 4188 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 4189 AMDGPUOperand::ImmTyInterpSlot)); 4190 return MatchOperand_Success; 4191 } 4192 4193 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 4194 if (getLexer().getKind() != AsmToken::Identifier) 4195 return MatchOperand_NoMatch; 4196 4197 StringRef Str = Parser.getTok().getString(); 4198 if (!Str.startswith("attr")) 4199 return MatchOperand_NoMatch; 4200 4201 StringRef Chan = Str.take_back(2); 4202 int AttrChan = StringSwitch<int>(Chan) 4203 .Case(".x", 0) 4204 .Case(".y", 1) 4205 .Case(".z", 2) 4206 .Case(".w", 3) 4207 .Default(-1); 4208 if (AttrChan == -1) 4209 return MatchOperand_ParseFail; 4210 4211 Str = Str.drop_back(2).drop_front(4); 4212 4213 uint8_t Attr; 4214 if (Str.getAsInteger(10, Attr)) 4215 return MatchOperand_ParseFail; 4216 4217 SMLoc S = Parser.getTok().getLoc(); 4218 Parser.Lex(); 4219 if (Attr > 63) { 4220 Error(S, "out of bounds attr"); 4221 return MatchOperand_Success; 4222 } 4223 4224 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 4225 4226 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 4227 AMDGPUOperand::ImmTyInterpAttr)); 4228 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 4229 AMDGPUOperand::ImmTyAttrChan)); 4230 return MatchOperand_Success; 4231 } 4232 4233 void AMDGPUAsmParser::errorExpTgt() { 4234 Error(Parser.getTok().getLoc(), "invalid exp target"); 4235 } 4236 4237 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 4238 uint8_t &Val) { 4239 if (Str == "null") { 4240 Val = 9; 4241 return MatchOperand_Success; 4242 } 4243 4244 if (Str.startswith("mrt")) { 4245 Str = Str.drop_front(3); 4246 if (Str == "z") { // == mrtz 4247 Val = 8; 4248 return MatchOperand_Success; 4249 } 4250 4251 if (Str.getAsInteger(10, Val)) 4252 return MatchOperand_ParseFail; 4253 4254 if (Val > 7) 4255 errorExpTgt(); 4256 4257 return MatchOperand_Success; 4258 } 4259 4260 if (Str.startswith("pos")) { 4261 Str = Str.drop_front(3); 4262 if (Str.getAsInteger(10, Val)) 4263 return MatchOperand_ParseFail; 4264 4265 if (Val > 
3) 4266 errorExpTgt(); 4267 4268 Val += 12; 4269 return MatchOperand_Success; 4270 } 4271 4272 if (Str.startswith("param")) { 4273 Str = Str.drop_front(5); 4274 if (Str.getAsInteger(10, Val)) 4275 return MatchOperand_ParseFail; 4276 4277 if (Val >= 32) 4278 errorExpTgt(); 4279 4280 Val += 32; 4281 return MatchOperand_Success; 4282 } 4283 4284 if (Str.startswith("invalid_target_")) { 4285 Str = Str.drop_front(15); 4286 if (Str.getAsInteger(10, Val)) 4287 return MatchOperand_ParseFail; 4288 4289 errorExpTgt(); 4290 return MatchOperand_Success; 4291 } 4292 4293 return MatchOperand_NoMatch; 4294 } 4295 4296 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 4297 uint8_t Val; 4298 StringRef Str = Parser.getTok().getString(); 4299 4300 auto Res = parseExpTgtImpl(Str, Val); 4301 if (Res != MatchOperand_Success) 4302 return Res; 4303 4304 SMLoc S = Parser.getTok().getLoc(); 4305 Parser.Lex(); 4306 4307 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 4308 AMDGPUOperand::ImmTyExpTgt)); 4309 return MatchOperand_Success; 4310 } 4311 4312 OperandMatchResultTy 4313 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 4314 using namespace llvm::AMDGPU::SendMsg; 4315 4316 int64_t Imm16Val = 0; 4317 SMLoc S = Parser.getTok().getLoc(); 4318 4319 switch(getLexer().getKind()) { 4320 default: 4321 return MatchOperand_NoMatch; 4322 case AsmToken::Integer: 4323 // The operand can be an integer value. 4324 if (getParser().parseAbsoluteExpression(Imm16Val)) 4325 return MatchOperand_NoMatch; 4326 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4327 Error(S, "invalid immediate: only 16-bit values are legal"); 4328 // Do not return error code, but create an imm operand anyway and proceed 4329 // to the next operand, if any. That avoids unneccessary error messages. 4330 } 4331 break; 4332 case AsmToken::Identifier: { 4333 OperandInfoTy Msg(ID_UNKNOWN_); 4334 OperandInfoTy Operation(OP_UNKNOWN_); 4335 int64_t StreamId = STREAM_ID_DEFAULT_; 4336 if (parseSendMsgConstruct(Msg, Operation, StreamId)) 4337 return MatchOperand_ParseFail; 4338 do { 4339 // Validate and encode message ID. 4340 if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE) 4341 || Msg.Id == ID_SYSMSG)) { 4342 if (Msg.IsSymbolic) 4343 Error(S, "invalid/unsupported symbolic name of message"); 4344 else 4345 Error(S, "invalid/unsupported code of message"); 4346 break; 4347 } 4348 Imm16Val = (Msg.Id << ID_SHIFT_); 4349 // Validate and encode operation ID. 4350 if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) { 4351 if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) { 4352 if (Operation.IsSymbolic) 4353 Error(S, "invalid symbolic name of GS_OP"); 4354 else 4355 Error(S, "invalid code of GS_OP: only 2-bit values are legal"); 4356 break; 4357 } 4358 if (Operation.Id == OP_GS_NOP 4359 && Msg.Id != ID_GS_DONE) { 4360 Error(S, "invalid GS_OP: NOP is for GS_DONE only"); 4361 break; 4362 } 4363 Imm16Val |= (Operation.Id << OP_SHIFT_); 4364 } 4365 if (Msg.Id == ID_SYSMSG) { 4366 if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) { 4367 if (Operation.IsSymbolic) 4368 Error(S, "invalid/unsupported symbolic name of SYSMSG_OP"); 4369 else 4370 Error(S, "invalid/unsupported code of SYSMSG_OP"); 4371 break; 4372 } 4373 Imm16Val |= (Operation.Id << OP_SHIFT_); 4374 } 4375 // Validate and encode stream ID. 4376 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4377 if (! 
(STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) { 4378 Error(S, "invalid stream id: only 2-bit values are legal"); 4379 break; 4380 } 4381 Imm16Val |= (StreamId << STREAM_ID_SHIFT_); 4382 } 4383 } while (false); 4384 } 4385 break; 4386 } 4387 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); 4388 return MatchOperand_Success; 4389 } 4390 4391 bool AMDGPUOperand::isSendMsg() const { 4392 return isImmTy(ImmTySendMsg); 4393 } 4394 4395 //===----------------------------------------------------------------------===// 4396 // parser helpers 4397 //===----------------------------------------------------------------------===// 4398 4399 bool 4400 AMDGPUAsmParser::trySkipId(const StringRef Id) { 4401 if (getLexer().getKind() == AsmToken::Identifier && 4402 Parser.getTok().getString() == Id) { 4403 Parser.Lex(); 4404 return true; 4405 } 4406 return false; 4407 } 4408 4409 bool 4410 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 4411 if (getLexer().getKind() == Kind) { 4412 Parser.Lex(); 4413 return true; 4414 } 4415 return false; 4416 } 4417 4418 bool 4419 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 4420 const StringRef ErrMsg) { 4421 if (!trySkipToken(Kind)) { 4422 Error(Parser.getTok().getLoc(), ErrMsg); 4423 return false; 4424 } 4425 return true; 4426 } 4427 4428 bool 4429 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 4430 return !getParser().parseAbsoluteExpression(Imm); 4431 } 4432 4433 bool 4434 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 4435 SMLoc S = Parser.getTok().getLoc(); 4436 if (getLexer().getKind() == AsmToken::String) { 4437 Val = Parser.getTok().getStringContents(); 4438 Parser.Lex(); 4439 return true; 4440 } else { 4441 Error(S, ErrMsg); 4442 return false; 4443 } 4444 } 4445 4446 //===----------------------------------------------------------------------===// 4447 // swizzle 4448 //===----------------------------------------------------------------------===// 4449 4450 LLVM_READNONE 4451 static unsigned 4452 encodeBitmaskPerm(const unsigned AndMask, 4453 const unsigned OrMask, 4454 const unsigned XorMask) { 4455 using namespace llvm::AMDGPU::Swizzle; 4456 4457 return BITMASK_PERM_ENC | 4458 (AndMask << BITMASK_AND_SHIFT) | 4459 (OrMask << BITMASK_OR_SHIFT) | 4460 (XorMask << BITMASK_XOR_SHIFT); 4461 } 4462 4463 bool 4464 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 4465 const unsigned MinVal, 4466 const unsigned MaxVal, 4467 const StringRef ErrMsg) { 4468 for (unsigned i = 0; i < OpNum; ++i) { 4469 if (!skipToken(AsmToken::Comma, "expected a comma")){ 4470 return false; 4471 } 4472 SMLoc ExprLoc = Parser.getTok().getLoc(); 4473 if (!parseExpr(Op[i])) { 4474 return false; 4475 } 4476 if (Op[i] < MinVal || Op[i] > MaxVal) { 4477 Error(ExprLoc, ErrMsg); 4478 return false; 4479 } 4480 } 4481 4482 return true; 4483 } 4484 4485 bool 4486 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 4487 using namespace llvm::AMDGPU::Swizzle; 4488 4489 int64_t Lane[LANE_NUM]; 4490 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 4491 "expected a 2-bit lane id")) { 4492 Imm = QUAD_PERM_ENC; 4493 for (auto i = 0; i < LANE_NUM; ++i) { 4494 Imm |= Lane[i] << (LANE_SHIFT * i); 4495 } 4496 return true; 4497 } 4498 return false; 4499 } 4500 4501 bool 4502 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 4503 using namespace llvm::AMDGPU::Swizzle; 4504 4505 SMLoc S = Parser.getTok().getLoc(); 4506 int64_t GroupSize; 4507 int64_t LaneIdx; 4508 4509 if 
(!parseSwizzleOperands(1, &GroupSize, 4510 2, 32, 4511 "group size must be in the interval [2,32]")) { 4512 return false; 4513 } 4514 if (!isPowerOf2_64(GroupSize)) { 4515 Error(S, "group size must be a power of two"); 4516 return false; 4517 } 4518 if (parseSwizzleOperands(1, &LaneIdx, 4519 0, GroupSize - 1, 4520 "lane id must be in the interval [0,group size - 1]")) { 4521 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 4522 return true; 4523 } 4524 return false; 4525 } 4526 4527 bool 4528 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 4529 using namespace llvm::AMDGPU::Swizzle; 4530 4531 SMLoc S = Parser.getTok().getLoc(); 4532 int64_t GroupSize; 4533 4534 if (!parseSwizzleOperands(1, &GroupSize, 4535 2, 32, "group size must be in the interval [2,32]")) { 4536 return false; 4537 } 4538 if (!isPowerOf2_64(GroupSize)) { 4539 Error(S, "group size must be a power of two"); 4540 return false; 4541 } 4542 4543 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 4544 return true; 4545 } 4546 4547 bool 4548 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 4549 using namespace llvm::AMDGPU::Swizzle; 4550 4551 SMLoc S = Parser.getTok().getLoc(); 4552 int64_t GroupSize; 4553 4554 if (!parseSwizzleOperands(1, &GroupSize, 4555 1, 16, "group size must be in the interval [1,16]")) { 4556 return false; 4557 } 4558 if (!isPowerOf2_64(GroupSize)) { 4559 Error(S, "group size must be a power of two"); 4560 return false; 4561 } 4562 4563 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 4564 return true; 4565 } 4566 4567 bool 4568 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 4569 using namespace llvm::AMDGPU::Swizzle; 4570 4571 if (!skipToken(AsmToken::Comma, "expected a comma")) { 4572 return false; 4573 } 4574 4575 StringRef Ctl; 4576 SMLoc StrLoc = Parser.getTok().getLoc(); 4577 if (!parseString(Ctl)) { 4578 return false; 4579 } 4580 if (Ctl.size() != BITMASK_WIDTH) { 4581 Error(StrLoc, "expected a 5-character mask"); 4582 return false; 4583 } 4584 4585 unsigned AndMask = 0; 4586 unsigned OrMask = 0; 4587 unsigned XorMask = 0; 4588 4589 for (size_t i = 0; i < Ctl.size(); ++i) { 4590 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 4591 switch(Ctl[i]) { 4592 default: 4593 Error(StrLoc, "invalid mask"); 4594 return false; 4595 case '0': 4596 break; 4597 case '1': 4598 OrMask |= Mask; 4599 break; 4600 case 'p': 4601 AndMask |= Mask; 4602 break; 4603 case 'i': 4604 AndMask |= Mask; 4605 XorMask |= Mask; 4606 break; 4607 } 4608 } 4609 4610 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 4611 return true; 4612 } 4613 4614 bool 4615 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 4616 4617 SMLoc OffsetLoc = Parser.getTok().getLoc(); 4618 4619 if (!parseExpr(Imm)) { 4620 return false; 4621 } 4622 if (!isUInt<16>(Imm)) { 4623 Error(OffsetLoc, "expected a 16-bit offset"); 4624 return false; 4625 } 4626 return true; 4627 } 4628 4629 bool 4630 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 4631 using namespace llvm::AMDGPU::Swizzle; 4632 4633 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 4634 4635 SMLoc ModeLoc = Parser.getTok().getLoc(); 4636 bool Ok = false; 4637 4638 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 4639 Ok = parseSwizzleQuadPerm(Imm); 4640 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 4641 Ok = parseSwizzleBitmaskPerm(Imm); 4642 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 4643 Ok = parseSwizzleBroadcast(Imm); 4644 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 4645 Ok = parseSwizzleSwap(Imm); 4646 } else if 
(trySkipId(IdSymbolic[ID_REVERSE])) { 4647 Ok = parseSwizzleReverse(Imm); 4648 } else { 4649 Error(ModeLoc, "expected a swizzle mode"); 4650 } 4651 4652 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 4653 } 4654 4655 return false; 4656 } 4657 4658 OperandMatchResultTy 4659 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 4660 SMLoc S = Parser.getTok().getLoc(); 4661 int64_t Imm = 0; 4662 4663 if (trySkipId("offset")) { 4664 4665 bool Ok = false; 4666 if (skipToken(AsmToken::Colon, "expected a colon")) { 4667 if (trySkipId("swizzle")) { 4668 Ok = parseSwizzleMacro(Imm); 4669 } else { 4670 Ok = parseSwizzleOffset(Imm); 4671 } 4672 } 4673 4674 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 4675 4676 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 4677 } else { 4678 // Swizzle "offset" operand is optional. 4679 // If it is omitted, try parsing other optional operands. 4680 return parseOptionalOpr(Operands); 4681 } 4682 } 4683 4684 bool 4685 AMDGPUOperand::isSwizzle() const { 4686 return isImmTy(ImmTySwizzle); 4687 } 4688 4689 //===----------------------------------------------------------------------===// 4690 // VGPR Index Mode 4691 //===----------------------------------------------------------------------===// 4692 4693 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 4694 4695 using namespace llvm::AMDGPU::VGPRIndexMode; 4696 4697 if (trySkipToken(AsmToken::RParen)) { 4698 return OFF; 4699 } 4700 4701 int64_t Imm = 0; 4702 4703 while (true) { 4704 unsigned Mode = 0; 4705 SMLoc S = Parser.getTok().getLoc(); 4706 4707 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 4708 if (trySkipId(IdSymbolic[ModeId])) { 4709 Mode = 1 << ModeId; 4710 break; 4711 } 4712 } 4713 4714 if (Mode == 0) { 4715 Error(S, (Imm == 0)? 4716 "expected a VGPR index mode or a closing parenthesis" : 4717 "expected a VGPR index mode"); 4718 break; 4719 } 4720 4721 if (Imm & Mode) { 4722 Error(S, "duplicate VGPR index mode"); 4723 break; 4724 } 4725 Imm |= Mode; 4726 4727 if (trySkipToken(AsmToken::RParen)) 4728 break; 4729 if (!skipToken(AsmToken::Comma, 4730 "expected a comma or a closing parenthesis")) 4731 break; 4732 } 4733 4734 return Imm; 4735 } 4736 4737 OperandMatchResultTy 4738 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 4739 4740 int64_t Imm = 0; 4741 SMLoc S = Parser.getTok().getLoc(); 4742 4743 if (getLexer().getKind() == AsmToken::Identifier && 4744 Parser.getTok().getString() == "gpr_idx" && 4745 getLexer().peekTok().is(AsmToken::LParen)) { 4746 4747 Parser.Lex(); 4748 Parser.Lex(); 4749 4750 // If parse failed, trigger an error but do not return error code 4751 // to avoid excessive error messages. 
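// Illustrative syntax accepted by parseGPRIdxMacro: "gpr_idx(SRC0,DST)" or
// "gpr_idx(SRC0,SRC1,SRC2,DST)"; each index mode may be listed at most once.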
4752 Imm = parseGPRIdxMacro(); 4753 4754 } else { 4755 if (getParser().parseAbsoluteExpression(Imm)) 4756 return MatchOperand_NoMatch; 4757 if (Imm < 0 || !isUInt<4>(Imm)) { 4758 Error(S, "invalid immediate: only 4-bit values are legal"); 4759 } 4760 } 4761 4762 Operands.push_back( 4763 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 4764 return MatchOperand_Success; 4765 } 4766 4767 bool AMDGPUOperand::isGPRIdxMode() const { 4768 return isImmTy(ImmTyGprIdxMode); 4769 } 4770 4771 //===----------------------------------------------------------------------===// 4772 // sopp branch targets 4773 //===----------------------------------------------------------------------===// 4774 4775 OperandMatchResultTy 4776 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 4777 SMLoc S = Parser.getTok().getLoc(); 4778 4779 switch (getLexer().getKind()) { 4780 default: return MatchOperand_ParseFail; 4781 case AsmToken::Integer: { 4782 int64_t Imm; 4783 if (getParser().parseAbsoluteExpression(Imm)) 4784 return MatchOperand_ParseFail; 4785 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 4786 return MatchOperand_Success; 4787 } 4788 4789 case AsmToken::Identifier: 4790 Operands.push_back(AMDGPUOperand::CreateExpr(this, 4791 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 4792 Parser.getTok().getString()), getContext()), S)); 4793 Parser.Lex(); 4794 return MatchOperand_Success; 4795 } 4796 } 4797 4798 //===----------------------------------------------------------------------===// 4799 // mubuf 4800 //===----------------------------------------------------------------------===// 4801 4802 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 4803 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 4804 } 4805 4806 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 4807 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 4808 } 4809 4810 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 4811 const OperandVector &Operands, 4812 bool IsAtomic, 4813 bool IsAtomicReturn, 4814 bool IsLds) { 4815 bool IsLdsOpcode = IsLds; 4816 bool HasLdsModifier = false; 4817 OptionalImmIndexMap OptionalIdx; 4818 assert(IsAtomicReturn ? IsAtomic : true); 4819 4820 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4821 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4822 4823 // Add the register arguments 4824 if (Op.isReg()) { 4825 Op.addRegOperands(Inst, 1); 4826 continue; 4827 } 4828 4829 // Handle the case where soffset is an immediate 4830 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4831 Op.addImmOperands(Inst, 1); 4832 continue; 4833 } 4834 4835 HasLdsModifier = Op.isLDS(); 4836 4837 // Handle tokens like 'offen' which are sometimes hard-coded into the 4838 // asm string. There are no MCInst operands for these. 4839 if (Op.isToken()) { 4840 continue; 4841 } 4842 assert(Op.isImm()); 4843 4844 // Handle optional arguments 4845 OptionalIdx[Op.getImmTy()] = i; 4846 } 4847 4848 // This is a workaround for an llvm quirk which may result in an 4849 // incorrect instruction selection. Lds and non-lds versions of 4850 // MUBUF instructions are identical except that lds versions 4851 // have mandatory 'lds' modifier. However this modifier follows 4852 // optional modifiers and llvm asm matcher regards this 'lds' 4853 // modifier as an optional one. As a result, an lds version 4854 // of opcode may be selected even if it has no 'lds' modifier. 
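// When that happens, getMUBUFNoLdsInst (used below) maps the lds opcode back to
// its non-lds counterpart; it returns -1 if no such counterpart exists.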
4855 if (IsLdsOpcode && !HasLdsModifier) { 4856 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 4857 if (NoLdsOpcode != -1) { // Got lds version - correct it. 4858 Inst.setOpcode(NoLdsOpcode); 4859 IsLdsOpcode = false; 4860 } 4861 } 4862 4863 // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns. 4864 if (IsAtomicReturn) { 4865 MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning. 4866 Inst.insert(I, *I); 4867 } 4868 4869 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 4870 if (!IsAtomic) { // glc is hard-coded. 4871 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4872 } 4873 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4874 4875 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 4876 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4877 } 4878 } 4879 4880 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 4881 OptionalImmIndexMap OptionalIdx; 4882 4883 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4884 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4885 4886 // Add the register arguments 4887 if (Op.isReg()) { 4888 Op.addRegOperands(Inst, 1); 4889 continue; 4890 } 4891 4892 // Handle the case where soffset is an immediate 4893 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4894 Op.addImmOperands(Inst, 1); 4895 continue; 4896 } 4897 4898 // Handle tokens like 'offen' which are sometimes hard-coded into the 4899 // asm string. There are no MCInst operands for these. 4900 if (Op.isToken()) { 4901 continue; 4902 } 4903 assert(Op.isImm()); 4904 4905 // Handle optional arguments 4906 OptionalIdx[Op.getImmTy()] = i; 4907 } 4908 4909 addOptionalImmOperand(Inst, Operands, OptionalIdx, 4910 AMDGPUOperand::ImmTyOffset); 4911 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 4912 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4913 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4914 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4915 } 4916 4917 //===----------------------------------------------------------------------===// 4918 // mimg 4919 //===----------------------------------------------------------------------===// 4920 4921 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 4922 bool IsAtomic) { 4923 unsigned I = 1; 4924 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 4925 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 4926 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 4927 } 4928 4929 if (IsAtomic) { 4930 // Add src, same as dst 4931 assert(Desc.getNumDefs() == 1); 4932 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 4933 } 4934 4935 OptionalImmIndexMap OptionalIdx; 4936 4937 for (unsigned E = Operands.size(); I != E; ++I) { 4938 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 4939 4940 // Add the register arguments 4941 if (Op.isReg()) { 4942 Op.addRegOperands(Inst, 1); 4943 } else if (Op.isImmModifier()) { 4944 OptionalIdx[Op.getImmTy()] = I; 4945 } else { 4946 llvm_unreachable("unexpected operand type"); 4947 } 4948 } 4949 4950 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 4951 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 4952 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 
4953 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4954 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 4955 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4956 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 4957 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 4958 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 4959 } 4960 4961 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 4962 cvtMIMG(Inst, Operands, true); 4963 } 4964 4965 //===----------------------------------------------------------------------===// 4966 // smrd 4967 //===----------------------------------------------------------------------===// 4968 4969 bool AMDGPUOperand::isSMRDOffset8() const { 4970 return isImm() && isUInt<8>(getImm()); 4971 } 4972 4973 bool AMDGPUOperand::isSMRDOffset20() const { 4974 return isImm() && isUInt<20>(getImm()); 4975 } 4976 4977 bool AMDGPUOperand::isSMRDLiteralOffset() const { 4978 // 32-bit literals are only supported on CI and we only want to use them 4979 // when the offset is > 8-bits. 4980 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 4981 } 4982 4983 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 4984 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4985 } 4986 4987 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 4988 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4989 } 4990 4991 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 4992 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4993 } 4994 4995 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { 4996 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4997 } 4998 4999 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { 5000 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5001 } 5002 5003 //===----------------------------------------------------------------------===// 5004 // vop3 5005 //===----------------------------------------------------------------------===// 5006 5007 static bool ConvertOmodMul(int64_t &Mul) { 5008 if (Mul != 1 && Mul != 2 && Mul != 4) 5009 return false; 5010 5011 Mul >>= 1; 5012 return true; 5013 } 5014 5015 static bool ConvertOmodDiv(int64_t &Div) { 5016 if (Div == 1) { 5017 Div = 0; 5018 return true; 5019 } 5020 5021 if (Div == 2) { 5022 Div = 3; 5023 return true; 5024 } 5025 5026 return false; 5027 } 5028 5029 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 5030 if (BoundCtrl == 0) { 5031 BoundCtrl = 1; 5032 return true; 5033 } 5034 5035 if (BoundCtrl == -1) { 5036 BoundCtrl = 0; 5037 return true; 5038 } 5039 5040 return false; 5041 } 5042 5043 // Note: the order in this table matches the order of operands in AsmString. 
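// Entries with IsBit == true match a bare token, e.g. "glc" (or "noglc" for an
// explicit zero); the remaining entries match "<name>:<value>" syntax such as
// (illustrative) "offset:16". See parseOptionalOpr for how each kind is
// dispatched.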
5044 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 5045 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 5046 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 5047 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 5048 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 5049 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 5050 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 5051 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 5052 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 5053 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 5054 {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 5055 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 5056 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 5057 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 5058 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5059 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 5060 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 5061 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 5062 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 5063 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 5064 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5065 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5066 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 5067 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5068 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 5069 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 5070 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 5071 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 5072 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 5073 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 5074 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 5075 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 5076 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 5077 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 5078 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 5079 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 5080 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 5081 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr} 5082 }; 5083 5084 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 5085 unsigned size = Operands.size(); 5086 assert(size > 0); 5087 5088 OperandMatchResultTy res = parseOptionalOpr(Operands); 5089 5090 // This is a hack to enable hardcoded mandatory operands which follow 5091 // optional operands. 5092 // 5093 // Current design assumes that all operands after the first optional operand 5094 // are also optional. However implementation of some instructions violates 5095 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 5096 // 5097 // To alleviate this problem, we have to (implicitly) parse extra operands 5098 // to make sure autogenerated parser of custom operands never hit hardcoded 5099 // mandatory operands. 5100 5101 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 5102 5103 // We have parsed the first optional operand. 5104 // Parse as many operands as necessary to skip all mandatory operands. 
5105 5106 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 5107 if (res != MatchOperand_Success || 5108 getLexer().is(AsmToken::EndOfStatement)) break; 5109 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 5110 res = parseOptionalOpr(Operands); 5111 } 5112 } 5113 5114 return res; 5115 } 5116 5117 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 5118 OperandMatchResultTy res; 5119 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 5120 // try to parse any optional operand here 5121 if (Op.IsBit) { 5122 res = parseNamedBit(Op.Name, Operands, Op.Type); 5123 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 5124 res = parseOModOperand(Operands); 5125 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 5126 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 5127 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 5128 res = parseSDWASel(Operands, Op.Name, Op.Type); 5129 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 5130 res = parseSDWADstUnused(Operands); 5131 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 5132 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 5133 Op.Type == AMDGPUOperand::ImmTyNegLo || 5134 Op.Type == AMDGPUOperand::ImmTyNegHi) { 5135 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 5136 Op.ConvertResult); 5137 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) { 5138 res = parseDfmtNfmt(Operands); 5139 } else { 5140 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 5141 } 5142 if (res != MatchOperand_NoMatch) { 5143 return res; 5144 } 5145 } 5146 return MatchOperand_NoMatch; 5147 } 5148 5149 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 5150 StringRef Name = Parser.getTok().getString(); 5151 if (Name == "mul") { 5152 return parseIntWithPrefix("mul", Operands, 5153 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 5154 } 5155 5156 if (Name == "div") { 5157 return parseIntWithPrefix("div", Operands, 5158 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 5159 } 5160 5161 return MatchOperand_NoMatch; 5162 } 5163 5164 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 5165 cvtVOP3P(Inst, Operands); 5166 5167 int Opc = Inst.getOpcode(); 5168 5169 int SrcNum; 5170 const int Ops[] = { AMDGPU::OpName::src0, 5171 AMDGPU::OpName::src1, 5172 AMDGPU::OpName::src2 }; 5173 for (SrcNum = 0; 5174 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 5175 ++SrcNum); 5176 assert(SrcNum > 0); 5177 5178 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5179 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5180 5181 if ((OpSel & (1 << SrcNum)) != 0) { 5182 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 5183 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 5184 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 5185 } 5186 } 5187 5188 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 5189 // 1. This operand is input modifiers 5190 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 5191 // 2. This is not last operand 5192 && Desc.NumOperands > (OpNum + 1) 5193 // 3. Next operand is register class 5194 && Desc.OpInfo[OpNum + 1].RegClass != -1 5195 // 4. 
Next register is not tied to any other operand 5196 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 5197 } 5198 5199 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 5200 { 5201 OptionalImmIndexMap OptionalIdx; 5202 unsigned Opc = Inst.getOpcode(); 5203 5204 unsigned I = 1; 5205 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5206 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5207 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5208 } 5209 5210 for (unsigned E = Operands.size(); I != E; ++I) { 5211 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5212 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5213 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5214 } else if (Op.isInterpSlot() || 5215 Op.isInterpAttr() || 5216 Op.isAttrChan()) { 5217 Inst.addOperand(MCOperand::createImm(Op.Imm.Val)); 5218 } else if (Op.isImmModifier()) { 5219 OptionalIdx[Op.getImmTy()] = I; 5220 } else { 5221 llvm_unreachable("unhandled operand type"); 5222 } 5223 } 5224 5225 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 5226 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 5227 } 5228 5229 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5230 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5231 } 5232 5233 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5234 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5235 } 5236 } 5237 5238 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 5239 OptionalImmIndexMap &OptionalIdx) { 5240 unsigned Opc = Inst.getOpcode(); 5241 5242 unsigned I = 1; 5243 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5244 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5245 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5246 } 5247 5248 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 5249 // This instruction has src modifiers 5250 for (unsigned E = Operands.size(); I != E; ++I) { 5251 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5252 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5253 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5254 } else if (Op.isImmModifier()) { 5255 OptionalIdx[Op.getImmTy()] = I; 5256 } else if (Op.isRegOrImm()) { 5257 Op.addRegOrImmOperands(Inst, 1); 5258 } else { 5259 llvm_unreachable("unhandled operand type"); 5260 } 5261 } 5262 } else { 5263 // No src modifiers 5264 for (unsigned E = Operands.size(); I != E; ++I) { 5265 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5266 if (Op.isMod()) { 5267 OptionalIdx[Op.getImmTy()] = I; 5268 } else { 5269 Op.addRegOrImmOperands(Inst, 1); 5270 } 5271 } 5272 } 5273 5274 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5275 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5276 } 5277 5278 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5279 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5280 } 5281 5282 // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906): 5283 // it has src2 register operand that is tied to dst operand 5284 // we don't allow modifiers for this operand in assembler so src2_modifiers 5285 // should be 0. 
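// The assembly source therefore provides only vdst, src0 and src1; the tied src2
// (a copy of the dst operand) and its zeroed src2_modifiers are inserted here.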

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
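
// Worked example for cvtVOP3P() above (syntax assumed for illustration): for
// a packed op such as "v_pk_add_f16 v0, v1, v2 op_sel:[1,0] neg_lo:[0,1]",
// the parsed op_sel/op_sel_hi/neg_lo/neg_hi immediates are read back out of
// the MCInst and folded, bit J per source J, into srcJ_modifiers using the
// SISrcMods::OP_SEL_0 / OP_SEL_1 / NEG / NEG_HI flags.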

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31);
  }
  return false;
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}
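
// Typical dpp_ctrl spellings accepted by parseDPPCtrl() below (assembly shown
// for illustration): "quad_perm:[0,1,2,3]", "row_shl:1" .. "row_ror:15",
// "wave_shl:1", "wave_ror:1", "row_mirror", "row_half_mirror",
// "row_bcast:15" and "row_bcast:31". Each is folded into a single 9-bit
// immediate of type ImmTyDppCtrl using the DppCtrl::* encodings checked by
// isDPPCtrl() above.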

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i * 2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}
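
// For reference, a full DPP instruction roughly looks like
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
// (syntax assumed for illustration). When row_mask/bank_mask/bound_ctrl are
// omitted, cvtDPP() below falls back to the defaults above (0xf, 0xf and 0).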

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
      continue;
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
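
// The SDWA selectors parsed above use spellings like "dst_sel:BYTE_0",
// "src0_sel:WORD_1", "src1_sel:DWORD" and "dst_unused:UNUSED_PRESERVE"
// (assembly shown for illustration); a full instruction might read roughly
//   v_add_f32_sdwa v0, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1
// cvtSDWA() below then appends whichever of these were omitted, using the
// defaults listed in its switch over the basic instruction type.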

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
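
// For reference (values taken from the defaults above): an SDWA VOP1 such as
// "v_mov_b32_sdwa v0, v1" written with no optional operands is encoded as if
// it carried clamp off, dst_sel:DWORD, dst_unused:UNUSED_PRESERVE and
// src0_sel:DWORD.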

/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks if we are given an immediate operand but
  // expect to get the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}