//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyEndpgm,
    ImmTyHigh
  };

  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    bool IsForcedVOP3;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
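    // (Illustrative example, not an exhaustive rule: an operand spelled 'gds'
    // may come back from the expression parser as a reference to a symbol
    // named 'gds'; isToken() accepts it and getToken() returns that name.)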
217 return isa<MCSymbolRefExpr>(Expr); 218 } 219 220 bool isImm() const override { 221 return Kind == Immediate; 222 } 223 224 bool isInlinableImm(MVT type) const; 225 bool isLiteralImm(MVT type) const; 226 227 bool isRegKind() const { 228 return Kind == Register; 229 } 230 231 bool isReg() const override { 232 return isRegKind() && !hasModifiers(); 233 } 234 235 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 236 return isRegClass(RCID) || isInlinableImm(type); 237 } 238 239 bool isRegOrImmWithInt16InputMods() const { 240 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 241 } 242 243 bool isRegOrImmWithInt32InputMods() const { 244 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 245 } 246 247 bool isRegOrImmWithInt64InputMods() const { 248 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 249 } 250 251 bool isRegOrImmWithFP16InputMods() const { 252 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 253 } 254 255 bool isRegOrImmWithFP32InputMods() const { 256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 257 } 258 259 bool isRegOrImmWithFP64InputMods() const { 260 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 261 } 262 263 bool isVReg() const { 264 return isRegClass(AMDGPU::VGPR_32RegClassID) || 265 isRegClass(AMDGPU::VReg_64RegClassID) || 266 isRegClass(AMDGPU::VReg_96RegClassID) || 267 isRegClass(AMDGPU::VReg_128RegClassID) || 268 isRegClass(AMDGPU::VReg_256RegClassID) || 269 isRegClass(AMDGPU::VReg_512RegClassID); 270 } 271 272 bool isVReg32() const { 273 return isRegClass(AMDGPU::VGPR_32RegClassID); 274 } 275 276 bool isVReg32OrOff() const { 277 return isOff() || isVReg32(); 278 } 279 280 bool isSDWAOperand(MVT type) const; 281 bool isSDWAFP16Operand() const; 282 bool isSDWAFP32Operand() const; 283 bool isSDWAInt16Operand() const; 284 bool isSDWAInt32Operand() const; 285 286 bool isImmTy(ImmTy ImmT) const { 287 return isImm() && Imm.Type == ImmT; 288 } 289 290 bool isImmModifier() const { 291 return isImm() && Imm.Type != ImmTyNone; 292 } 293 294 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 295 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 296 bool isDMask() const { return isImmTy(ImmTyDMask); } 297 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 298 bool isDA() const { return isImmTy(ImmTyDA); } 299 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 300 bool isLWE() const { return isImmTy(ImmTyLWE); } 301 bool isOff() const { return isImmTy(ImmTyOff); } 302 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 303 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 304 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 305 bool isOffen() const { return isImmTy(ImmTyOffen); } 306 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 307 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 308 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 309 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); } 310 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 311 312 bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); } 313 bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); } 314 bool isGDS() const { return isImmTy(ImmTyGDS); } 315 bool isLDS() const { return isImmTy(ImmTyLDS); } 316 bool isGLC() const { return isImmTy(ImmTyGLC); 
} 317 bool isSLC() const { return isImmTy(ImmTySLC); } 318 bool isTFE() const { return isImmTy(ImmTyTFE); } 319 bool isD16() const { return isImmTy(ImmTyD16); } 320 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 321 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 322 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 323 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 324 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 325 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 326 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 327 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 328 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 329 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 330 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 331 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 332 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 333 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 334 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 335 bool isHigh() const { return isImmTy(ImmTyHigh); } 336 337 bool isMod() const { 338 return isClampSI() || isOModSI(); 339 } 340 341 bool isRegOrImm() const { 342 return isReg() || isImm(); 343 } 344 345 bool isRegClass(unsigned RCID) const; 346 347 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 348 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 349 } 350 351 bool isSCSrcB16() const { 352 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 353 } 354 355 bool isSCSrcV2B16() const { 356 return isSCSrcB16(); 357 } 358 359 bool isSCSrcB32() const { 360 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 361 } 362 363 bool isSCSrcB64() const { 364 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 365 } 366 367 bool isSCSrcF16() const { 368 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 369 } 370 371 bool isSCSrcV2F16() const { 372 return isSCSrcF16(); 373 } 374 375 bool isSCSrcF32() const { 376 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 377 } 378 379 bool isSCSrcF64() const { 380 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 381 } 382 383 bool isSSrcB32() const { 384 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 385 } 386 387 bool isSSrcB16() const { 388 return isSCSrcB16() || isLiteralImm(MVT::i16); 389 } 390 391 bool isSSrcV2B16() const { 392 llvm_unreachable("cannot happen"); 393 return isSSrcB16(); 394 } 395 396 bool isSSrcB64() const { 397 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 398 // See isVSrc64(). 
399 return isSCSrcB64() || isLiteralImm(MVT::i64); 400 } 401 402 bool isSSrcF32() const { 403 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 404 } 405 406 bool isSSrcF64() const { 407 return isSCSrcB64() || isLiteralImm(MVT::f64); 408 } 409 410 bool isSSrcF16() const { 411 return isSCSrcB16() || isLiteralImm(MVT::f16); 412 } 413 414 bool isSSrcV2F16() const { 415 llvm_unreachable("cannot happen"); 416 return isSSrcF16(); 417 } 418 419 bool isSSrcOrLdsB32() const { 420 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 421 isLiteralImm(MVT::i32) || isExpr(); 422 } 423 424 bool isVCSrcB32() const { 425 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 426 } 427 428 bool isVCSrcB64() const { 429 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 430 } 431 432 bool isVCSrcB16() const { 433 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 434 } 435 436 bool isVCSrcV2B16() const { 437 return isVCSrcB16(); 438 } 439 440 bool isVCSrcF32() const { 441 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 442 } 443 444 bool isVCSrcF64() const { 445 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 446 } 447 448 bool isVCSrcF16() const { 449 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 450 } 451 452 bool isVCSrcV2F16() const { 453 return isVCSrcF16(); 454 } 455 456 bool isVSrcB32() const { 457 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 458 } 459 460 bool isVSrcB64() const { 461 return isVCSrcF64() || isLiteralImm(MVT::i64); 462 } 463 464 bool isVSrcB16() const { 465 return isVCSrcF16() || isLiteralImm(MVT::i16); 466 } 467 468 bool isVSrcV2B16() const { 469 llvm_unreachable("cannot happen"); 470 return isVSrcB16(); 471 } 472 473 bool isVSrcF32() const { 474 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 475 } 476 477 bool isVSrcF64() const { 478 return isVCSrcF64() || isLiteralImm(MVT::f64); 479 } 480 481 bool isVSrcF16() const { 482 return isVCSrcF16() || isLiteralImm(MVT::f16); 483 } 484 485 bool isVSrcV2F16() const { 486 llvm_unreachable("cannot happen"); 487 return isVSrcF16(); 488 } 489 490 bool isKImmFP32() const { 491 return isLiteralImm(MVT::f32); 492 } 493 494 bool isKImmFP16() const { 495 return isLiteralImm(MVT::f16); 496 } 497 498 bool isMem() const override { 499 return false; 500 } 501 502 bool isExpr() const { 503 return Kind == Expression; 504 } 505 506 bool isSoppBrTarget() const { 507 return isExpr() || isImm(); 508 } 509 510 bool isSWaitCnt() const; 511 bool isHwreg() const; 512 bool isSendMsg() const; 513 bool isSwizzle() const; 514 bool isSMRDOffset8() const; 515 bool isSMRDOffset20() const; 516 bool isSMRDLiteralOffset() const; 517 bool isDPPCtrl() const; 518 bool isGPRIdxMode() const; 519 bool isS16Imm() const; 520 bool isU16Imm() const; 521 bool isEndpgm() const; 522 523 StringRef getExpressionAsToken() const { 524 assert(isExpr()); 525 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 526 return S->getSymbol().getName(); 527 } 528 529 StringRef getToken() const { 530 assert(isToken()); 531 532 if (Kind == Expression) 533 return getExpressionAsToken(); 534 535 return StringRef(Tok.Data, Tok.Length); 536 } 537 538 int64_t getImm() const { 539 assert(isImm()); 540 return Imm.Val; 541 } 542 543 ImmTy getImmTy() const { 544 assert(isImm()); 545 return Imm.Type; 546 } 547 548 unsigned getReg() const override { 549 return Reg.RegNo; 550 } 551 552 SMLoc getStartLoc() const override { 553 return StartLoc; 554 } 555 556 SMLoc getEndLoc() const 
override { 557 return EndLoc; 558 } 559 560 SMRange getLocRange() const { 561 return SMRange(StartLoc, EndLoc); 562 } 563 564 Modifiers getModifiers() const { 565 assert(isRegKind() || isImmTy(ImmTyNone)); 566 return isRegKind() ? Reg.Mods : Imm.Mods; 567 } 568 569 void setModifiers(Modifiers Mods) { 570 assert(isRegKind() || isImmTy(ImmTyNone)); 571 if (isRegKind()) 572 Reg.Mods = Mods; 573 else 574 Imm.Mods = Mods; 575 } 576 577 bool hasModifiers() const { 578 return getModifiers().hasModifiers(); 579 } 580 581 bool hasFPModifiers() const { 582 return getModifiers().hasFPModifiers(); 583 } 584 585 bool hasIntModifiers() const { 586 return getModifiers().hasIntModifiers(); 587 } 588 589 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 590 591 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 592 593 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 594 595 template <unsigned Bitwidth> 596 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 597 598 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 599 addKImmFPOperands<16>(Inst, N); 600 } 601 602 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 603 addKImmFPOperands<32>(Inst, N); 604 } 605 606 void addRegOperands(MCInst &Inst, unsigned N) const; 607 608 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 609 if (isRegKind()) 610 addRegOperands(Inst, N); 611 else if (isExpr()) 612 Inst.addOperand(MCOperand::createExpr(Expr)); 613 else 614 addImmOperands(Inst, N); 615 } 616 617 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 618 Modifiers Mods = getModifiers(); 619 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 620 if (isRegKind()) { 621 addRegOperands(Inst, N); 622 } else { 623 addImmOperands(Inst, N, false); 624 } 625 } 626 627 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 628 assert(!hasIntModifiers()); 629 addRegOrImmWithInputModsOperands(Inst, N); 630 } 631 632 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 633 assert(!hasFPModifiers()); 634 addRegOrImmWithInputModsOperands(Inst, N); 635 } 636 637 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 638 Modifiers Mods = getModifiers(); 639 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 640 assert(isRegKind()); 641 addRegOperands(Inst, N); 642 } 643 644 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 645 assert(!hasIntModifiers()); 646 addRegWithInputModsOperands(Inst, N); 647 } 648 649 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 650 assert(!hasFPModifiers()); 651 addRegWithInputModsOperands(Inst, N); 652 } 653 654 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 655 if (isImm()) 656 addImmOperands(Inst, N); 657 else { 658 assert(isExpr()); 659 Inst.addOperand(MCOperand::createExpr(Expr)); 660 } 661 } 662 663 static void printImmTy(raw_ostream& OS, ImmTy Type) { 664 switch (Type) { 665 case ImmTyNone: OS << "None"; break; 666 case ImmTyGDS: OS << "GDS"; break; 667 case ImmTyLDS: OS << "LDS"; break; 668 case ImmTyOffen: OS << "Offen"; break; 669 case ImmTyIdxen: OS << "Idxen"; break; 670 case ImmTyAddr64: OS << "Addr64"; break; 671 case ImmTyOffset: OS << "Offset"; break; 672 case ImmTyInstOffset: OS << "InstOffset"; break; 673 case ImmTyOffset0: OS << "Offset0"; break; 674 case ImmTyOffset1: OS << "Offset1"; break; 675 case ImmTyGLC: OS << "GLC"; break; 676 case ImmTySLC: OS << "SLC"; 
break; 677 case ImmTyTFE: OS << "TFE"; break; 678 case ImmTyD16: OS << "D16"; break; 679 case ImmTyFORMAT: OS << "FORMAT"; break; 680 case ImmTyClampSI: OS << "ClampSI"; break; 681 case ImmTyOModSI: OS << "OModSI"; break; 682 case ImmTyDppCtrl: OS << "DppCtrl"; break; 683 case ImmTyDppRowMask: OS << "DppRowMask"; break; 684 case ImmTyDppBankMask: OS << "DppBankMask"; break; 685 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 686 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 687 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 688 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 689 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 690 case ImmTyDMask: OS << "DMask"; break; 691 case ImmTyUNorm: OS << "UNorm"; break; 692 case ImmTyDA: OS << "DA"; break; 693 case ImmTyR128A16: OS << "R128A16"; break; 694 case ImmTyLWE: OS << "LWE"; break; 695 case ImmTyOff: OS << "Off"; break; 696 case ImmTyExpTgt: OS << "ExpTgt"; break; 697 case ImmTyExpCompr: OS << "ExpCompr"; break; 698 case ImmTyExpVM: OS << "ExpVM"; break; 699 case ImmTyHwreg: OS << "Hwreg"; break; 700 case ImmTySendMsg: OS << "SendMsg"; break; 701 case ImmTyInterpSlot: OS << "InterpSlot"; break; 702 case ImmTyInterpAttr: OS << "InterpAttr"; break; 703 case ImmTyAttrChan: OS << "AttrChan"; break; 704 case ImmTyOpSel: OS << "OpSel"; break; 705 case ImmTyOpSelHi: OS << "OpSelHi"; break; 706 case ImmTyNegLo: OS << "NegLo"; break; 707 case ImmTyNegHi: OS << "NegHi"; break; 708 case ImmTySwizzle: OS << "Swizzle"; break; 709 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 710 case ImmTyHigh: OS << "High"; break; 711 case ImmTyEndpgm: 712 OS << "Endpgm"; 713 break; 714 } 715 } 716 717 void print(raw_ostream &OS) const override { 718 switch (Kind) { 719 case Register: 720 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 721 break; 722 case Immediate: 723 OS << '<' << getImm(); 724 if (getImmTy() != ImmTyNone) { 725 OS << " type: "; printImmTy(OS, getImmTy()); 726 } 727 OS << " mods: " << Imm.Mods << '>'; 728 break; 729 case Token: 730 OS << '\'' << getToken() << '\''; 731 break; 732 case Expression: 733 OS << "<expr " << *Expr << '>'; 734 break; 735 } 736 } 737 738 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 739 int64_t Val, SMLoc Loc, 740 ImmTy Type = ImmTyNone, 741 bool IsFPImm = false) { 742 auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser); 743 Op->Imm.Val = Val; 744 Op->Imm.IsFPImm = IsFPImm; 745 Op->Imm.Type = Type; 746 Op->Imm.Mods = Modifiers(); 747 Op->StartLoc = Loc; 748 Op->EndLoc = Loc; 749 return Op; 750 } 751 752 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 753 StringRef Str, SMLoc Loc, 754 bool HasExplicitEncodingSize = true) { 755 auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser); 756 Res->Tok.Data = Str.data(); 757 Res->Tok.Length = Str.size(); 758 Res->StartLoc = Loc; 759 Res->EndLoc = Loc; 760 return Res; 761 } 762 763 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 764 unsigned RegNo, SMLoc S, 765 SMLoc E, 766 bool ForceVOP3) { 767 auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser); 768 Op->Reg.RegNo = RegNo; 769 Op->Reg.Mods = Modifiers(); 770 Op->Reg.IsForcedVOP3 = ForceVOP3; 771 Op->StartLoc = S; 772 Op->EndLoc = E; 773 return Op; 774 } 775 776 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 777 const class MCExpr *Expr, SMLoc S) { 778 auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser); 779 Op->Expr = Expr; 780 Op->StartLoc = S; 781 
Op->EndLoc = S; 782 return Op; 783 } 784 }; 785 786 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 787 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 788 return OS; 789 } 790 791 //===----------------------------------------------------------------------===// 792 // AsmParser 793 //===----------------------------------------------------------------------===// 794 795 // Holds info related to the current kernel, e.g. count of SGPRs used. 796 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 797 // .amdgpu_hsa_kernel or at EOF. 798 class KernelScopeInfo { 799 int SgprIndexUnusedMin = -1; 800 int VgprIndexUnusedMin = -1; 801 MCContext *Ctx = nullptr; 802 803 void usesSgprAt(int i) { 804 if (i >= SgprIndexUnusedMin) { 805 SgprIndexUnusedMin = ++i; 806 if (Ctx) { 807 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 808 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 809 } 810 } 811 } 812 813 void usesVgprAt(int i) { 814 if (i >= VgprIndexUnusedMin) { 815 VgprIndexUnusedMin = ++i; 816 if (Ctx) { 817 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 818 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 819 } 820 } 821 } 822 823 public: 824 KernelScopeInfo() = default; 825 826 void initialize(MCContext &Context) { 827 Ctx = &Context; 828 usesSgprAt(SgprIndexUnusedMin = -1); 829 usesVgprAt(VgprIndexUnusedMin = -1); 830 } 831 832 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 833 switch (RegKind) { 834 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 835 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 836 default: break; 837 } 838 } 839 }; 840 841 class AMDGPUAsmParser : public MCTargetAsmParser { 842 MCAsmParser &Parser; 843 844 // Number of extra operands parsed after the first optional operand. 845 // This may be necessary to skip hardcoded mandatory operands. 846 static const unsigned MAX_OPR_LOOKAHEAD = 8; 847 848 unsigned ForcedEncodingSize = 0; 849 bool ForcedDPP = false; 850 bool ForcedSDWA = false; 851 KernelScopeInfo KernelScope; 852 853 /// @name Auto-generated Match Functions 854 /// { 855 856 #define GET_ASSEMBLER_HEADER 857 #include "AMDGPUGenAsmMatcher.inc" 858 859 /// } 860 861 private: 862 bool ParseAsAbsoluteExpression(uint32_t &Ret); 863 bool OutOfRangeError(SMRange Range); 864 /// Calculate VGPR/SGPR blocks required for given target, reserved 865 /// registers, and user-specified NextFreeXGPR values. 866 /// 867 /// \param Features [in] Target features, used for bug corrections. 868 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 869 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 870 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 871 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 872 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 873 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 874 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 875 /// \param VGPRBlocks [out] Result VGPR block count. 876 /// \param SGPRBlocks [out] Result SGPR block count. 
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadata();

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("SOUTHERN_ISLANDS");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
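      // The symbols created below (.amdgcn.gfx_generation_* for code object v3
      // capable targets, .option.machine_version_* otherwise) expose the
      // target ISA version to the assembly source being parsed.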
936 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 937 MCContext &Ctx = getContext(); 938 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 939 MCSymbol *Sym = 940 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 941 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 942 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 943 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 944 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 945 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 946 } else { 947 MCSymbol *Sym = 948 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 949 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 950 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 951 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 952 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 953 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 954 } 955 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 956 initializeGprCountSymbol(IS_VGPR); 957 initializeGprCountSymbol(IS_SGPR); 958 } else 959 KernelScope.initialize(getContext()); 960 } 961 } 962 963 bool hasXNACK() const { 964 return AMDGPU::hasXNACK(getSTI()); 965 } 966 967 bool hasMIMG_R128() const { 968 return AMDGPU::hasMIMG_R128(getSTI()); 969 } 970 971 bool hasPackedD16() const { 972 return AMDGPU::hasPackedD16(getSTI()); 973 } 974 975 bool isSI() const { 976 return AMDGPU::isSI(getSTI()); 977 } 978 979 bool isCI() const { 980 return AMDGPU::isCI(getSTI()); 981 } 982 983 bool isVI() const { 984 return AMDGPU::isVI(getSTI()); 985 } 986 987 bool isGFX9() const { 988 return AMDGPU::isGFX9(getSTI()); 989 } 990 991 bool hasInv2PiInlineImm() const { 992 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 993 } 994 995 bool hasFlatOffsets() const { 996 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 997 } 998 999 bool hasSGPR102_SGPR103() const { 1000 return !isVI(); 1001 } 1002 1003 bool hasIntClamp() const { 1004 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1005 } 1006 1007 AMDGPUTargetStreamer &getTargetStreamer() { 1008 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1009 return static_cast<AMDGPUTargetStreamer &>(TS); 1010 } 1011 1012 const MCRegisterInfo *getMRI() const { 1013 // We need this const_cast because for some reason getContext() is not const 1014 // in MCAsmParser. 
1015 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1016 } 1017 1018 const MCInstrInfo *getMII() const { 1019 return &MII; 1020 } 1021 1022 const FeatureBitset &getFeatureBits() const { 1023 return getSTI().getFeatureBits(); 1024 } 1025 1026 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1027 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1028 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1029 1030 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1031 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1032 bool isForcedDPP() const { return ForcedDPP; } 1033 bool isForcedSDWA() const { return ForcedSDWA; } 1034 ArrayRef<unsigned> getMatchedVariants() const; 1035 1036 std::unique_ptr<AMDGPUOperand> parseRegister(); 1037 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1038 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1039 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1040 unsigned Kind) override; 1041 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1042 OperandVector &Operands, MCStreamer &Out, 1043 uint64_t &ErrorInfo, 1044 bool MatchingInlineAsm) override; 1045 bool ParseDirective(AsmToken DirectiveID) override; 1046 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic); 1047 StringRef parseMnemonicSuffix(StringRef Name); 1048 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1049 SMLoc NameLoc, OperandVector &Operands) override; 1050 //bool ProcessInstruction(MCInst &Inst); 1051 1052 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1053 1054 OperandMatchResultTy 1055 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1056 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1057 bool (*ConvertResult)(int64_t &) = nullptr); 1058 1059 OperandMatchResultTy parseOperandArrayWithPrefix( 1060 const char *Prefix, 1061 OperandVector &Operands, 1062 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1063 bool (*ConvertResult)(int64_t&) = nullptr); 1064 1065 OperandMatchResultTy 1066 parseNamedBit(const char *Name, OperandVector &Operands, 1067 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1068 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1069 StringRef &Value); 1070 1071 bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false); 1072 OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false); 1073 OperandMatchResultTy parseReg(OperandVector &Operands); 1074 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false); 1075 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1076 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1077 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1078 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1079 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1080 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands); 1081 1082 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1083 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1084 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1085 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1086 1087 bool parseCnt(int64_t 
&IntVal); 1088 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1089 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1090 1091 private: 1092 struct OperandInfoTy { 1093 int64_t Id; 1094 bool IsSymbolic = false; 1095 1096 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1097 }; 1098 1099 bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId); 1100 bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1101 1102 void errorExpTgt(); 1103 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1104 1105 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc); 1106 bool validateSOPLiteral(const MCInst &Inst) const; 1107 bool validateConstantBusLimitations(const MCInst &Inst); 1108 bool validateEarlyClobberLimitations(const MCInst &Inst); 1109 bool validateIntClampSupported(const MCInst &Inst); 1110 bool validateMIMGAtomicDMask(const MCInst &Inst); 1111 bool validateMIMGGatherDMask(const MCInst &Inst); 1112 bool validateMIMGDataSize(const MCInst &Inst); 1113 bool validateMIMGD16(const MCInst &Inst); 1114 bool validateLdsDirect(const MCInst &Inst); 1115 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1116 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1117 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1118 1119 bool trySkipId(const StringRef Id); 1120 bool trySkipToken(const AsmToken::TokenKind Kind); 1121 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1122 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1123 bool parseExpr(int64_t &Imm); 1124 1125 public: 1126 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1127 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1128 1129 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1130 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1131 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1132 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1133 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1134 1135 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1136 const unsigned MinVal, 1137 const unsigned MaxVal, 1138 const StringRef ErrMsg); 1139 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1140 bool parseSwizzleOffset(int64_t &Imm); 1141 bool parseSwizzleMacro(int64_t &Imm); 1142 bool parseSwizzleQuadPerm(int64_t &Imm); 1143 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1144 bool parseSwizzleBroadcast(int64_t &Imm); 1145 bool parseSwizzleSwap(int64_t &Imm); 1146 bool parseSwizzleReverse(int64_t &Imm); 1147 1148 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1149 int64_t parseGPRIdxMacro(); 1150 1151 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1152 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1153 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1154 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1155 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1156 1157 AMDGPUOperand::Ptr defaultGLC() const; 1158 AMDGPUOperand::Ptr defaultSLC() const; 1159 1160 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1161 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1162 
AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1163 AMDGPUOperand::Ptr defaultOffsetU12() const; 1164 AMDGPUOperand::Ptr defaultOffsetS13() const; 1165 1166 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1167 1168 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1169 OptionalImmIndexMap &OptionalIdx); 1170 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1171 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1172 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1173 1174 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1175 1176 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1177 bool IsAtomic = false); 1178 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1179 1180 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1181 AMDGPUOperand::Ptr defaultRowMask() const; 1182 AMDGPUOperand::Ptr defaultBankMask() const; 1183 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1184 void cvtDPP(MCInst &Inst, const OperandVector &Operands); 1185 1186 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1187 AMDGPUOperand::ImmTy Type); 1188 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1189 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1190 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1191 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1192 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1193 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1194 uint64_t BasicInstType, bool skipVcc = false); 1195 1196 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1197 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1198 }; 1199 1200 struct OptionalOperand { 1201 const char *Name; 1202 AMDGPUOperand::ImmTy Type; 1203 bool IsBit; 1204 bool (*ConvertResult)(int64_t&); 1205 }; 1206 1207 } // end anonymous namespace 1208 1209 // May be called with integer type with equivalent bitwidth. 
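// Size is the operand size in bytes: 2 selects IEEE half, 4 IEEE single,
// and 8 IEEE double precision.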
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to the operand's floating-point semantics
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 32 bits of the literal to zeroes, but we accept
    // such literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, type);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
            "Can't encode literal as exact 64-bit floating-point operand. "
            "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to the operand's floating-point semantics
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
          OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
        ImmVal |= (ImmVal << 16);
      }

      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
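  // For example, with a 32-bit operand a value such as -1 is kept as an
  // inline constant, while 0x100000000 falls through below and is truncated
  // to its low 32 bits without a diagnostic.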
1503 // FIXME: No errors on truncation 1504 switch (OpTy) { 1505 case AMDGPU::OPERAND_REG_IMM_INT32: 1506 case AMDGPU::OPERAND_REG_IMM_FP32: 1507 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1508 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1509 if (isInt<32>(Val) && 1510 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1511 AsmParser->hasInv2PiInlineImm())) { 1512 Inst.addOperand(MCOperand::createImm(Val)); 1513 return; 1514 } 1515 1516 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1517 return; 1518 1519 case AMDGPU::OPERAND_REG_IMM_INT64: 1520 case AMDGPU::OPERAND_REG_IMM_FP64: 1521 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1522 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1523 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1524 Inst.addOperand(MCOperand::createImm(Val)); 1525 return; 1526 } 1527 1528 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1529 return; 1530 1531 case AMDGPU::OPERAND_REG_IMM_INT16: 1532 case AMDGPU::OPERAND_REG_IMM_FP16: 1533 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1534 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1535 if (isInt<16>(Val) && 1536 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1537 AsmParser->hasInv2PiInlineImm())) { 1538 Inst.addOperand(MCOperand::createImm(Val)); 1539 return; 1540 } 1541 1542 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1543 return; 1544 1545 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1546 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { 1547 auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue()); 1548 assert(AMDGPU::isInlinableLiteral16(LiteralVal, 1549 AsmParser->hasInv2PiInlineImm())); 1550 1551 uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 | 1552 static_cast<uint32_t>(LiteralVal); 1553 Inst.addOperand(MCOperand::createImm(ImmVal)); 1554 return; 1555 } 1556 default: 1557 llvm_unreachable("invalid operand size"); 1558 } 1559 } 1560 1561 template <unsigned Bitwidth> 1562 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1563 APInt Literal(64, Imm.Val); 1564 1565 if (!Imm.IsFPImm) { 1566 // We got int literal token. 
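    // Integer literals are passed through unchanged except that only the low
    // Bitwidth bits are kept.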
1567 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1568 return; 1569 } 1570 1571 bool Lost; 1572 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1573 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1574 APFloat::rmNearestTiesToEven, &Lost); 1575 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1576 } 1577 1578 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1579 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1580 } 1581 1582 //===----------------------------------------------------------------------===// 1583 // AsmParser 1584 //===----------------------------------------------------------------------===// 1585 1586 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1587 if (Is == IS_VGPR) { 1588 switch (RegWidth) { 1589 default: return -1; 1590 case 1: return AMDGPU::VGPR_32RegClassID; 1591 case 2: return AMDGPU::VReg_64RegClassID; 1592 case 3: return AMDGPU::VReg_96RegClassID; 1593 case 4: return AMDGPU::VReg_128RegClassID; 1594 case 8: return AMDGPU::VReg_256RegClassID; 1595 case 16: return AMDGPU::VReg_512RegClassID; 1596 } 1597 } else if (Is == IS_TTMP) { 1598 switch (RegWidth) { 1599 default: return -1; 1600 case 1: return AMDGPU::TTMP_32RegClassID; 1601 case 2: return AMDGPU::TTMP_64RegClassID; 1602 case 4: return AMDGPU::TTMP_128RegClassID; 1603 case 8: return AMDGPU::TTMP_256RegClassID; 1604 case 16: return AMDGPU::TTMP_512RegClassID; 1605 } 1606 } else if (Is == IS_SGPR) { 1607 switch (RegWidth) { 1608 default: return -1; 1609 case 1: return AMDGPU::SGPR_32RegClassID; 1610 case 2: return AMDGPU::SGPR_64RegClassID; 1611 case 4: return AMDGPU::SGPR_128RegClassID; 1612 case 8: return AMDGPU::SGPR_256RegClassID; 1613 case 16: return AMDGPU::SGPR_512RegClassID; 1614 } 1615 } 1616 return -1; 1617 } 1618 1619 static unsigned getSpecialRegForName(StringRef RegName) { 1620 return StringSwitch<unsigned>(RegName) 1621 .Case("exec", AMDGPU::EXEC) 1622 .Case("vcc", AMDGPU::VCC) 1623 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1624 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1625 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1626 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1627 .Case("m0", AMDGPU::M0) 1628 .Case("scc", AMDGPU::SCC) 1629 .Case("tba", AMDGPU::TBA) 1630 .Case("tma", AMDGPU::TMA) 1631 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1632 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1633 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1634 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1635 .Case("vcc_lo", AMDGPU::VCC_LO) 1636 .Case("vcc_hi", AMDGPU::VCC_HI) 1637 .Case("exec_lo", AMDGPU::EXEC_LO) 1638 .Case("exec_hi", AMDGPU::EXEC_HI) 1639 .Case("tma_lo", AMDGPU::TMA_LO) 1640 .Case("tma_hi", AMDGPU::TMA_HI) 1641 .Case("tba_lo", AMDGPU::TBA_LO) 1642 .Case("tba_hi", AMDGPU::TBA_HI) 1643 .Default(0); 1644 } 1645 1646 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1647 SMLoc &EndLoc) { 1648 auto R = parseRegister(); 1649 if (!R) return true; 1650 assert(R->isReg()); 1651 RegNo = R->getReg(); 1652 StartLoc = R->getStartLoc(); 1653 EndLoc = R->getEndLoc(); 1654 return false; 1655 } 1656 1657 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1658 RegisterKind RegKind, unsigned Reg1, 1659 unsigned RegNum) { 1660 switch (RegKind) { 1661 case IS_SPECIAL: 1662 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1663 Reg = AMDGPU::EXEC; 1664 RegWidth = 2; 1665 return true; 1666 } 1667 if (Reg == 
AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1668 Reg = AMDGPU::FLAT_SCR; 1669 RegWidth = 2; 1670 return true; 1671 } 1672 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1673 Reg = AMDGPU::XNACK_MASK; 1674 RegWidth = 2; 1675 return true; 1676 } 1677 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1678 Reg = AMDGPU::VCC; 1679 RegWidth = 2; 1680 return true; 1681 } 1682 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1683 Reg = AMDGPU::TBA; 1684 RegWidth = 2; 1685 return true; 1686 } 1687 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 1688 Reg = AMDGPU::TMA; 1689 RegWidth = 2; 1690 return true; 1691 } 1692 return false; 1693 case IS_VGPR: 1694 case IS_SGPR: 1695 case IS_TTMP: 1696 if (Reg1 != Reg + RegWidth) { 1697 return false; 1698 } 1699 RegWidth++; 1700 return true; 1701 default: 1702 llvm_unreachable("unexpected register kind"); 1703 } 1704 } 1705 1706 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1707 unsigned &RegNum, unsigned &RegWidth, 1708 unsigned *DwordRegIndex) { 1709 if (DwordRegIndex) { *DwordRegIndex = 0; } 1710 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 1711 if (getLexer().is(AsmToken::Identifier)) { 1712 StringRef RegName = Parser.getTok().getString(); 1713 if ((Reg = getSpecialRegForName(RegName))) { 1714 Parser.Lex(); 1715 RegKind = IS_SPECIAL; 1716 } else { 1717 unsigned RegNumIndex = 0; 1718 if (RegName[0] == 'v') { 1719 RegNumIndex = 1; 1720 RegKind = IS_VGPR; 1721 } else if (RegName[0] == 's') { 1722 RegNumIndex = 1; 1723 RegKind = IS_SGPR; 1724 } else if (RegName.startswith("ttmp")) { 1725 RegNumIndex = strlen("ttmp"); 1726 RegKind = IS_TTMP; 1727 } else { 1728 return false; 1729 } 1730 if (RegName.size() > RegNumIndex) { 1731 // Single 32-bit register: vXX. 1732 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 1733 return false; 1734 Parser.Lex(); 1735 RegWidth = 1; 1736 } else { 1737 // Range of registers: v[XX:YY]. ":YY" is optional. 
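        // For example, v[8:11] names a 4-dword VGPR tuple starting at v8,
        // and v[5] (with no ':YY' part) is equivalent to plain v5.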
1738 Parser.Lex(); 1739 int64_t RegLo, RegHi; 1740 if (getLexer().isNot(AsmToken::LBrac)) 1741 return false; 1742 Parser.Lex(); 1743 1744 if (getParser().parseAbsoluteExpression(RegLo)) 1745 return false; 1746 1747 const bool isRBrace = getLexer().is(AsmToken::RBrac); 1748 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 1749 return false; 1750 Parser.Lex(); 1751 1752 if (isRBrace) { 1753 RegHi = RegLo; 1754 } else { 1755 if (getParser().parseAbsoluteExpression(RegHi)) 1756 return false; 1757 1758 if (getLexer().isNot(AsmToken::RBrac)) 1759 return false; 1760 Parser.Lex(); 1761 } 1762 RegNum = (unsigned) RegLo; 1763 RegWidth = (RegHi - RegLo) + 1; 1764 } 1765 } 1766 } else if (getLexer().is(AsmToken::LBrac)) { 1767 // List of consecutive registers: [s0,s1,s2,s3] 1768 Parser.Lex(); 1769 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 1770 return false; 1771 if (RegWidth != 1) 1772 return false; 1773 RegisterKind RegKind1; 1774 unsigned Reg1, RegNum1, RegWidth1; 1775 do { 1776 if (getLexer().is(AsmToken::Comma)) { 1777 Parser.Lex(); 1778 } else if (getLexer().is(AsmToken::RBrac)) { 1779 Parser.Lex(); 1780 break; 1781 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 1782 if (RegWidth1 != 1) { 1783 return false; 1784 } 1785 if (RegKind1 != RegKind) { 1786 return false; 1787 } 1788 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 1789 return false; 1790 } 1791 } else { 1792 return false; 1793 } 1794 } while (true); 1795 } else { 1796 return false; 1797 } 1798 switch (RegKind) { 1799 case IS_SPECIAL: 1800 RegNum = 0; 1801 RegWidth = 1; 1802 break; 1803 case IS_VGPR: 1804 case IS_SGPR: 1805 case IS_TTMP: 1806 { 1807 unsigned Size = 1; 1808 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 1809 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
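      // For example (illustrative): s[2:3] is a valid 64-bit pair because its
      // start index is even, while s[1:2] fails the alignment check below;
      // 128-bit and wider SGPR/TTMP ranges must start at a multiple of 4.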
1810 Size = std::min(RegWidth, 4u); 1811 } 1812 if (RegNum % Size != 0) 1813 return false; 1814 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 1815 RegNum = RegNum / Size; 1816 int RCID = getRegClass(RegKind, RegWidth); 1817 if (RCID == -1) 1818 return false; 1819 const MCRegisterClass RC = TRI->getRegClass(RCID); 1820 if (RegNum >= RC.getNumRegs()) 1821 return false; 1822 Reg = RC.getRegister(RegNum); 1823 break; 1824 } 1825 1826 default: 1827 llvm_unreachable("unexpected register kind"); 1828 } 1829 1830 if (!subtargetHasRegister(*TRI, Reg)) 1831 return false; 1832 return true; 1833 } 1834 1835 Optional<StringRef> 1836 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 1837 switch (RegKind) { 1838 case IS_VGPR: 1839 return StringRef(".amdgcn.next_free_vgpr"); 1840 case IS_SGPR: 1841 return StringRef(".amdgcn.next_free_sgpr"); 1842 default: 1843 return None; 1844 } 1845 } 1846 1847 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 1848 auto SymbolName = getGprCountSymbolName(RegKind); 1849 assert(SymbolName && "initializing invalid register kind"); 1850 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1851 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 1852 } 1853 1854 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 1855 unsigned DwordRegIndex, 1856 unsigned RegWidth) { 1857 // Symbols are only defined for GCN targets 1858 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 1859 return true; 1860 1861 auto SymbolName = getGprCountSymbolName(RegKind); 1862 if (!SymbolName) 1863 return true; 1864 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1865 1866 int64_t NewMax = DwordRegIndex + RegWidth - 1; 1867 int64_t OldCount; 1868 1869 if (!Sym->isVariable()) 1870 return !Error(getParser().getTok().getLoc(), 1871 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 1872 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 1873 return !Error( 1874 getParser().getTok().getLoc(), 1875 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 1876 1877 if (OldCount <= NewMax) 1878 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 1879 1880 return true; 1881 } 1882 1883 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 1884 const auto &Tok = Parser.getTok(); 1885 SMLoc StartLoc = Tok.getLoc(); 1886 SMLoc EndLoc = Tok.getEndLoc(); 1887 RegisterKind RegKind; 1888 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 1889 1890 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 1891 return nullptr; 1892 } 1893 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1894 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 1895 return nullptr; 1896 } else 1897 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 1898 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false); 1899 } 1900 1901 bool 1902 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) { 1903 if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) && 1904 (getLexer().getKind() == AsmToken::Integer || 1905 getLexer().getKind() == AsmToken::Real)) { 1906 // This is a workaround for handling operands like these: 1907 // |1.0| 1908 // |-1| 1909 // This syntax is not compatible with syntax of standard 1910 // MC expressions (due to the trailing '|'). 
1911 1912 SMLoc EndLoc; 1913 const MCExpr *Expr; 1914 1915 if (getParser().parsePrimaryExpr(Expr, EndLoc)) { 1916 return true; 1917 } 1918 1919 return !Expr->evaluateAsAbsolute(Val); 1920 } 1921 1922 return getParser().parseAbsoluteExpression(Val); 1923 } 1924 1925 OperandMatchResultTy 1926 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) { 1927 // TODO: add syntactic sugar for 1/(2*PI) 1928 bool Minus = false; 1929 if (getLexer().getKind() == AsmToken::Minus) { 1930 const AsmToken NextToken = getLexer().peekTok(); 1931 if (!NextToken.is(AsmToken::Integer) && 1932 !NextToken.is(AsmToken::Real)) { 1933 return MatchOperand_NoMatch; 1934 } 1935 Minus = true; 1936 Parser.Lex(); 1937 } 1938 1939 SMLoc S = Parser.getTok().getLoc(); 1940 switch(getLexer().getKind()) { 1941 case AsmToken::Integer: { 1942 int64_t IntVal; 1943 if (parseAbsoluteExpr(IntVal, AbsMod)) 1944 return MatchOperand_ParseFail; 1945 if (Minus) 1946 IntVal *= -1; 1947 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 1948 return MatchOperand_Success; 1949 } 1950 case AsmToken::Real: { 1951 int64_t IntVal; 1952 if (parseAbsoluteExpr(IntVal, AbsMod)) 1953 return MatchOperand_ParseFail; 1954 1955 APFloat F(BitsToDouble(IntVal)); 1956 if (Minus) 1957 F.changeSign(); 1958 Operands.push_back( 1959 AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S, 1960 AMDGPUOperand::ImmTyNone, true)); 1961 return MatchOperand_Success; 1962 } 1963 default: 1964 return MatchOperand_NoMatch; 1965 } 1966 } 1967 1968 OperandMatchResultTy 1969 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 1970 if (auto R = parseRegister()) { 1971 assert(R->isReg()); 1972 R->Reg.IsForcedVOP3 = isForcedVOP3(); 1973 Operands.push_back(std::move(R)); 1974 return MatchOperand_Success; 1975 } 1976 return MatchOperand_NoMatch; 1977 } 1978 1979 OperandMatchResultTy 1980 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) { 1981 auto res = parseImm(Operands, AbsMod); 1982 if (res != MatchOperand_NoMatch) { 1983 return res; 1984 } 1985 1986 return parseReg(Operands); 1987 } 1988 1989 OperandMatchResultTy 1990 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 1991 bool AllowImm) { 1992 bool Negate = false, Negate2 = false, Abs = false, Abs2 = false; 1993 1994 if (getLexer().getKind()== AsmToken::Minus) { 1995 const AsmToken NextToken = getLexer().peekTok(); 1996 1997 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 1998 if (NextToken.is(AsmToken::Minus)) { 1999 Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier"); 2000 return MatchOperand_ParseFail; 2001 } 2002 2003 // '-' followed by an integer literal N should be interpreted as integer 2004 // negation rather than a floating-point NEG modifier applied to N. 
    // Besides being counter-intuitive, such use of a floating-point NEG modifier
    // results in a different meaning of integer literals used with VOP1/2/C
    // and VOP3, for example:
    //   v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
    //   v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
    // Negative fp literals should be handled likewise for uniformity.
    if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
      Parser.Lex();
      Negate = true;
    }
  }

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "neg") {
    if (Negate) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Negate2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after neg");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "abs") {
    Parser.Lex();
    Abs2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after abs");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  if (getLexer().getKind() == AsmToken::Pipe) {
    if (Abs2) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Abs = true;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Res;
  }

  AMDGPUOperand::Modifiers Mods;
  if (Abs) {
    if (getLexer().getKind() != AsmToken::Pipe) {
      Error(Parser.getTok().getLoc(), "expected vertical bar");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }
  if (Abs2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }

  if (Negate) {
    Mods.Neg = true;
  } else if (Negate2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Neg = true;
  }

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = false;

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "sext") {
    Parser.Lex();
    Sext = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after sext");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Res;
  }

  AMDGPUOperand::Modifiers Mods;
  if (Sext) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Sext = true;
  }

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }

  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}

OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  if (Reg) {
    Operands.push_back(std::move(Reg));
    return MatchOperand_Success;
  }

  const AsmToken &Tok = Parser.getTok();
  if (Tok.getString() == "off") {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
                                                AMDGPUOperand::ImmTyOff, false));
    Parser.Lex();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)))
    return Match_InvalidOperand;

  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD.
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
    // FIXME: Produces error without correct column reported.
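    // Illustrative: on subtargets without flat instruction offsets, something
    // like
    //   flat_load_dword v0, v[0:1] offset:16
    // is rejected here, while the same instruction with a zero (or omitted)
    // offset is accepted.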
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
    const auto &Op = Inst.getOperand(OpNum);
    if (Op.getImm() != 0)
      return Match_InvalidOperand;
  }

  return Match_Success;
}

// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
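//
// For reference (illustrative, not an exhaustive list): the inline constants
// tested below are the values that fit directly into the instruction encoding,
// e.g. small integers in the -16..64 range and a few FP values such as 0.5,
// 1.0, 2.0, 4.0 and their negatives, plus 1/(2*pi) on targets reporting
// hasInv2PiInlineImm(). Any other immediate becomes a literal and occupies the
// constant bus.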
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
    } else {
      return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
    }
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  const MCOperand &MO = Inst.getOperand(OpIdx);
  if (MO.isImm()) {
    return !isInlineConstant(Inst, OpIdx);
  }
  return !MO.isReg() ||
         isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
}

bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned ConstantBusUseCount = 0;

  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
2338 // Note that this code mimics SIInstrInfo::verifyInstruction 2339 if (Reg != SGPRUsed) { 2340 ++ConstantBusUseCount; 2341 } 2342 SGPRUsed = Reg; 2343 } else { // Expression or a literal 2344 ++ConstantBusUseCount; 2345 } 2346 } 2347 } 2348 } 2349 2350 return ConstantBusUseCount <= 1; 2351 } 2352 2353 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2354 const unsigned Opcode = Inst.getOpcode(); 2355 const MCInstrDesc &Desc = MII.get(Opcode); 2356 2357 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2358 if (DstIdx == -1 || 2359 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2360 return true; 2361 } 2362 2363 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2364 2365 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2366 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2367 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2368 2369 assert(DstIdx != -1); 2370 const MCOperand &Dst = Inst.getOperand(DstIdx); 2371 assert(Dst.isReg()); 2372 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2373 2374 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2375 2376 for (int SrcIdx : SrcIndices) { 2377 if (SrcIdx == -1) break; 2378 const MCOperand &Src = Inst.getOperand(SrcIdx); 2379 if (Src.isReg()) { 2380 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2381 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2382 return false; 2383 } 2384 } 2385 } 2386 2387 return true; 2388 } 2389 2390 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2391 2392 const unsigned Opc = Inst.getOpcode(); 2393 const MCInstrDesc &Desc = MII.get(Opc); 2394 2395 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2396 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2397 assert(ClampIdx != -1); 2398 return Inst.getOperand(ClampIdx).getImm() == 0; 2399 } 2400 2401 return true; 2402 } 2403 2404 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2405 2406 const unsigned Opc = Inst.getOpcode(); 2407 const MCInstrDesc &Desc = MII.get(Opc); 2408 2409 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2410 return true; 2411 2412 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2413 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2414 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2415 2416 assert(VDataIdx != -1); 2417 assert(DMaskIdx != -1); 2418 assert(TFEIdx != -1); 2419 2420 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2421 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2422 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2423 if (DMask == 0) 2424 DMask = 1; 2425 2426 unsigned DataSize = 2427 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 2428 if (hasPackedD16()) { 2429 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2430 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2431 DataSize = (DataSize + 1) / 2; 2432 } 2433 2434 return (VDataSize / 4) == DataSize + TFESize; 2435 } 2436 2437 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2438 2439 const unsigned Opc = Inst.getOpcode(); 2440 const MCInstrDesc &Desc = MII.get(Opc); 2441 2442 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2443 return true; 2444 if (!Desc.mayLoad() || !Desc.mayStore()) 2445 return true; // Not atomic 2446 2447 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2448 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2449 2450 // This is an incomplete check because image_atomic_cmpswap 2451 // may only use 0x3 and 0xf while other atomic operations 2452 // may use 0x1 and 0x3. However these limitations are 2453 // verified when we check that dmask matches dst size. 2454 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2455 } 2456 2457 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2458 2459 const unsigned Opc = Inst.getOpcode(); 2460 const MCInstrDesc &Desc = MII.get(Opc); 2461 2462 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2463 return true; 2464 2465 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2466 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2467 2468 // GATHER4 instructions use dmask in a different fashion compared to 2469 // other MIMG instructions. The only useful DMASK values are 2470 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2471 // (red,red,red,red) etc.) The ISA document doesn't mention 2472 // this. 2473 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2474 } 2475 2476 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2477 2478 const unsigned Opc = Inst.getOpcode(); 2479 const MCInstrDesc &Desc = MII.get(Opc); 2480 2481 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2482 return true; 2483 2484 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2485 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2486 if (isCI() || isSI()) 2487 return false; 2488 } 2489 2490 return true; 2491 } 2492 2493 static bool IsRevOpcode(const unsigned Opcode) 2494 { 2495 switch (Opcode) { 2496 case AMDGPU::V_SUBREV_F32_e32: 2497 case AMDGPU::V_SUBREV_F32_e64: 2498 case AMDGPU::V_SUBREV_F32_e32_si: 2499 case AMDGPU::V_SUBREV_F32_e32_vi: 2500 case AMDGPU::V_SUBREV_F32_e64_si: 2501 case AMDGPU::V_SUBREV_F32_e64_vi: 2502 case AMDGPU::V_SUBREV_I32_e32: 2503 case AMDGPU::V_SUBREV_I32_e64: 2504 case AMDGPU::V_SUBREV_I32_e32_si: 2505 case AMDGPU::V_SUBREV_I32_e64_si: 2506 case AMDGPU::V_SUBBREV_U32_e32: 2507 case AMDGPU::V_SUBBREV_U32_e64: 2508 case AMDGPU::V_SUBBREV_U32_e32_si: 2509 case AMDGPU::V_SUBBREV_U32_e32_vi: 2510 case AMDGPU::V_SUBBREV_U32_e64_si: 2511 case AMDGPU::V_SUBBREV_U32_e64_vi: 2512 case AMDGPU::V_SUBREV_U32_e32: 2513 case AMDGPU::V_SUBREV_U32_e64: 2514 case AMDGPU::V_SUBREV_U32_e32_gfx9: 2515 case AMDGPU::V_SUBREV_U32_e32_vi: 2516 case AMDGPU::V_SUBREV_U32_e64_gfx9: 2517 case AMDGPU::V_SUBREV_U32_e64_vi: 2518 case AMDGPU::V_SUBREV_F16_e32: 2519 case AMDGPU::V_SUBREV_F16_e64: 2520 case AMDGPU::V_SUBREV_F16_e32_vi: 2521 case AMDGPU::V_SUBREV_F16_e64_vi: 2522 case AMDGPU::V_SUBREV_U16_e32: 2523 case AMDGPU::V_SUBREV_U16_e64: 2524 case AMDGPU::V_SUBREV_U16_e32_vi: 2525 case AMDGPU::V_SUBREV_U16_e64_vi: 2526 case 
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 2527 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 2528 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 2529 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 2530 case AMDGPU::V_LSHLREV_B32_e32_si: 2531 case AMDGPU::V_LSHLREV_B32_e64_si: 2532 case AMDGPU::V_LSHLREV_B16_e32_vi: 2533 case AMDGPU::V_LSHLREV_B16_e64_vi: 2534 case AMDGPU::V_LSHLREV_B32_e32_vi: 2535 case AMDGPU::V_LSHLREV_B32_e64_vi: 2536 case AMDGPU::V_LSHLREV_B64_vi: 2537 case AMDGPU::V_LSHRREV_B32_e32_si: 2538 case AMDGPU::V_LSHRREV_B32_e64_si: 2539 case AMDGPU::V_LSHRREV_B16_e32_vi: 2540 case AMDGPU::V_LSHRREV_B16_e64_vi: 2541 case AMDGPU::V_LSHRREV_B32_e32_vi: 2542 case AMDGPU::V_LSHRREV_B32_e64_vi: 2543 case AMDGPU::V_LSHRREV_B64_vi: 2544 case AMDGPU::V_ASHRREV_I32_e64_si: 2545 case AMDGPU::V_ASHRREV_I32_e32_si: 2546 case AMDGPU::V_ASHRREV_I16_e32_vi: 2547 case AMDGPU::V_ASHRREV_I16_e64_vi: 2548 case AMDGPU::V_ASHRREV_I32_e32_vi: 2549 case AMDGPU::V_ASHRREV_I32_e64_vi: 2550 case AMDGPU::V_ASHRREV_I64_vi: 2551 case AMDGPU::V_PK_LSHLREV_B16_vi: 2552 case AMDGPU::V_PK_LSHRREV_B16_vi: 2553 case AMDGPU::V_PK_ASHRREV_I16_vi: 2554 return true; 2555 default: 2556 return false; 2557 } 2558 } 2559 2560 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 2561 2562 using namespace SIInstrFlags; 2563 const unsigned Opcode = Inst.getOpcode(); 2564 const MCInstrDesc &Desc = MII.get(Opcode); 2565 2566 // lds_direct register is defined so that it can be used 2567 // with 9-bit operands only. Ignore encodings which do not accept these. 2568 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 2569 return true; 2570 2571 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2572 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2573 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2574 2575 const int SrcIndices[] = { Src1Idx, Src2Idx }; 2576 2577 // lds_direct cannot be specified as either src1 or src2. 2578 for (int SrcIdx : SrcIndices) { 2579 if (SrcIdx == -1) break; 2580 const MCOperand &Src = Inst.getOperand(SrcIdx); 2581 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 2582 return false; 2583 } 2584 } 2585 2586 if (Src0Idx == -1) 2587 return true; 2588 2589 const MCOperand &Src = Inst.getOperand(Src0Idx); 2590 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 2591 return true; 2592 2593 // lds_direct is specified as src0. Check additional limitations. 
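  // Illustrative: "v_mov_b32 v0, lds_direct" passes this check because
  // lds_direct is src0 of a plain VOP1 encoding, whereas lds_direct used as
  // src1/src2, in an SDWA encoding, or with a *rev* opcode is rejected.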
2594 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 2595 } 2596 2597 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 2598 unsigned Opcode = Inst.getOpcode(); 2599 const MCInstrDesc &Desc = MII.get(Opcode); 2600 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 2601 return true; 2602 2603 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2604 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2605 2606 const int OpIndices[] = { Src0Idx, Src1Idx }; 2607 2608 unsigned NumLiterals = 0; 2609 uint32_t LiteralValue; 2610 2611 for (int OpIdx : OpIndices) { 2612 if (OpIdx == -1) break; 2613 2614 const MCOperand &MO = Inst.getOperand(OpIdx); 2615 if (MO.isImm() && 2616 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 2617 AMDGPU::isSISrcOperand(Desc, OpIdx) && 2618 !isInlineConstant(Inst, OpIdx)) { 2619 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 2620 if (NumLiterals == 0 || LiteralValue != Value) { 2621 LiteralValue = Value; 2622 ++NumLiterals; 2623 } 2624 } 2625 } 2626 2627 return NumLiterals <= 1; 2628 } 2629 2630 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 2631 const SMLoc &IDLoc) { 2632 if (!validateLdsDirect(Inst)) { 2633 Error(IDLoc, 2634 "invalid use of lds_direct"); 2635 return false; 2636 } 2637 if (!validateSOPLiteral(Inst)) { 2638 Error(IDLoc, 2639 "only one literal operand is allowed"); 2640 return false; 2641 } 2642 if (!validateConstantBusLimitations(Inst)) { 2643 Error(IDLoc, 2644 "invalid operand (violates constant bus restrictions)"); 2645 return false; 2646 } 2647 if (!validateEarlyClobberLimitations(Inst)) { 2648 Error(IDLoc, 2649 "destination must be different than all sources"); 2650 return false; 2651 } 2652 if (!validateIntClampSupported(Inst)) { 2653 Error(IDLoc, 2654 "integer clamping is not supported on this GPU"); 2655 return false; 2656 } 2657 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 2658 if (!validateMIMGD16(Inst)) { 2659 Error(IDLoc, 2660 "d16 modifier is not supported on this GPU"); 2661 return false; 2662 } 2663 if (!validateMIMGDataSize(Inst)) { 2664 Error(IDLoc, 2665 "image data size does not match dmask and tfe"); 2666 return false; 2667 } 2668 if (!validateMIMGAtomicDMask(Inst)) { 2669 Error(IDLoc, 2670 "invalid atomic image dmask"); 2671 return false; 2672 } 2673 if (!validateMIMGGatherDMask(Inst)) { 2674 Error(IDLoc, 2675 "invalid image_gather dmask: only one bit must be set"); 2676 return false; 2677 } 2678 2679 return true; 2680 } 2681 2682 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 2683 const FeatureBitset &FBS, 2684 unsigned VariantID = 0); 2685 2686 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 2687 OperandVector &Operands, 2688 MCStreamer &Out, 2689 uint64_t &ErrorInfo, 2690 bool MatchingInlineAsm) { 2691 MCInst Inst; 2692 unsigned Result = Match_Success; 2693 for (auto Variant : getMatchedVariants()) { 2694 uint64_t EI; 2695 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 2696 Variant); 2697 // We order match statuses from least to most specific. 
We use most specific 2698 // status as resulting 2699 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 2700 if ((R == Match_Success) || 2701 (R == Match_PreferE32) || 2702 (R == Match_MissingFeature && Result != Match_PreferE32) || 2703 (R == Match_InvalidOperand && Result != Match_MissingFeature 2704 && Result != Match_PreferE32) || 2705 (R == Match_MnemonicFail && Result != Match_InvalidOperand 2706 && Result != Match_MissingFeature 2707 && Result != Match_PreferE32)) { 2708 Result = R; 2709 ErrorInfo = EI; 2710 } 2711 if (R == Match_Success) 2712 break; 2713 } 2714 2715 switch (Result) { 2716 default: break; 2717 case Match_Success: 2718 if (!validateInstruction(Inst, IDLoc)) { 2719 return true; 2720 } 2721 Inst.setLoc(IDLoc); 2722 Out.EmitInstruction(Inst, getSTI()); 2723 return false; 2724 2725 case Match_MissingFeature: 2726 return Error(IDLoc, "instruction not supported on this GPU"); 2727 2728 case Match_MnemonicFail: { 2729 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 2730 std::string Suggestion = AMDGPUMnemonicSpellCheck( 2731 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 2732 return Error(IDLoc, "invalid instruction" + Suggestion, 2733 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 2734 } 2735 2736 case Match_InvalidOperand: { 2737 SMLoc ErrorLoc = IDLoc; 2738 if (ErrorInfo != ~0ULL) { 2739 if (ErrorInfo >= Operands.size()) { 2740 return Error(IDLoc, "too few operands for instruction"); 2741 } 2742 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 2743 if (ErrorLoc == SMLoc()) 2744 ErrorLoc = IDLoc; 2745 } 2746 return Error(ErrorLoc, "invalid operand for instruction"); 2747 } 2748 2749 case Match_PreferE32: 2750 return Error(IDLoc, "internal error: instruction without _e64 suffix " 2751 "should be encoded as e32"); 2752 } 2753 llvm_unreachable("Implement any new match types added!"); 2754 } 2755 2756 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 2757 int64_t Tmp = -1; 2758 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 2759 return true; 2760 } 2761 if (getParser().parseAbsoluteExpression(Tmp)) { 2762 return true; 2763 } 2764 Ret = static_cast<uint32_t>(Tmp); 2765 return false; 2766 } 2767 2768 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 2769 uint32_t &Minor) { 2770 if (ParseAsAbsoluteExpression(Major)) 2771 return TokError("invalid major version"); 2772 2773 if (getLexer().isNot(AsmToken::Comma)) 2774 return TokError("minor version number required, comma expected"); 2775 Lex(); 2776 2777 if (ParseAsAbsoluteExpression(Minor)) 2778 return TokError("invalid minor version"); 2779 2780 return false; 2781 } 2782 2783 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 2784 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2785 return TokError("directive only supported for amdgcn architecture"); 2786 2787 std::string Target; 2788 2789 SMLoc TargetStart = getTok().getLoc(); 2790 if (getParser().parseEscapedString(Target)) 2791 return true; 2792 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 2793 2794 std::string ExpectedTarget; 2795 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 2796 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 2797 2798 if (Target != ExpectedTargetOS.str()) 2799 return getParser().Error(TargetRange.Start, "target must match options", 2800 TargetRange); 2801 2802 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 2803 return false; 2804 } 2805 2806 bool 
AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 2807 return getParser().Error(Range.Start, "value out of range", Range); 2808 } 2809 2810 bool AMDGPUAsmParser::calculateGPRBlocks( 2811 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 2812 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange, 2813 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks, 2814 unsigned &SGPRBlocks) { 2815 // TODO(scott.linder): These calculations are duplicated from 2816 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 2817 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 2818 2819 unsigned NumVGPRs = NextFreeVGPR; 2820 unsigned NumSGPRs = NextFreeSGPR; 2821 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI()); 2822 2823 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 2824 NumSGPRs > MaxAddressableNumSGPRs) 2825 return OutOfRangeError(SGPRRange); 2826 2827 NumSGPRs += 2828 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 2829 2830 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 2831 NumSGPRs > MaxAddressableNumSGPRs) 2832 return OutOfRangeError(SGPRRange); 2833 2834 if (Features.test(FeatureSGPRInitBug)) 2835 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 2836 2837 VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs); 2838 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 2839 2840 return false; 2841 } 2842 2843 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 2844 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2845 return TokError("directive only supported for amdgcn architecture"); 2846 2847 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 2848 return TokError("directive only supported for amdhsa OS"); 2849 2850 StringRef KernelName; 2851 if (getParser().parseIdentifier(KernelName)) 2852 return true; 2853 2854 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(); 2855 2856 StringSet<> Seen; 2857 2858 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 2859 2860 SMRange VGPRRange; 2861 uint64_t NextFreeVGPR = 0; 2862 SMRange SGPRRange; 2863 uint64_t NextFreeSGPR = 0; 2864 unsigned UserSGPRCount = 0; 2865 bool ReserveVCC = true; 2866 bool ReserveFlatScr = true; 2867 bool ReserveXNACK = hasXNACK(); 2868 2869 while (true) { 2870 while (getLexer().is(AsmToken::EndOfStatement)) 2871 Lex(); 2872 2873 if (getLexer().isNot(AsmToken::Identifier)) 2874 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 2875 2876 StringRef ID = getTok().getIdentifier(); 2877 SMRange IDRange = getTok().getLocRange(); 2878 Lex(); 2879 2880 if (ID == ".end_amdhsa_kernel") 2881 break; 2882 2883 if (Seen.find(ID) != Seen.end()) 2884 return TokError(".amdhsa_ directives cannot be repeated"); 2885 Seen.insert(ID); 2886 2887 SMLoc ValStart = getTok().getLoc(); 2888 int64_t IVal; 2889 if (getParser().parseAbsoluteExpression(IVal)) 2890 return true; 2891 SMLoc ValEnd = getTok().getLoc(); 2892 SMRange ValRange = SMRange(ValStart, ValEnd); 2893 2894 if (IVal < 0) 2895 return OutOfRangeError(ValRange); 2896 2897 uint64_t Val = IVal; 2898 2899 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 2900 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 2901 return OutOfRangeError(RANGE); \ 2902 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 2903 2904 if (ID == ".amdhsa_group_segment_fixed_size") { 2905 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 2906 return OutOfRangeError(ValRange); 2907 KD.group_segment_fixed_size = Val; 2908 } else if (ID == 
".amdhsa_private_segment_fixed_size") { 2909 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 2910 return OutOfRangeError(ValRange); 2911 KD.private_segment_fixed_size = Val; 2912 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 2913 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2914 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 2915 Val, ValRange); 2916 UserSGPRCount++; 2917 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 2918 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2919 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 2920 ValRange); 2921 UserSGPRCount++; 2922 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 2923 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2924 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 2925 ValRange); 2926 UserSGPRCount++; 2927 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 2928 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2929 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 2930 Val, ValRange); 2931 UserSGPRCount++; 2932 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 2933 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2934 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 2935 ValRange); 2936 UserSGPRCount++; 2937 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 2938 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2939 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 2940 ValRange); 2941 UserSGPRCount++; 2942 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 2943 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2944 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 2945 Val, ValRange); 2946 UserSGPRCount++; 2947 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 2948 PARSE_BITS_ENTRY( 2949 KD.compute_pgm_rsrc2, 2950 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 2951 ValRange); 2952 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 2953 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2954 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 2955 ValRange); 2956 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 2957 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2958 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 2959 ValRange); 2960 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 2961 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2962 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 2963 ValRange); 2964 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 2965 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2966 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 2967 ValRange); 2968 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 2969 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2970 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 2971 ValRange); 2972 } else if (ID == ".amdhsa_next_free_vgpr") { 2973 VGPRRange = ValRange; 2974 NextFreeVGPR = Val; 2975 } else if (ID == ".amdhsa_next_free_sgpr") { 2976 SGPRRange = ValRange; 2977 NextFreeSGPR = Val; 2978 } else if (ID == ".amdhsa_reserve_vcc") { 2979 if (!isUInt<1>(Val)) 2980 return OutOfRangeError(ValRange); 2981 ReserveVCC = Val; 2982 } else if (ID == ".amdhsa_reserve_flat_scratch") { 2983 if (IVersion.Major < 7) 2984 return getParser().Error(IDRange.Start, "directive requires gfx7+", 2985 IDRange); 2986 if (!isUInt<1>(Val)) 2987 return OutOfRangeError(ValRange); 2988 ReserveFlatScr = Val; 2989 } else if (ID == ".amdhsa_reserve_xnack_mask") { 2990 if (IVersion.Major < 8) 2991 return getParser().Error(IDRange.Start, "directive requires gfx8+", 2992 IDRange); 2993 if 
(!isUInt<1>(Val)) 2994 return OutOfRangeError(ValRange); 2995 ReserveXNACK = Val; 2996 } else if (ID == ".amdhsa_float_round_mode_32") { 2997 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2998 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 2999 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3000 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3001 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3002 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3003 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3004 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3005 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3006 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3007 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3008 ValRange); 3009 } else if (ID == ".amdhsa_dx10_clamp") { 3010 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3011 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3012 } else if (ID == ".amdhsa_ieee_mode") { 3013 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3014 Val, ValRange); 3015 } else if (ID == ".amdhsa_fp16_overflow") { 3016 if (IVersion.Major < 9) 3017 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3018 IDRange); 3019 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3020 ValRange); 3021 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3022 PARSE_BITS_ENTRY( 3023 KD.compute_pgm_rsrc2, 3024 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3025 ValRange); 3026 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3027 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3028 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3029 Val, ValRange); 3030 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3031 PARSE_BITS_ENTRY( 3032 KD.compute_pgm_rsrc2, 3033 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3034 ValRange); 3035 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3036 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3037 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3038 Val, ValRange); 3039 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 3040 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3041 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3042 Val, ValRange); 3043 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3044 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3045 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3046 Val, ValRange); 3047 } else if (ID == ".amdhsa_exception_int_div_zero") { 3048 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3049 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3050 Val, ValRange); 3051 } else { 3052 return getParser().Error(IDRange.Start, 3053 "unknown .amdhsa_kernel directive", IDRange); 3054 } 3055 3056 #undef PARSE_BITS_ENTRY 3057 } 3058 3059 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3060 return TokError(".amdhsa_next_free_vgpr directive is required"); 3061 3062 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3063 return TokError(".amdhsa_next_free_sgpr directive is required"); 3064 3065 unsigned VGPRBlocks; 3066 unsigned SGPRBlocks; 3067 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3068 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR, 3069 SGPRRange, VGPRBlocks, SGPRBlocks)) 3070 return true; 3071 3072 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3073 VGPRBlocks)) 3074 return OutOfRangeError(VGPRRange); 3075 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3076 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, 
VGPRBlocks); 3077 3078 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3079 SGPRBlocks)) 3080 return OutOfRangeError(SGPRRange); 3081 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3082 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3083 SGPRBlocks); 3084 3085 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3086 return TokError("too many user SGPRs enabled"); 3087 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3088 UserSGPRCount); 3089 3090 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3091 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3092 ReserveFlatScr, ReserveXNACK); 3093 return false; 3094 } 3095 3096 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3097 uint32_t Major; 3098 uint32_t Minor; 3099 3100 if (ParseDirectiveMajorMinor(Major, Minor)) 3101 return true; 3102 3103 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3104 return false; 3105 } 3106 3107 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3108 uint32_t Major; 3109 uint32_t Minor; 3110 uint32_t Stepping; 3111 StringRef VendorName; 3112 StringRef ArchName; 3113 3114 // If this directive has no arguments, then use the ISA version for the 3115 // targeted GPU. 3116 if (getLexer().is(AsmToken::EndOfStatement)) { 3117 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3118 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3119 ISA.Stepping, 3120 "AMD", "AMDGPU"); 3121 return false; 3122 } 3123 3124 if (ParseDirectiveMajorMinor(Major, Minor)) 3125 return true; 3126 3127 if (getLexer().isNot(AsmToken::Comma)) 3128 return TokError("stepping version number required, comma expected"); 3129 Lex(); 3130 3131 if (ParseAsAbsoluteExpression(Stepping)) 3132 return TokError("invalid stepping version"); 3133 3134 if (getLexer().isNot(AsmToken::Comma)) 3135 return TokError("vendor name required, comma expected"); 3136 Lex(); 3137 3138 if (getLexer().isNot(AsmToken::String)) 3139 return TokError("invalid vendor name"); 3140 3141 VendorName = getLexer().getTok().getStringContents(); 3142 Lex(); 3143 3144 if (getLexer().isNot(AsmToken::Comma)) 3145 return TokError("arch name required, comma expected"); 3146 Lex(); 3147 3148 if (getLexer().isNot(AsmToken::String)) 3149 return TokError("invalid arch name"); 3150 3151 ArchName = getLexer().getTok().getStringContents(); 3152 Lex(); 3153 3154 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3155 VendorName, ArchName); 3156 return false; 3157 } 3158 3159 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3160 amd_kernel_code_t &Header) { 3161 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3162 // assembly for backwards compatibility. 3163 if (ID == "max_scratch_backing_memory_byte_size") { 3164 Parser.eatToEndOfStatement(); 3165 return false; 3166 } 3167 3168 SmallString<40> ErrStr; 3169 raw_svector_ostream Err(ErrStr); 3170 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 3171 return TokError(Err.str()); 3172 } 3173 Lex(); 3174 return false; 3175 } 3176 3177 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3178 amd_kernel_code_t Header; 3179 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3180 3181 while (true) { 3182 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3183 // will set the current token to EndOfStatement. 
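    // The directive body parsed here is a sequence of "<field> = <value>"
    // lines, e.g. (illustrative sketch, field names as used in existing asm
    // tests):
    //   .amd_kernel_code_t
    //     enable_sgpr_kernarg_segment_ptr = 1
    //     wavefront_size = 6
    //   .end_amd_kernel_code_t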
3184 while(getLexer().is(AsmToken::EndOfStatement)) 3185 Lex(); 3186 3187 if (getLexer().isNot(AsmToken::Identifier)) 3188 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3189 3190 StringRef ID = getLexer().getTok().getIdentifier(); 3191 Lex(); 3192 3193 if (ID == ".end_amd_kernel_code_t") 3194 break; 3195 3196 if (ParseAMDKernelCodeTValue(ID, Header)) 3197 return true; 3198 } 3199 3200 getTargetStreamer().EmitAMDKernelCodeT(Header); 3201 3202 return false; 3203 } 3204 3205 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 3206 if (getLexer().isNot(AsmToken::Identifier)) 3207 return TokError("expected symbol name"); 3208 3209 StringRef KernelName = Parser.getTok().getString(); 3210 3211 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 3212 ELF::STT_AMDGPU_HSA_KERNEL); 3213 Lex(); 3214 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 3215 KernelScope.initialize(getContext()); 3216 return false; 3217 } 3218 3219 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 3220 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 3221 return Error(getParser().getTok().getLoc(), 3222 ".amd_amdgpu_isa directive is not available on non-amdgcn " 3223 "architectures"); 3224 } 3225 3226 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 3227 3228 std::string ISAVersionStringFromSTI; 3229 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3230 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3231 3232 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3233 return Error(getParser().getTok().getLoc(), 3234 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3235 "arguments specified through the command line"); 3236 } 3237 3238 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 3239 Lex(); 3240 3241 return false; 3242 } 3243 3244 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 3245 const char *AssemblerDirectiveBegin; 3246 const char *AssemblerDirectiveEnd; 3247 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 3248 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 3249 ? 
std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 3250 HSAMD::V3::AssemblerDirectiveEnd) 3251 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 3252 HSAMD::AssemblerDirectiveEnd); 3253 3254 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 3255 return Error(getParser().getTok().getLoc(), 3256 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 3257 "not available on non-amdhsa OSes")).str()); 3258 } 3259 3260 std::string HSAMetadataString; 3261 raw_string_ostream YamlStream(HSAMetadataString); 3262 3263 getLexer().setSkipSpace(false); 3264 3265 bool FoundEnd = false; 3266 while (!getLexer().is(AsmToken::Eof)) { 3267 while (getLexer().is(AsmToken::Space)) { 3268 YamlStream << getLexer().getTok().getString(); 3269 Lex(); 3270 } 3271 3272 if (getLexer().is(AsmToken::Identifier)) { 3273 StringRef ID = getLexer().getTok().getIdentifier(); 3274 if (ID == AssemblerDirectiveEnd) { 3275 Lex(); 3276 FoundEnd = true; 3277 break; 3278 } 3279 } 3280 3281 YamlStream << Parser.parseStringToEndOfStatement() 3282 << getContext().getAsmInfo()->getSeparatorString(); 3283 3284 Parser.eatToEndOfStatement(); 3285 } 3286 3287 getLexer().setSkipSpace(true); 3288 3289 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 3290 return TokError(Twine("expected directive ") + 3291 Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found")); 3292 } 3293 3294 YamlStream.flush(); 3295 3296 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 3297 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 3298 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3299 } else { 3300 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 3301 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3302 } 3303 3304 return false; 3305 } 3306 3307 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 3308 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 3309 return Error(getParser().getTok().getLoc(), 3310 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 3311 "not available on non-amdpal OSes")).str()); 3312 } 3313 3314 PALMD::Metadata PALMetadata; 3315 for (;;) { 3316 uint32_t Value; 3317 if (ParseAsAbsoluteExpression(Value)) { 3318 return TokError(Twine("invalid value in ") + 3319 Twine(PALMD::AssemblerDirective)); 3320 } 3321 PALMetadata.push_back(Value); 3322 if (getLexer().isNot(AsmToken::Comma)) 3323 break; 3324 Lex(); 3325 } 3326 getTargetStreamer().EmitPALMetadata(PALMetadata); 3327 return false; 3328 } 3329 3330 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 3331 StringRef IDVal = DirectiveID.getString(); 3332 3333 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 3334 if (IDVal == ".amdgcn_target") 3335 return ParseDirectiveAMDGCNTarget(); 3336 3337 if (IDVal == ".amdhsa_kernel") 3338 return ParseDirectiveAMDHSAKernel(); 3339 3340 // TODO: Restructure/combine with PAL metadata directive. 
3341 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 3342 return ParseDirectiveHSAMetadata(); 3343 } else { 3344 if (IDVal == ".hsa_code_object_version") 3345 return ParseDirectiveHSACodeObjectVersion(); 3346 3347 if (IDVal == ".hsa_code_object_isa") 3348 return ParseDirectiveHSACodeObjectISA(); 3349 3350 if (IDVal == ".amd_kernel_code_t") 3351 return ParseDirectiveAMDKernelCodeT(); 3352 3353 if (IDVal == ".amdgpu_hsa_kernel") 3354 return ParseDirectiveAMDGPUHsaKernel(); 3355 3356 if (IDVal == ".amd_amdgpu_isa") 3357 return ParseDirectiveISAVersion(); 3358 3359 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 3360 return ParseDirectiveHSAMetadata(); 3361 } 3362 3363 if (IDVal == PALMD::AssemblerDirective) 3364 return ParseDirectivePALMetadata(); 3365 3366 return true; 3367 } 3368 3369 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 3370 unsigned RegNo) const { 3371 3372 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 3373 R.isValid(); ++R) { 3374 if (*R == RegNo) 3375 return isGFX9(); 3376 } 3377 3378 switch (RegNo) { 3379 case AMDGPU::TBA: 3380 case AMDGPU::TBA_LO: 3381 case AMDGPU::TBA_HI: 3382 case AMDGPU::TMA: 3383 case AMDGPU::TMA_LO: 3384 case AMDGPU::TMA_HI: 3385 return !isGFX9(); 3386 case AMDGPU::XNACK_MASK: 3387 case AMDGPU::XNACK_MASK_LO: 3388 case AMDGPU::XNACK_MASK_HI: 3389 return !isCI() && !isSI() && hasXNACK(); 3390 default: 3391 break; 3392 } 3393 3394 if (isCI()) 3395 return true; 3396 3397 if (isSI()) { 3398 // No flat_scr 3399 switch (RegNo) { 3400 case AMDGPU::FLAT_SCR: 3401 case AMDGPU::FLAT_SCR_LO: 3402 case AMDGPU::FLAT_SCR_HI: 3403 return false; 3404 default: 3405 return true; 3406 } 3407 } 3408 3409 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 3410 // SI/CI have. 3411 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 3412 R.isValid(); ++R) { 3413 if (*R == RegNo) 3414 return false; 3415 } 3416 3417 return true; 3418 } 3419 3420 OperandMatchResultTy 3421 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { 3422 // Try to parse with a custom parser 3423 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 3424 3425 // If we successfully parsed the operand or if there as an error parsing, 3426 // we are done. 3427 // 3428 // If we are parsing after we reach EndOfStatement then this means we 3429 // are appending default values to the Operands list. This is only done 3430 // by custom parser, so we shouldn't continue on to the generic parsing. 3431 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 3432 getLexer().is(AsmToken::EndOfStatement)) 3433 return ResTy; 3434 3435 ResTy = parseRegOrImm(Operands); 3436 3437 if (ResTy == MatchOperand_Success) 3438 return ResTy; 3439 3440 const auto &Tok = Parser.getTok(); 3441 SMLoc S = Tok.getLoc(); 3442 3443 const MCExpr *Expr = nullptr; 3444 if (!Parser.parseExpression(Expr)) { 3445 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3446 return MatchOperand_Success; 3447 } 3448 3449 // Possibly this is an instruction flag like 'gds'. 3450 if (Tok.getKind() == AsmToken::Identifier) { 3451 Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S)); 3452 Parser.Lex(); 3453 return MatchOperand_Success; 3454 } 3455 3456 return MatchOperand_NoMatch; 3457 } 3458 3459 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 3460 // Clear any forced encodings from the previous instruction. 
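  // Illustrative: "v_add_f32_e64" is returned as "v_add_f32" with the 64-bit
  // (VOP3) encoding forced, "_e32" forces the 32-bit encoding, and "_dpp" /
  // "_sdwa" force the DPP and SDWA variants respectively.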
3461 setForcedEncodingSize(0); 3462 setForcedDPP(false); 3463 setForcedSDWA(false); 3464 3465 if (Name.endswith("_e64")) { 3466 setForcedEncodingSize(64); 3467 return Name.substr(0, Name.size() - 4); 3468 } else if (Name.endswith("_e32")) { 3469 setForcedEncodingSize(32); 3470 return Name.substr(0, Name.size() - 4); 3471 } else if (Name.endswith("_dpp")) { 3472 setForcedDPP(true); 3473 return Name.substr(0, Name.size() - 4); 3474 } else if (Name.endswith("_sdwa")) { 3475 setForcedSDWA(true); 3476 return Name.substr(0, Name.size() - 5); 3477 } 3478 return Name; 3479 } 3480 3481 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 3482 StringRef Name, 3483 SMLoc NameLoc, OperandVector &Operands) { 3484 // Add the instruction mnemonic 3485 Name = parseMnemonicSuffix(Name); 3486 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 3487 3488 while (!getLexer().is(AsmToken::EndOfStatement)) { 3489 OperandMatchResultTy Res = parseOperand(Operands, Name); 3490 3491 // Eat the comma or space if there is one. 3492 if (getLexer().is(AsmToken::Comma)) 3493 Parser.Lex(); 3494 3495 switch (Res) { 3496 case MatchOperand_Success: break; 3497 case MatchOperand_ParseFail: 3498 Error(getLexer().getLoc(), "failed parsing operand."); 3499 while (!getLexer().is(AsmToken::EndOfStatement)) { 3500 Parser.Lex(); 3501 } 3502 return true; 3503 case MatchOperand_NoMatch: 3504 Error(getLexer().getLoc(), "not a valid operand."); 3505 while (!getLexer().is(AsmToken::EndOfStatement)) { 3506 Parser.Lex(); 3507 } 3508 return true; 3509 } 3510 } 3511 3512 return false; 3513 } 3514 3515 //===----------------------------------------------------------------------===// 3516 // Utility functions 3517 //===----------------------------------------------------------------------===// 3518 3519 OperandMatchResultTy 3520 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { 3521 switch(getLexer().getKind()) { 3522 default: return MatchOperand_NoMatch; 3523 case AsmToken::Identifier: { 3524 StringRef Name = Parser.getTok().getString(); 3525 if (!Name.equals(Prefix)) { 3526 return MatchOperand_NoMatch; 3527 } 3528 3529 Parser.Lex(); 3530 if (getLexer().isNot(AsmToken::Colon)) 3531 return MatchOperand_ParseFail; 3532 3533 Parser.Lex(); 3534 3535 bool IsMinus = false; 3536 if (getLexer().getKind() == AsmToken::Minus) { 3537 Parser.Lex(); 3538 IsMinus = true; 3539 } 3540 3541 if (getLexer().isNot(AsmToken::Integer)) 3542 return MatchOperand_ParseFail; 3543 3544 if (getParser().parseAbsoluteExpression(Int)) 3545 return MatchOperand_ParseFail; 3546 3547 if (IsMinus) 3548 Int = -Int; 3549 break; 3550 } 3551 } 3552 return MatchOperand_Success; 3553 } 3554 3555 OperandMatchResultTy 3556 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 3557 AMDGPUOperand::ImmTy ImmTy, 3558 bool (*ConvertResult)(int64_t&)) { 3559 SMLoc S = Parser.getTok().getLoc(); 3560 int64_t Value = 0; 3561 3562 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 3563 if (Res != MatchOperand_Success) 3564 return Res; 3565 3566 if (ConvertResult && !ConvertResult(Value)) { 3567 return MatchOperand_ParseFail; 3568 } 3569 3570 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 3571 return MatchOperand_Success; 3572 } 3573 3574 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix( 3575 const char *Prefix, 3576 OperandVector &Operands, 3577 AMDGPUOperand::ImmTy ImmTy, 3578 bool (*ConvertResult)(int64_t&)) { 3579 StringRef Name = Parser.getTok().getString(); 3580 
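  // (Illustrative note: this parser handles bracketed 0/1 lists such as
  // op_sel:[0,1] or neg_lo:[1,0]; each element is ORed into bit I of Val
  // below.)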
if (!Name.equals(Prefix)) 3581 return MatchOperand_NoMatch; 3582 3583 Parser.Lex(); 3584 if (getLexer().isNot(AsmToken::Colon)) 3585 return MatchOperand_ParseFail; 3586 3587 Parser.Lex(); 3588 if (getLexer().isNot(AsmToken::LBrac)) 3589 return MatchOperand_ParseFail; 3590 Parser.Lex(); 3591 3592 unsigned Val = 0; 3593 SMLoc S = Parser.getTok().getLoc(); 3594 3595 // FIXME: How to verify the number of elements matches the number of src 3596 // operands? 3597 for (int I = 0; I < 4; ++I) { 3598 if (I != 0) { 3599 if (getLexer().is(AsmToken::RBrac)) 3600 break; 3601 3602 if (getLexer().isNot(AsmToken::Comma)) 3603 return MatchOperand_ParseFail; 3604 Parser.Lex(); 3605 } 3606 3607 if (getLexer().isNot(AsmToken::Integer)) 3608 return MatchOperand_ParseFail; 3609 3610 int64_t Op; 3611 if (getParser().parseAbsoluteExpression(Op)) 3612 return MatchOperand_ParseFail; 3613 3614 if (Op != 0 && Op != 1) 3615 return MatchOperand_ParseFail; 3616 Val |= (Op << I); 3617 } 3618 3619 Parser.Lex(); 3620 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 3621 return MatchOperand_Success; 3622 } 3623 3624 OperandMatchResultTy 3625 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 3626 AMDGPUOperand::ImmTy ImmTy) { 3627 int64_t Bit = 0; 3628 SMLoc S = Parser.getTok().getLoc(); 3629 3630 // We are at the end of the statement, and this is a default argument, so 3631 // use a default value. 3632 if (getLexer().isNot(AsmToken::EndOfStatement)) { 3633 switch(getLexer().getKind()) { 3634 case AsmToken::Identifier: { 3635 StringRef Tok = Parser.getTok().getString(); 3636 if (Tok == Name) { 3637 if (Tok == "r128" && isGFX9()) 3638 Error(S, "r128 modifier is not supported on this GPU"); 3639 if (Tok == "a16" && !isGFX9()) 3640 Error(S, "a16 modifier is not supported on this GPU"); 3641 Bit = 1; 3642 Parser.Lex(); 3643 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 3644 Bit = 0; 3645 Parser.Lex(); 3646 } else { 3647 return MatchOperand_NoMatch; 3648 } 3649 break; 3650 } 3651 default: 3652 return MatchOperand_NoMatch; 3653 } 3654 } 3655 3656 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 3657 return MatchOperand_Success; 3658 } 3659 3660 static void addOptionalImmOperand( 3661 MCInst& Inst, const OperandVector& Operands, 3662 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 3663 AMDGPUOperand::ImmTy ImmT, 3664 int64_t Default = 0) { 3665 auto i = OptionalIdx.find(ImmT); 3666 if (i != OptionalIdx.end()) { 3667 unsigned Idx = i->second; 3668 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 3669 } else { 3670 Inst.addOperand(MCOperand::createImm(Default)); 3671 } 3672 } 3673 3674 OperandMatchResultTy 3675 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 3676 if (getLexer().isNot(AsmToken::Identifier)) { 3677 return MatchOperand_NoMatch; 3678 } 3679 StringRef Tok = Parser.getTok().getString(); 3680 if (Tok != Prefix) { 3681 return MatchOperand_NoMatch; 3682 } 3683 3684 Parser.Lex(); 3685 if (getLexer().isNot(AsmToken::Colon)) { 3686 return MatchOperand_ParseFail; 3687 } 3688 3689 Parser.Lex(); 3690 if (getLexer().isNot(AsmToken::Identifier)) { 3691 return MatchOperand_ParseFail; 3692 } 3693 3694 Value = Parser.getTok().getString(); 3695 return MatchOperand_Success; 3696 } 3697 3698 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 3699 // values to live in a joint format operand in the MCInst encoding. 
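// For example, "dfmt:4, nfmt:2" (the two may appear in either order, and each
// is optional) is packed as Dfmt | (Nfmt << 4) = 4 | (2 << 4) = 0x24 and
// emitted as a single ImmTyFORMAT immediate below.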
3700 OperandMatchResultTy 3701 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 3702 SMLoc S = Parser.getTok().getLoc(); 3703 int64_t Dfmt = 0, Nfmt = 0; 3704 // dfmt and nfmt can appear in either order, and each is optional. 3705 bool GotDfmt = false, GotNfmt = false; 3706 while (!GotDfmt || !GotNfmt) { 3707 if (!GotDfmt) { 3708 auto Res = parseIntWithPrefix("dfmt", Dfmt); 3709 if (Res != MatchOperand_NoMatch) { 3710 if (Res != MatchOperand_Success) 3711 return Res; 3712 if (Dfmt >= 16) { 3713 Error(Parser.getTok().getLoc(), "out of range dfmt"); 3714 return MatchOperand_ParseFail; 3715 } 3716 GotDfmt = true; 3717 Parser.Lex(); 3718 continue; 3719 } 3720 } 3721 if (!GotNfmt) { 3722 auto Res = parseIntWithPrefix("nfmt", Nfmt); 3723 if (Res != MatchOperand_NoMatch) { 3724 if (Res != MatchOperand_Success) 3725 return Res; 3726 if (Nfmt >= 8) { 3727 Error(Parser.getTok().getLoc(), "out of range nfmt"); 3728 return MatchOperand_ParseFail; 3729 } 3730 GotNfmt = true; 3731 Parser.Lex(); 3732 continue; 3733 } 3734 } 3735 break; 3736 } 3737 if (!GotDfmt && !GotNfmt) 3738 return MatchOperand_NoMatch; 3739 auto Format = Dfmt | Nfmt << 4; 3740 Operands.push_back( 3741 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 3742 return MatchOperand_Success; 3743 } 3744 3745 //===----------------------------------------------------------------------===// 3746 // ds 3747 //===----------------------------------------------------------------------===// 3748 3749 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 3750 const OperandVector &Operands) { 3751 OptionalImmIndexMap OptionalIdx; 3752 3753 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3754 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3755 3756 // Add the register arguments 3757 if (Op.isReg()) { 3758 Op.addRegOperands(Inst, 1); 3759 continue; 3760 } 3761 3762 // Handle optional arguments 3763 OptionalIdx[Op.getImmTy()] = i; 3764 } 3765 3766 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 3767 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 3768 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3769 3770 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3771 } 3772 3773 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 3774 bool IsGdsHardcoded) { 3775 OptionalImmIndexMap OptionalIdx; 3776 3777 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3778 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3779 3780 // Add the register arguments 3781 if (Op.isReg()) { 3782 Op.addRegOperands(Inst, 1); 3783 continue; 3784 } 3785 3786 if (Op.isToken() && Op.getToken() == "gds") { 3787 IsGdsHardcoded = true; 3788 continue; 3789 } 3790 3791 // Handle optional arguments 3792 OptionalIdx[Op.getImmTy()] = i; 3793 } 3794 3795 AMDGPUOperand::ImmTy OffsetType = 3796 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si || 3797 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 3798 AMDGPUOperand::ImmTyOffset; 3799 3800 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 3801 3802 if (!IsGdsHardcoded) { 3803 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3804 } 3805 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3806 } 3807 3808 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 3809 OptionalImmIndexMap OptionalIdx; 3810 3811 unsigned OperandIdx[4]; 3812 unsigned EnMask = 0; 3813 int SrcIdx = 0; 3814 3815 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3816 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3817 3818 // Add the register arguments 3819 if (Op.isReg()) { 3820 assert(SrcIdx < 4); 3821 OperandIdx[SrcIdx] = Inst.size(); 3822 Op.addRegOperands(Inst, 1); 3823 ++SrcIdx; 3824 continue; 3825 } 3826 3827 if (Op.isOff()) { 3828 assert(SrcIdx < 4); 3829 OperandIdx[SrcIdx] = Inst.size(); 3830 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 3831 ++SrcIdx; 3832 continue; 3833 } 3834 3835 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 3836 Op.addImmOperands(Inst, 1); 3837 continue; 3838 } 3839 3840 if (Op.isToken() && Op.getToken() == "done") 3841 continue; 3842 3843 // Handle optional arguments 3844 OptionalIdx[Op.getImmTy()] = i; 3845 } 3846 3847 assert(SrcIdx == 4); 3848 3849 bool Compr = false; 3850 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 3851 Compr = true; 3852 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 3853 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 3854 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 3855 } 3856 3857 for (auto i = 0; i < SrcIdx; ++i) { 3858 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 3859 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 3860 } 3861 } 3862 3863 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 3864 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 3865 3866 Inst.addOperand(MCOperand::createImm(EnMask)); 3867 } 3868 3869 //===----------------------------------------------------------------------===// 3870 // s_waitcnt 3871 //===----------------------------------------------------------------------===// 3872 3873 static bool 3874 encodeCnt( 3875 const AMDGPU::IsaVersion ISA, 3876 int64_t &IntVal, 3877 int64_t CntVal, 3878 bool Saturate, 3879 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 3880 unsigned (*decode)(const IsaVersion &Version, unsigned)) 3881 { 3882 bool Failed = false; 3883 3884 IntVal = encode(ISA, IntVal, CntVal); 3885 if (CntVal != decode(ISA, IntVal)) { 3886 if (Saturate) { 3887 IntVal = encode(ISA, IntVal, -1); 3888 } else { 3889 Failed = true; 3890 } 3891 } 3892 return Failed; 3893 } 3894 3895 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 3896 StringRef CntName = Parser.getTok().getString(); 3897 int64_t CntVal; 3898 3899 Parser.Lex(); 3900 if (getLexer().isNot(AsmToken::LParen)) 3901 return true; 3902 3903 Parser.Lex(); 3904 if (getLexer().isNot(AsmToken::Integer)) 3905 return true; 3906 3907 SMLoc ValLoc = Parser.getTok().getLoc(); 3908 if (getParser().parseAbsoluteExpression(CntVal)) 3909 return true; 3910 3911 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3912 3913 bool Failed = true; 3914 bool Sat = CntName.endswith("_sat"); 3915 3916 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 3917 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 3918 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 3919 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 3920 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 3921 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 3922 } 3923 3924 if (Failed) { 3925 Error(ValLoc, "too large value for " + CntName); 3926 return true; 3927 } 3928 3929 if (getLexer().isNot(AsmToken::RParen)) { 3930 return true; 3931 } 3932 3933 Parser.Lex(); 3934 if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { 3935 const AsmToken NextToken = getLexer().peekTok(); 3936 if (NextToken.is(AsmToken::Identifier)) { 3937 Parser.Lex(); 3938 } 3939 } 3940 3941 return false; 3942 } 3943 3944 OperandMatchResultTy 3945 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 3946 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3947 int64_t Waitcnt = getWaitcntBitMask(ISA); 3948 SMLoc S = Parser.getTok().getLoc(); 3949 3950 switch(getLexer().getKind()) { 3951 default: return MatchOperand_ParseFail; 3952 case AsmToken::Integer: 3953 // The operand can be an integer value. 
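    // e.g. "s_waitcnt 0" passes the raw encoding through unchanged, while
    // symbolic forms such as "s_waitcnt vmcnt(0) lgkmcnt(0)" take the
    // Identifier path below, where parseCnt() folds each named counter into
    // the default mask obtained from getWaitcntBitMask().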
    if (getParser().parseAbsoluteExpression(Waitcnt))
      return MatchOperand_ParseFail;
    break;

  case AsmToken::Identifier:
    do {
      if (parseCnt(Waitcnt))
        return MatchOperand_ParseFail;
    } while(getLexer().isNot(AsmToken::EndOfStatement));
    break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return MatchOperand_Success;
}

bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
                                          int64_t &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (Parser.getTok().getString() != "hwreg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    HwReg.IsSymbolic = true;
    HwReg.Id = ID_UNKNOWN_;
    const StringRef tok = Parser.getTok().getString();
    int Last = ID_SYMBOLIC_LAST_;
    if (isSI() || isCI() || isVI())
      Last = ID_SYMBOLIC_FIRST_GFX9_;
    for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
      if (tok == IdSymbolic[i]) {
        HwReg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    HwReg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(HwReg.Id))
      return true;
  }

  if (getLexer().is(AsmToken::RParen)) {
    Parser.Lex();
    return false;
  }

  // optional params
  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Offset))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Width))
    return true;

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();

  return false;
}

OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default: return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return error code, but create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unnecessary error messages.
4050 } 4051 break; 4052 4053 case AsmToken::Identifier: { 4054 OperandInfoTy HwReg(ID_UNKNOWN_); 4055 int64_t Offset = OFFSET_DEFAULT_; 4056 int64_t Width = WIDTH_M1_DEFAULT_ + 1; 4057 if (parseHwregConstruct(HwReg, Offset, Width)) 4058 return MatchOperand_ParseFail; 4059 if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) { 4060 if (HwReg.IsSymbolic) 4061 Error(S, "invalid symbolic name of hardware register"); 4062 else 4063 Error(S, "invalid code of hardware register: only 6-bit values are legal"); 4064 } 4065 if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset)) 4066 Error(S, "invalid bit offset: only 5-bit values are legal"); 4067 if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1)) 4068 Error(S, "invalid bitfield width: only values from 1 to 32 are legal"); 4069 Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_); 4070 } 4071 break; 4072 } 4073 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg)); 4074 return MatchOperand_Success; 4075 } 4076 4077 bool AMDGPUOperand::isSWaitCnt() const { 4078 return isImm(); 4079 } 4080 4081 bool AMDGPUOperand::isHwreg() const { 4082 return isImmTy(ImmTyHwreg); 4083 } 4084 4085 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) { 4086 using namespace llvm::AMDGPU::SendMsg; 4087 4088 if (Parser.getTok().getString() != "sendmsg") 4089 return true; 4090 Parser.Lex(); 4091 4092 if (getLexer().isNot(AsmToken::LParen)) 4093 return true; 4094 Parser.Lex(); 4095 4096 if (getLexer().is(AsmToken::Identifier)) { 4097 Msg.IsSymbolic = true; 4098 Msg.Id = ID_UNKNOWN_; 4099 const std::string tok = Parser.getTok().getString(); 4100 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { 4101 switch(i) { 4102 default: continue; // Omit gaps. 4103 case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break; 4104 } 4105 if (tok == IdSymbolic[i]) { 4106 Msg.Id = i; 4107 break; 4108 } 4109 } 4110 Parser.Lex(); 4111 } else { 4112 Msg.IsSymbolic = false; 4113 if (getLexer().isNot(AsmToken::Integer)) 4114 return true; 4115 if (getParser().parseAbsoluteExpression(Msg.Id)) 4116 return true; 4117 if (getLexer().is(AsmToken::Integer)) 4118 if (getParser().parseAbsoluteExpression(Msg.Id)) 4119 Msg.Id = ID_UNKNOWN_; 4120 } 4121 if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest. 4122 return false; 4123 4124 if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) { 4125 if (getLexer().isNot(AsmToken::RParen)) 4126 return true; 4127 Parser.Lex(); 4128 return false; 4129 } 4130 4131 if (getLexer().isNot(AsmToken::Comma)) 4132 return true; 4133 Parser.Lex(); 4134 4135 assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG); 4136 Operation.Id = ID_UNKNOWN_; 4137 if (getLexer().is(AsmToken::Identifier)) { 4138 Operation.IsSymbolic = true; 4139 const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; 4140 const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; 4141 const int L = (Msg.Id == ID_SYSMSG) ? 
OP_SYS_LAST_ : OP_GS_LAST_; 4142 const StringRef Tok = Parser.getTok().getString(); 4143 for (int i = F; i < L; ++i) { 4144 if (Tok == S[i]) { 4145 Operation.Id = i; 4146 break; 4147 } 4148 } 4149 Parser.Lex(); 4150 } else { 4151 Operation.IsSymbolic = false; 4152 if (getLexer().isNot(AsmToken::Integer)) 4153 return true; 4154 if (getParser().parseAbsoluteExpression(Operation.Id)) 4155 return true; 4156 } 4157 4158 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4159 // Stream id is optional. 4160 if (getLexer().is(AsmToken::RParen)) { 4161 Parser.Lex(); 4162 return false; 4163 } 4164 4165 if (getLexer().isNot(AsmToken::Comma)) 4166 return true; 4167 Parser.Lex(); 4168 4169 if (getLexer().isNot(AsmToken::Integer)) 4170 return true; 4171 if (getParser().parseAbsoluteExpression(StreamId)) 4172 return true; 4173 } 4174 4175 if (getLexer().isNot(AsmToken::RParen)) 4176 return true; 4177 Parser.Lex(); 4178 return false; 4179 } 4180 4181 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 4182 if (getLexer().getKind() != AsmToken::Identifier) 4183 return MatchOperand_NoMatch; 4184 4185 StringRef Str = Parser.getTok().getString(); 4186 int Slot = StringSwitch<int>(Str) 4187 .Case("p10", 0) 4188 .Case("p20", 1) 4189 .Case("p0", 2) 4190 .Default(-1); 4191 4192 SMLoc S = Parser.getTok().getLoc(); 4193 if (Slot == -1) 4194 return MatchOperand_ParseFail; 4195 4196 Parser.Lex(); 4197 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 4198 AMDGPUOperand::ImmTyInterpSlot)); 4199 return MatchOperand_Success; 4200 } 4201 4202 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 4203 if (getLexer().getKind() != AsmToken::Identifier) 4204 return MatchOperand_NoMatch; 4205 4206 StringRef Str = Parser.getTok().getString(); 4207 if (!Str.startswith("attr")) 4208 return MatchOperand_NoMatch; 4209 4210 StringRef Chan = Str.take_back(2); 4211 int AttrChan = StringSwitch<int>(Chan) 4212 .Case(".x", 0) 4213 .Case(".y", 1) 4214 .Case(".z", 2) 4215 .Case(".w", 3) 4216 .Default(-1); 4217 if (AttrChan == -1) 4218 return MatchOperand_ParseFail; 4219 4220 Str = Str.drop_back(2).drop_front(4); 4221 4222 uint8_t Attr; 4223 if (Str.getAsInteger(10, Attr)) 4224 return MatchOperand_ParseFail; 4225 4226 SMLoc S = Parser.getTok().getLoc(); 4227 Parser.Lex(); 4228 if (Attr > 63) { 4229 Error(S, "out of bounds attr"); 4230 return MatchOperand_Success; 4231 } 4232 4233 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 4234 4235 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 4236 AMDGPUOperand::ImmTyInterpAttr)); 4237 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 4238 AMDGPUOperand::ImmTyAttrChan)); 4239 return MatchOperand_Success; 4240 } 4241 4242 void AMDGPUAsmParser::errorExpTgt() { 4243 Error(Parser.getTok().getLoc(), "invalid exp target"); 4244 } 4245 4246 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 4247 uint8_t &Val) { 4248 if (Str == "null") { 4249 Val = 9; 4250 return MatchOperand_Success; 4251 } 4252 4253 if (Str.startswith("mrt")) { 4254 Str = Str.drop_front(3); 4255 if (Str == "z") { // == mrtz 4256 Val = 8; 4257 return MatchOperand_Success; 4258 } 4259 4260 if (Str.getAsInteger(10, Val)) 4261 return MatchOperand_ParseFail; 4262 4263 if (Val > 7) 4264 errorExpTgt(); 4265 4266 return MatchOperand_Success; 4267 } 4268 4269 if (Str.startswith("pos")) { 4270 Str = Str.drop_front(3); 4271 if (Str.getAsInteger(10, Val)) 4272 return MatchOperand_ParseFail; 4273 4274 if (Val > 
3)
      errorExpTgt();

    Val += 12;
    return MatchOperand_Success;
  }

  if (Str.startswith("param")) {
    Str = Str.drop_front(5);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val >= 32)
      errorExpTgt();

    Val += 32;
    return MatchOperand_Success;
  }

  if (Str.startswith("invalid_target_")) {
    Str = Str.drop_front(15);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    errorExpTgt();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  uint8_t Val;
  StringRef Str = Parser.getTok().getString();

  auto Res = parseExpTgtImpl(Str, Val);
  if (Res != MatchOperand_Success)
    return Res;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default:
    return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return error code, but create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unnecessary error messages.
    }
    break;
  case AsmToken::Identifier: {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Operation(OP_UNKNOWN_);
    int64_t StreamId = STREAM_ID_DEFAULT_;
    if (parseSendMsgConstruct(Msg, Operation, StreamId))
      return MatchOperand_ParseFail;
    do {
      // Validate and encode message ID.
      if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
          || Msg.Id == ID_SYSMSG)) {
        if (Msg.IsSymbolic)
          Error(S, "invalid/unsupported symbolic name of message");
        else
          Error(S, "invalid/unsupported code of message");
        break;
      }
      Imm16Val = (Msg.Id << ID_SHIFT_);
      // Validate and encode operation ID.
      if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
        if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
          if (Operation.IsSymbolic)
            Error(S, "invalid symbolic name of GS_OP");
          else
            Error(S, "invalid code of GS_OP: only 2-bit values are legal");
          break;
        }
        if (Operation.Id == OP_GS_NOP
            && Msg.Id != ID_GS_DONE) {
          Error(S, "invalid GS_OP: NOP is for GS_DONE only");
          break;
        }
        Imm16Val |= (Operation.Id << OP_SHIFT_);
      }
      if (Msg.Id == ID_SYSMSG) {
        if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
          if (Operation.IsSymbolic)
            Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
          else
            Error(S, "invalid/unsupported code of SYSMSG_OP");
          break;
        }
        Imm16Val |= (Operation.Id << OP_SHIFT_);
      }
      // Validate and encode stream ID.
      if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
        if (!
(STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) { 4387 Error(S, "invalid stream id: only 2-bit values are legal"); 4388 break; 4389 } 4390 Imm16Val |= (StreamId << STREAM_ID_SHIFT_); 4391 } 4392 } while (false); 4393 } 4394 break; 4395 } 4396 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); 4397 return MatchOperand_Success; 4398 } 4399 4400 bool AMDGPUOperand::isSendMsg() const { 4401 return isImmTy(ImmTySendMsg); 4402 } 4403 4404 //===----------------------------------------------------------------------===// 4405 // parser helpers 4406 //===----------------------------------------------------------------------===// 4407 4408 bool 4409 AMDGPUAsmParser::trySkipId(const StringRef Id) { 4410 if (getLexer().getKind() == AsmToken::Identifier && 4411 Parser.getTok().getString() == Id) { 4412 Parser.Lex(); 4413 return true; 4414 } 4415 return false; 4416 } 4417 4418 bool 4419 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 4420 if (getLexer().getKind() == Kind) { 4421 Parser.Lex(); 4422 return true; 4423 } 4424 return false; 4425 } 4426 4427 bool 4428 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 4429 const StringRef ErrMsg) { 4430 if (!trySkipToken(Kind)) { 4431 Error(Parser.getTok().getLoc(), ErrMsg); 4432 return false; 4433 } 4434 return true; 4435 } 4436 4437 bool 4438 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 4439 return !getParser().parseAbsoluteExpression(Imm); 4440 } 4441 4442 bool 4443 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 4444 SMLoc S = Parser.getTok().getLoc(); 4445 if (getLexer().getKind() == AsmToken::String) { 4446 Val = Parser.getTok().getStringContents(); 4447 Parser.Lex(); 4448 return true; 4449 } else { 4450 Error(S, ErrMsg); 4451 return false; 4452 } 4453 } 4454 4455 //===----------------------------------------------------------------------===// 4456 // swizzle 4457 //===----------------------------------------------------------------------===// 4458 4459 LLVM_READNONE 4460 static unsigned 4461 encodeBitmaskPerm(const unsigned AndMask, 4462 const unsigned OrMask, 4463 const unsigned XorMask) { 4464 using namespace llvm::AMDGPU::Swizzle; 4465 4466 return BITMASK_PERM_ENC | 4467 (AndMask << BITMASK_AND_SHIFT) | 4468 (OrMask << BITMASK_OR_SHIFT) | 4469 (XorMask << BITMASK_XOR_SHIFT); 4470 } 4471 4472 bool 4473 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 4474 const unsigned MinVal, 4475 const unsigned MaxVal, 4476 const StringRef ErrMsg) { 4477 for (unsigned i = 0; i < OpNum; ++i) { 4478 if (!skipToken(AsmToken::Comma, "expected a comma")){ 4479 return false; 4480 } 4481 SMLoc ExprLoc = Parser.getTok().getLoc(); 4482 if (!parseExpr(Op[i])) { 4483 return false; 4484 } 4485 if (Op[i] < MinVal || Op[i] > MaxVal) { 4486 Error(ExprLoc, ErrMsg); 4487 return false; 4488 } 4489 } 4490 4491 return true; 4492 } 4493 4494 bool 4495 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 4496 using namespace llvm::AMDGPU::Swizzle; 4497 4498 int64_t Lane[LANE_NUM]; 4499 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 4500 "expected a 2-bit lane id")) { 4501 Imm = QUAD_PERM_ENC; 4502 for (unsigned I = 0; I < LANE_NUM; ++I) { 4503 Imm |= Lane[I] << (LANE_SHIFT * I); 4504 } 4505 return true; 4506 } 4507 return false; 4508 } 4509 4510 bool 4511 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 4512 using namespace llvm::AMDGPU::Swizzle; 4513 4514 SMLoc S = Parser.getTok().getLoc(); 4515 int64_t GroupSize; 4516 int64_t LaneIdx; 4517 4518 if 
(!parseSwizzleOperands(1, &GroupSize, 4519 2, 32, 4520 "group size must be in the interval [2,32]")) { 4521 return false; 4522 } 4523 if (!isPowerOf2_64(GroupSize)) { 4524 Error(S, "group size must be a power of two"); 4525 return false; 4526 } 4527 if (parseSwizzleOperands(1, &LaneIdx, 4528 0, GroupSize - 1, 4529 "lane id must be in the interval [0,group size - 1]")) { 4530 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 4531 return true; 4532 } 4533 return false; 4534 } 4535 4536 bool 4537 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 4538 using namespace llvm::AMDGPU::Swizzle; 4539 4540 SMLoc S = Parser.getTok().getLoc(); 4541 int64_t GroupSize; 4542 4543 if (!parseSwizzleOperands(1, &GroupSize, 4544 2, 32, "group size must be in the interval [2,32]")) { 4545 return false; 4546 } 4547 if (!isPowerOf2_64(GroupSize)) { 4548 Error(S, "group size must be a power of two"); 4549 return false; 4550 } 4551 4552 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 4553 return true; 4554 } 4555 4556 bool 4557 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 4558 using namespace llvm::AMDGPU::Swizzle; 4559 4560 SMLoc S = Parser.getTok().getLoc(); 4561 int64_t GroupSize; 4562 4563 if (!parseSwizzleOperands(1, &GroupSize, 4564 1, 16, "group size must be in the interval [1,16]")) { 4565 return false; 4566 } 4567 if (!isPowerOf2_64(GroupSize)) { 4568 Error(S, "group size must be a power of two"); 4569 return false; 4570 } 4571 4572 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 4573 return true; 4574 } 4575 4576 bool 4577 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 4578 using namespace llvm::AMDGPU::Swizzle; 4579 4580 if (!skipToken(AsmToken::Comma, "expected a comma")) { 4581 return false; 4582 } 4583 4584 StringRef Ctl; 4585 SMLoc StrLoc = Parser.getTok().getLoc(); 4586 if (!parseString(Ctl)) { 4587 return false; 4588 } 4589 if (Ctl.size() != BITMASK_WIDTH) { 4590 Error(StrLoc, "expected a 5-character mask"); 4591 return false; 4592 } 4593 4594 unsigned AndMask = 0; 4595 unsigned OrMask = 0; 4596 unsigned XorMask = 0; 4597 4598 for (size_t i = 0; i < Ctl.size(); ++i) { 4599 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 4600 switch(Ctl[i]) { 4601 default: 4602 Error(StrLoc, "invalid mask"); 4603 return false; 4604 case '0': 4605 break; 4606 case '1': 4607 OrMask |= Mask; 4608 break; 4609 case 'p': 4610 AndMask |= Mask; 4611 break; 4612 case 'i': 4613 AndMask |= Mask; 4614 XorMask |= Mask; 4615 break; 4616 } 4617 } 4618 4619 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 4620 return true; 4621 } 4622 4623 bool 4624 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 4625 4626 SMLoc OffsetLoc = Parser.getTok().getLoc(); 4627 4628 if (!parseExpr(Imm)) { 4629 return false; 4630 } 4631 if (!isUInt<16>(Imm)) { 4632 Error(OffsetLoc, "expected a 16-bit offset"); 4633 return false; 4634 } 4635 return true; 4636 } 4637 4638 bool 4639 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 4640 using namespace llvm::AMDGPU::Swizzle; 4641 4642 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 4643 4644 SMLoc ModeLoc = Parser.getTok().getLoc(); 4645 bool Ok = false; 4646 4647 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 4648 Ok = parseSwizzleQuadPerm(Imm); 4649 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 4650 Ok = parseSwizzleBitmaskPerm(Imm); 4651 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 4652 Ok = parseSwizzleBroadcast(Imm); 4653 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 4654 Ok = parseSwizzleSwap(Imm); 4655 } else if 
(trySkipId(IdSymbolic[ID_REVERSE])) { 4656 Ok = parseSwizzleReverse(Imm); 4657 } else { 4658 Error(ModeLoc, "expected a swizzle mode"); 4659 } 4660 4661 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 4662 } 4663 4664 return false; 4665 } 4666 4667 OperandMatchResultTy 4668 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 4669 SMLoc S = Parser.getTok().getLoc(); 4670 int64_t Imm = 0; 4671 4672 if (trySkipId("offset")) { 4673 4674 bool Ok = false; 4675 if (skipToken(AsmToken::Colon, "expected a colon")) { 4676 if (trySkipId("swizzle")) { 4677 Ok = parseSwizzleMacro(Imm); 4678 } else { 4679 Ok = parseSwizzleOffset(Imm); 4680 } 4681 } 4682 4683 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 4684 4685 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 4686 } else { 4687 // Swizzle "offset" operand is optional. 4688 // If it is omitted, try parsing other optional operands. 4689 return parseOptionalOpr(Operands); 4690 } 4691 } 4692 4693 bool 4694 AMDGPUOperand::isSwizzle() const { 4695 return isImmTy(ImmTySwizzle); 4696 } 4697 4698 //===----------------------------------------------------------------------===// 4699 // VGPR Index Mode 4700 //===----------------------------------------------------------------------===// 4701 4702 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 4703 4704 using namespace llvm::AMDGPU::VGPRIndexMode; 4705 4706 if (trySkipToken(AsmToken::RParen)) { 4707 return OFF; 4708 } 4709 4710 int64_t Imm = 0; 4711 4712 while (true) { 4713 unsigned Mode = 0; 4714 SMLoc S = Parser.getTok().getLoc(); 4715 4716 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 4717 if (trySkipId(IdSymbolic[ModeId])) { 4718 Mode = 1 << ModeId; 4719 break; 4720 } 4721 } 4722 4723 if (Mode == 0) { 4724 Error(S, (Imm == 0)? 4725 "expected a VGPR index mode or a closing parenthesis" : 4726 "expected a VGPR index mode"); 4727 break; 4728 } 4729 4730 if (Imm & Mode) { 4731 Error(S, "duplicate VGPR index mode"); 4732 break; 4733 } 4734 Imm |= Mode; 4735 4736 if (trySkipToken(AsmToken::RParen)) 4737 break; 4738 if (!skipToken(AsmToken::Comma, 4739 "expected a comma or a closing parenthesis")) 4740 break; 4741 } 4742 4743 return Imm; 4744 } 4745 4746 OperandMatchResultTy 4747 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 4748 4749 int64_t Imm = 0; 4750 SMLoc S = Parser.getTok().getLoc(); 4751 4752 if (getLexer().getKind() == AsmToken::Identifier && 4753 Parser.getTok().getString() == "gpr_idx" && 4754 getLexer().peekTok().is(AsmToken::LParen)) { 4755 4756 Parser.Lex(); 4757 Parser.Lex(); 4758 4759 // If parse failed, trigger an error but do not return error code 4760 // to avoid excessive error messages. 
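    // An illustrative symbolic form is "s_set_gpr_idx_on s0, gpr_idx(SRC0,DST)":
    // parseGPRIdxMacro() ORs one bit per listed mode into the immediate, and an
    // empty list yields OFF.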
4761 Imm = parseGPRIdxMacro(); 4762 4763 } else { 4764 if (getParser().parseAbsoluteExpression(Imm)) 4765 return MatchOperand_NoMatch; 4766 if (Imm < 0 || !isUInt<4>(Imm)) { 4767 Error(S, "invalid immediate: only 4-bit values are legal"); 4768 } 4769 } 4770 4771 Operands.push_back( 4772 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 4773 return MatchOperand_Success; 4774 } 4775 4776 bool AMDGPUOperand::isGPRIdxMode() const { 4777 return isImmTy(ImmTyGprIdxMode); 4778 } 4779 4780 //===----------------------------------------------------------------------===// 4781 // sopp branch targets 4782 //===----------------------------------------------------------------------===// 4783 4784 OperandMatchResultTy 4785 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 4786 SMLoc S = Parser.getTok().getLoc(); 4787 4788 switch (getLexer().getKind()) { 4789 default: return MatchOperand_ParseFail; 4790 case AsmToken::Integer: { 4791 int64_t Imm; 4792 if (getParser().parseAbsoluteExpression(Imm)) 4793 return MatchOperand_ParseFail; 4794 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 4795 return MatchOperand_Success; 4796 } 4797 4798 case AsmToken::Identifier: 4799 Operands.push_back(AMDGPUOperand::CreateExpr(this, 4800 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 4801 Parser.getTok().getString()), getContext()), S)); 4802 Parser.Lex(); 4803 return MatchOperand_Success; 4804 } 4805 } 4806 4807 //===----------------------------------------------------------------------===// 4808 // mubuf 4809 //===----------------------------------------------------------------------===// 4810 4811 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 4812 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 4813 } 4814 4815 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 4816 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 4817 } 4818 4819 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 4820 const OperandVector &Operands, 4821 bool IsAtomic, 4822 bool IsAtomicReturn, 4823 bool IsLds) { 4824 bool IsLdsOpcode = IsLds; 4825 bool HasLdsModifier = false; 4826 OptionalImmIndexMap OptionalIdx; 4827 assert(IsAtomicReturn ? IsAtomic : true); 4828 4829 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4830 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4831 4832 // Add the register arguments 4833 if (Op.isReg()) { 4834 Op.addRegOperands(Inst, 1); 4835 continue; 4836 } 4837 4838 // Handle the case where soffset is an immediate 4839 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4840 Op.addImmOperands(Inst, 1); 4841 continue; 4842 } 4843 4844 HasLdsModifier = Op.isLDS(); 4845 4846 // Handle tokens like 'offen' which are sometimes hard-coded into the 4847 // asm string. There are no MCInst operands for these. 4848 if (Op.isToken()) { 4849 continue; 4850 } 4851 assert(Op.isImm()); 4852 4853 // Handle optional arguments 4854 OptionalIdx[Op.getImmTy()] = i; 4855 } 4856 4857 // This is a workaround for an llvm quirk which may result in an 4858 // incorrect instruction selection. Lds and non-lds versions of 4859 // MUBUF instructions are identical except that lds versions 4860 // have mandatory 'lds' modifier. However this modifier follows 4861 // optional modifiers and llvm asm matcher regards this 'lds' 4862 // modifier as an optional one. As a result, an lds version 4863 // of opcode may be selected even if it has no 'lds' modifier. 
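  // For example, a plain "buffer_load_dword v5, off, s[8:11], s3" (no 'lds'
  // token) could otherwise be matched to the lds variant; in that case we
  // switch back to the opcode returned by getMUBUFNoLdsInst() below.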
4864 if (IsLdsOpcode && !HasLdsModifier) { 4865 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 4866 if (NoLdsOpcode != -1) { // Got lds version - correct it. 4867 Inst.setOpcode(NoLdsOpcode); 4868 IsLdsOpcode = false; 4869 } 4870 } 4871 4872 // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns. 4873 if (IsAtomicReturn) { 4874 MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning. 4875 Inst.insert(I, *I); 4876 } 4877 4878 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 4879 if (!IsAtomic) { // glc is hard-coded. 4880 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4881 } 4882 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4883 4884 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 4885 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4886 } 4887 } 4888 4889 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 4890 OptionalImmIndexMap OptionalIdx; 4891 4892 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4893 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4894 4895 // Add the register arguments 4896 if (Op.isReg()) { 4897 Op.addRegOperands(Inst, 1); 4898 continue; 4899 } 4900 4901 // Handle the case where soffset is an immediate 4902 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4903 Op.addImmOperands(Inst, 1); 4904 continue; 4905 } 4906 4907 // Handle tokens like 'offen' which are sometimes hard-coded into the 4908 // asm string. There are no MCInst operands for these. 4909 if (Op.isToken()) { 4910 continue; 4911 } 4912 assert(Op.isImm()); 4913 4914 // Handle optional arguments 4915 OptionalIdx[Op.getImmTy()] = i; 4916 } 4917 4918 addOptionalImmOperand(Inst, Operands, OptionalIdx, 4919 AMDGPUOperand::ImmTyOffset); 4920 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 4921 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4922 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4923 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4924 } 4925 4926 //===----------------------------------------------------------------------===// 4927 // mimg 4928 //===----------------------------------------------------------------------===// 4929 4930 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 4931 bool IsAtomic) { 4932 unsigned I = 1; 4933 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 4934 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 4935 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 4936 } 4937 4938 if (IsAtomic) { 4939 // Add src, same as dst 4940 assert(Desc.getNumDefs() == 1); 4941 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 4942 } 4943 4944 OptionalImmIndexMap OptionalIdx; 4945 4946 for (unsigned E = Operands.size(); I != E; ++I) { 4947 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 4948 4949 // Add the register arguments 4950 if (Op.isReg()) { 4951 Op.addRegOperands(Inst, 1); 4952 } else if (Op.isImmModifier()) { 4953 OptionalIdx[Op.getImmTy()] = I; 4954 } else { 4955 llvm_unreachable("unexpected operand type"); 4956 } 4957 } 4958 4959 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 4960 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 4961 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 
4962 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4963 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 4964 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4965 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 4966 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 4967 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 4968 } 4969 4970 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 4971 cvtMIMG(Inst, Operands, true); 4972 } 4973 4974 //===----------------------------------------------------------------------===// 4975 // smrd 4976 //===----------------------------------------------------------------------===// 4977 4978 bool AMDGPUOperand::isSMRDOffset8() const { 4979 return isImm() && isUInt<8>(getImm()); 4980 } 4981 4982 bool AMDGPUOperand::isSMRDOffset20() const { 4983 return isImm() && isUInt<20>(getImm()); 4984 } 4985 4986 bool AMDGPUOperand::isSMRDLiteralOffset() const { 4987 // 32-bit literals are only supported on CI and we only want to use them 4988 // when the offset is > 8-bits. 4989 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 4990 } 4991 4992 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 4993 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4994 } 4995 4996 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 4997 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4998 } 4999 5000 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5001 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5002 } 5003 5004 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { 5005 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5006 } 5007 5008 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { 5009 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5010 } 5011 5012 //===----------------------------------------------------------------------===// 5013 // vop3 5014 //===----------------------------------------------------------------------===// 5015 5016 static bool ConvertOmodMul(int64_t &Mul) { 5017 if (Mul != 1 && Mul != 2 && Mul != 4) 5018 return false; 5019 5020 Mul >>= 1; 5021 return true; 5022 } 5023 5024 static bool ConvertOmodDiv(int64_t &Div) { 5025 if (Div == 1) { 5026 Div = 0; 5027 return true; 5028 } 5029 5030 if (Div == 2) { 5031 Div = 3; 5032 return true; 5033 } 5034 5035 return false; 5036 } 5037 5038 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 5039 if (BoundCtrl == 0) { 5040 BoundCtrl = 1; 5041 return true; 5042 } 5043 5044 if (BoundCtrl == -1) { 5045 BoundCtrl = 0; 5046 return true; 5047 } 5048 5049 return false; 5050 } 5051 5052 // Note: the order in this table matches the order of operands in AsmString. 
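// Each entry is {Name, Type, IsBit, ConvertResult}: IsBit entries are parsed
// as bare flags (e.g. "glc"), the others as "name:value" pairs, and a non-null
// ConvertResult re-encodes the parsed value (see ConvertOmodMul and
// ConvertBoundCtrl above).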
5053 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 5054 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 5055 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 5056 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 5057 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 5058 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 5059 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 5060 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 5061 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 5062 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 5063 {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 5064 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 5065 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 5066 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 5067 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5068 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 5069 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 5070 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 5071 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 5072 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 5073 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5074 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5075 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 5076 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5077 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 5078 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 5079 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 5080 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 5081 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 5082 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 5083 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 5084 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 5085 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 5086 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 5087 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 5088 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 5089 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 5090 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr} 5091 }; 5092 5093 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 5094 unsigned size = Operands.size(); 5095 assert(size > 0); 5096 5097 OperandMatchResultTy res = parseOptionalOpr(Operands); 5098 5099 // This is a hack to enable hardcoded mandatory operands which follow 5100 // optional operands. 5101 // 5102 // Current design assumes that all operands after the first optional operand 5103 // are also optional. However implementation of some instructions violates 5104 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 5105 // 5106 // To alleviate this problem, we have to (implicitly) parse extra operands 5107 // to make sure autogenerated parser of custom operands never hit hardcoded 5108 // mandatory operands. 5109 5110 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 5111 5112 // We have parsed the first optional operand. 5113 // Parse as many operands as necessary to skip all mandatory operands. 
5114 5115 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 5116 if (res != MatchOperand_Success || 5117 getLexer().is(AsmToken::EndOfStatement)) break; 5118 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 5119 res = parseOptionalOpr(Operands); 5120 } 5121 } 5122 5123 return res; 5124 } 5125 5126 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 5127 OperandMatchResultTy res; 5128 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 5129 // try to parse any optional operand here 5130 if (Op.IsBit) { 5131 res = parseNamedBit(Op.Name, Operands, Op.Type); 5132 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 5133 res = parseOModOperand(Operands); 5134 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 5135 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 5136 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 5137 res = parseSDWASel(Operands, Op.Name, Op.Type); 5138 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 5139 res = parseSDWADstUnused(Operands); 5140 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 5141 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 5142 Op.Type == AMDGPUOperand::ImmTyNegLo || 5143 Op.Type == AMDGPUOperand::ImmTyNegHi) { 5144 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 5145 Op.ConvertResult); 5146 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) { 5147 res = parseDfmtNfmt(Operands); 5148 } else { 5149 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 5150 } 5151 if (res != MatchOperand_NoMatch) { 5152 return res; 5153 } 5154 } 5155 return MatchOperand_NoMatch; 5156 } 5157 5158 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 5159 StringRef Name = Parser.getTok().getString(); 5160 if (Name == "mul") { 5161 return parseIntWithPrefix("mul", Operands, 5162 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 5163 } 5164 5165 if (Name == "div") { 5166 return parseIntWithPrefix("div", Operands, 5167 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 5168 } 5169 5170 return MatchOperand_NoMatch; 5171 } 5172 5173 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 5174 cvtVOP3P(Inst, Operands); 5175 5176 int Opc = Inst.getOpcode(); 5177 5178 int SrcNum; 5179 const int Ops[] = { AMDGPU::OpName::src0, 5180 AMDGPU::OpName::src1, 5181 AMDGPU::OpName::src2 }; 5182 for (SrcNum = 0; 5183 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 5184 ++SrcNum); 5185 assert(SrcNum > 0); 5186 5187 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5188 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5189 5190 if ((OpSel & (1 << SrcNum)) != 0) { 5191 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 5192 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 5193 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 5194 } 5195 } 5196 5197 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 5198 // 1. This operand is input modifiers 5199 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 5200 // 2. This is not last operand 5201 && Desc.NumOperands > (OpNum + 1) 5202 // 3. Next operand is register class 5203 && Desc.OpInfo[OpNum + 1].RegClass != -1 5204 // 4. 
Next register is not tied to any other operand 5205 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 5206 } 5207 5208 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 5209 { 5210 OptionalImmIndexMap OptionalIdx; 5211 unsigned Opc = Inst.getOpcode(); 5212 5213 unsigned I = 1; 5214 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5215 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5216 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5217 } 5218 5219 for (unsigned E = Operands.size(); I != E; ++I) { 5220 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5221 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5222 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5223 } else if (Op.isInterpSlot() || 5224 Op.isInterpAttr() || 5225 Op.isAttrChan()) { 5226 Inst.addOperand(MCOperand::createImm(Op.Imm.Val)); 5227 } else if (Op.isImmModifier()) { 5228 OptionalIdx[Op.getImmTy()] = I; 5229 } else { 5230 llvm_unreachable("unhandled operand type"); 5231 } 5232 } 5233 5234 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 5235 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 5236 } 5237 5238 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5239 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5240 } 5241 5242 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5243 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5244 } 5245 } 5246 5247 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 5248 OptionalImmIndexMap &OptionalIdx) { 5249 unsigned Opc = Inst.getOpcode(); 5250 5251 unsigned I = 1; 5252 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5253 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5254 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5255 } 5256 5257 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 5258 // This instruction has src modifiers 5259 for (unsigned E = Operands.size(); I != E; ++I) { 5260 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5261 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5262 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5263 } else if (Op.isImmModifier()) { 5264 OptionalIdx[Op.getImmTy()] = I; 5265 } else if (Op.isRegOrImm()) { 5266 Op.addRegOrImmOperands(Inst, 1); 5267 } else { 5268 llvm_unreachable("unhandled operand type"); 5269 } 5270 } 5271 } else { 5272 // No src modifiers 5273 for (unsigned E = Operands.size(); I != E; ++I) { 5274 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5275 if (Op.isMod()) { 5276 OptionalIdx[Op.getImmTy()] = I; 5277 } else { 5278 Op.addRegOrImmOperands(Inst, 1); 5279 } 5280 } 5281 } 5282 5283 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5284 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5285 } 5286 5287 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5288 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5289 } 5290 5291 // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906): 5292 // it has src2 register operand that is tied to dst operand 5293 // we don't allow modifiers for this operand in assembler so src2_modifiers 5294 // should be 0. 
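  // e.g. for "v_mac_f32_e64 v0, v1, v2" the matcher has emitted only the dst
  // and src0/src1 operands; the two Inst.insert() calls below synthesize a
  // zero src2_modifiers and a src2 operand tied to the dst register.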
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input-modifiers operand.
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand.
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand is a register class.
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next register is not tied to any other operand.
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}

void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers.
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers.
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case for v_mac_{f16, f32} and v_fmac_f32 (gfx906): these have a
  // src2 register operand that is tied to the dst operand. The assembler does
  // not allow modifiers on this operand, so src2_modifiers must be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_si ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
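// Worked example (illustrative, not from the original source): for
//
//   v_mac_f32_e64 v0, v1, v2
//
// the special case above inserts an immediate 0 at the src2_modifiers
// position and then copies the dst operand (v0) into the src2 slot, since
// src2 is tied to dst and modifiers are not accepted on it.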
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
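// Worked example (illustrative, not from the original source): for a packed
// VOP3P instruction written as
//
//   v_pk_add_f16 v0, v1, v2 op_sel:[1,0] neg_lo:[0,1]
//
// bit 0 of op_sel is set, so OP_SEL_0 is folded into src0_modifiers, and
// bit 1 of neg_lo is set, so NEG is folded into src1_modifiers. When
// op_sel_hi is not written, packed instructions default it to -1 (all bits
// set), per the DefaultVal logic above.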
//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31);
  }
  return false;
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens.
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i * 2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
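// Worked example (illustrative, not from the original source): for
//
//   quad_perm:[1,2,3,0]
//
// each lane selector occupies two bits, least-significant first, so the
// parsed control value is 1 | (2 << 2) | (3 << 4) | (0 << 6) = 0x39. For the
// row operations the written count is OR-ed into the base value, e.g.
// "row_shl:5" yields DppCtrl::ROW_SHL0 | 5.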
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied old or src2 operand for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments.
    if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
      continue;
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Handle optional arguments.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}
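// Illustrative note (not from the original source): when the optional DPP
// operands are omitted, e.g.
//
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3]
//
// the conversion above supplies row_mask:0xf and bank_mask:0xf (all rows and
// banks enabled) and a bound-ctrl immediate of 0, matching the default*
// helpers defined above.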
//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}
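// Illustrative note (not from the original source): these parsers accept the
// usual SDWA operand syntax, e.g.
//
//   v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1
//
// where the value after each prefix must be one of the names in the
// StringSwitch tables above; anything else fails the parse.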
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we didn't skip it on the previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_gfx9/V_NOP_sdwa_vi have no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}: these have a src2 register operand
  // that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
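// Illustrative note (not from the original source): SDWA operands that are
// not written in the source are filled in with the defaults used above, e.g.
//
//   v_mov_b32_sdwa v1, v2
//
// is converted as if clamp (and omod, where present) were 0, dst_sel and the
// source selects were DWORD, and dst_unused were UNUSED_PRESERVE.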
/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks if we were given an immediate operand but
  // are expected to produce the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
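// Usage note (illustrative, not from the original source): the endpgm
// immediate is optional in assembly, so both
//
//   s_endpgm
//   s_endpgm 1
//
// are expected to be accepted; the former is treated as an immediate of 0
// (see defaultEndpgmImmOperands above), and any explicit value must fit in
// 16 bits.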