1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/ErrorHandling.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

// Coarse classification of a parsed register, used when decoding register
// syntax (e.g. v[0:3], s[4:5], ttmp regs) before mapping to an MCRegister.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

/// A single parsed AMDGPU assembly operand.
///
/// An operand is one of four kinds (token, immediate, register, expression);
/// the kind-specific payload lives in the anonymous union below. The many
/// is*() predicates are queried by the TableGen-generated matcher
/// (AMDGPUGenAsmMatcher.inc) to decide which operand class a parsed operand
/// satisfies, and the add*Operands() helpers append the matched operand to an
/// MCInst in the encoding the instruction expects.
class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  // Back-pointer to the parser that created this operand; predicates such as
  // isInlinableImm() need subtarget information from it.
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  /// Source-operand modifiers parsed alongside a register or immediate:
  /// abs/neg are floating-point modifiers, sext is the integer (SDWA)
  /// modifier. FP and int modifiers are mutually exclusive (asserted in
  /// getModifiersOperand()).
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    /// Encode the FP modifiers as the SISrcMods bit pattern used in the
    /// instruction encoding.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    /// Encode the integer (sext) modifier as the SISrcMods bit pattern.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    /// Encode whichever family of modifiers is set; returns 0 when none are.
    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  /// Tags distinguishing the many named immediate-like operands (glc, slc,
  /// offset, dmask, ...) so the matcher can tell them apart even though they
  /// are all stored as a plain int64_t.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyEndpgm,
    ImmTyHigh
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Kind-specific payload; which member is active is determined by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  // Defined out of line: depend on subtarget info held by AsmParser.
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  // A "register" for the matcher is a register with no abs/neg/sext mods;
  // modified registers match the *WithInputMods operand classes instead.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // True for a VGPR of any supported width (32..512 bits).
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // Predicates for the named-immediate operand classes; the range checks
  // (isUInt<16> etc.) enforce the encodable field width for each operand.
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  // SCSrc*: scalar register or inline constant (no literals, no modifiers).
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  // SSrc*: SCSrc plus 32-bit literals (and expressions for 32-bit forms).
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  // VCSrc*: VALU source — VGPR/SGPR or inline constant, no literal.
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  // VSrc*: VCSrc plus literal constants (and expressions for 32-bit forms).
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  // KImm*: literal constant carried in the instruction (e.g. s_setreg_imm32).
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  /// For an Expression operand that wraps a plain symbol reference, return
  /// the symbol name so it can be treated as a token (see isToken()).
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers are only meaningful on registers and plain immediates.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // The modifier bits are emitted as a separate MCInst operand immediately
  // before the register/immediate operand itself.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  /// Debug helper: print the symbolic name of an ImmTy tag.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyEndpgm:
      OS << "Endpgm";
      break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  // Factory functions; each fully initializes the corresponding union member
  // and the source-location range.
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  // Note: the token does not own Str's storage; the caller must keep it alive.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
/// Tracks the highest SGPR/VGPR index referenced within the current kernel
/// and publishes the running counts as the assembler symbols
/// .kernel.sgpr_count / .kernel.vgpr_count.
class KernelScopeInfo {
  // One past the highest register index seen so far; -1 means "none yet".
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  // Record that SGPR index i is used; raises the published count if needed.
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  // Record that VGPR index i is used; raises the published count if needed.
  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  // Reset the counters for a new kernel scope and (re)publish count symbols
  // with value 0 (the -1 sentinel is bumped to 0 by uses*At).
  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  // A register use spanning RegWidth dwords starting at DwordRegIndex; only
  // SGPRs and VGPRs are tracked.
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
873 /// 874 /// \param Features [in] Target features, used for bug corrections. 875 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 876 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 877 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 878 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 879 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 880 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 881 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 882 /// \param VGPRBlocks [out] Result VGPR block count. 883 /// \param SGPRBlocks [out] Result SGPR block count. 884 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 885 bool FlatScrUsed, bool XNACKUsed, 886 unsigned NextFreeVGPR, SMRange VGPRRange, 887 unsigned NextFreeSGPR, SMRange SGPRRange, 888 unsigned &VGPRBlocks, unsigned &SGPRBlocks); 889 bool ParseDirectiveAMDGCNTarget(); 890 bool ParseDirectiveAMDHSAKernel(); 891 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 892 bool ParseDirectiveHSACodeObjectVersion(); 893 bool ParseDirectiveHSACodeObjectISA(); 894 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 895 bool ParseDirectiveAMDKernelCodeT(); 896 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 897 bool ParseDirectiveAMDGPUHsaKernel(); 898 899 bool ParseDirectiveISAVersion(); 900 bool ParseDirectiveHSAMetadata(); 901 bool ParseDirectivePALMetadataBegin(); 902 bool ParseDirectivePALMetadata(); 903 904 /// Common code to parse out a block of text (typically YAML) between start and 905 /// end directives. 
906 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 907 const char *AssemblerDirectiveEnd, 908 std::string &CollectString); 909 910 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 911 RegisterKind RegKind, unsigned Reg1, 912 unsigned RegNum); 913 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, 914 unsigned& RegNum, unsigned& RegWidth, 915 unsigned *DwordRegIndex); 916 bool isRegister(); 917 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 918 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 919 void initializeGprCountSymbol(RegisterKind RegKind); 920 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 921 unsigned RegWidth); 922 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 923 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 924 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 925 bool IsGdsHardcoded); 926 927 public: 928 enum AMDGPUMatchResultTy { 929 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 930 }; 931 enum OperandMode { 932 OperandMode_Default, 933 OperandMode_NSA, 934 }; 935 936 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 937 938 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 939 const MCInstrInfo &MII, 940 const MCTargetOptions &Options) 941 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 942 MCAsmParserExtension::Initialize(Parser); 943 944 if (getFeatureBits().none()) { 945 // Set default features. 946 copySTI().ToggleFeature("southern-islands"); 947 } 948 949 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 950 951 { 952 // TODO: make those pre-defined variables read-only. 953 // Currently there is none suitable machinery in the core llvm-mc for this. 954 // MCSymbol::isRedefinable is intended for another purpose, and 955 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
956 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 957 MCContext &Ctx = getContext(); 958 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 959 MCSymbol *Sym = 960 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 961 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 962 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 963 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 964 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 965 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 966 } else { 967 MCSymbol *Sym = 968 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 969 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 970 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 971 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 972 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 973 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 974 } 975 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 976 initializeGprCountSymbol(IS_VGPR); 977 initializeGprCountSymbol(IS_SGPR); 978 } else 979 KernelScope.initialize(getContext()); 980 } 981 } 982 983 bool hasXNACK() const { 984 return AMDGPU::hasXNACK(getSTI()); 985 } 986 987 bool hasMIMG_R128() const { 988 return AMDGPU::hasMIMG_R128(getSTI()); 989 } 990 991 bool hasPackedD16() const { 992 return AMDGPU::hasPackedD16(getSTI()); 993 } 994 995 bool isSI() const { 996 return AMDGPU::isSI(getSTI()); 997 } 998 999 bool isCI() const { 1000 return AMDGPU::isCI(getSTI()); 1001 } 1002 1003 bool isVI() const { 1004 return AMDGPU::isVI(getSTI()); 1005 } 1006 1007 bool isGFX9() const { 1008 return AMDGPU::isGFX9(getSTI()); 1009 } 1010 1011 bool isGFX10() const { 1012 return AMDGPU::isGFX10(getSTI()); 1013 } 1014 1015 bool hasInv2PiInlineImm() const { 1016 return 
getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1017 } 1018 1019 bool hasFlatOffsets() const { 1020 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1021 } 1022 1023 bool hasSGPR102_SGPR103() const { 1024 return !isVI() && !isGFX9(); 1025 } 1026 1027 bool hasSGPR104_SGPR105() const { 1028 return isGFX10(); 1029 } 1030 1031 bool hasIntClamp() const { 1032 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1033 } 1034 1035 AMDGPUTargetStreamer &getTargetStreamer() { 1036 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1037 return static_cast<AMDGPUTargetStreamer &>(TS); 1038 } 1039 1040 const MCRegisterInfo *getMRI() const { 1041 // We need this const_cast because for some reason getContext() is not const 1042 // in MCAsmParser. 1043 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1044 } 1045 1046 const MCInstrInfo *getMII() const { 1047 return &MII; 1048 } 1049 1050 const FeatureBitset &getFeatureBits() const { 1051 return getSTI().getFeatureBits(); 1052 } 1053 1054 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1055 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1056 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1057 1058 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1059 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1060 bool isForcedDPP() const { return ForcedDPP; } 1061 bool isForcedSDWA() const { return ForcedSDWA; } 1062 ArrayRef<unsigned> getMatchedVariants() const; 1063 1064 std::unique_ptr<AMDGPUOperand> parseRegister(); 1065 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1066 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1067 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1068 unsigned Kind) override; 1069 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1070 OperandVector &Operands, MCStreamer &Out, 1071 uint64_t &ErrorInfo, 1072 
bool MatchingInlineAsm) override; 1073 bool ParseDirective(AsmToken DirectiveID) override; 1074 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1075 OperandMode Mode = OperandMode_Default); 1076 StringRef parseMnemonicSuffix(StringRef Name); 1077 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1078 SMLoc NameLoc, OperandVector &Operands) override; 1079 //bool ProcessInstruction(MCInst &Inst); 1080 1081 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1082 1083 OperandMatchResultTy 1084 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1085 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1086 bool (*ConvertResult)(int64_t &) = nullptr); 1087 1088 OperandMatchResultTy 1089 parseOperandArrayWithPrefix(const char *Prefix, 1090 OperandVector &Operands, 1091 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1092 bool (*ConvertResult)(int64_t&) = nullptr); 1093 1094 OperandMatchResultTy 1095 parseNamedBit(const char *Name, OperandVector &Operands, 1096 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1097 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1098 StringRef &Value); 1099 1100 bool isModifier(); 1101 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1102 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1103 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1104 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1105 bool parseSP3NegModifier(); 1106 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1107 OperandMatchResultTy parseReg(OperandVector &Operands); 1108 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1109 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1110 
OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1111 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1112 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1113 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1114 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands); 1115 1116 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1117 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1118 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1119 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1120 1121 bool parseCnt(int64_t &IntVal); 1122 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1123 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1124 1125 private: 1126 struct OperandInfoTy { 1127 int64_t Id; 1128 bool IsSymbolic = false; 1129 1130 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1131 }; 1132 1133 bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId); 1134 bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1135 1136 void errorExpTgt(); 1137 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1138 1139 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc); 1140 bool validateSOPLiteral(const MCInst &Inst) const; 1141 bool validateConstantBusLimitations(const MCInst &Inst); 1142 bool validateEarlyClobberLimitations(const MCInst &Inst); 1143 bool validateIntClampSupported(const MCInst &Inst); 1144 bool validateMIMGAtomicDMask(const MCInst &Inst); 1145 bool validateMIMGGatherDMask(const MCInst &Inst); 1146 bool validateMIMGDataSize(const MCInst &Inst); 1147 bool validateMIMGAddrSize(const MCInst &Inst); 1148 bool validateMIMGD16(const MCInst &Inst); 1149 bool validateMIMGDim(const MCInst &Inst); 1150 bool validateLdsDirect(const 
MCInst &Inst); 1151 bool validateVOP3Literal(const MCInst &Inst) const; 1152 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1153 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1154 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1155 1156 bool isId(const StringRef Id) const; 1157 bool isId(const AsmToken &Token, const StringRef Id) const; 1158 bool isToken(const AsmToken::TokenKind Kind) const; 1159 bool trySkipId(const StringRef Id); 1160 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1161 bool trySkipToken(const AsmToken::TokenKind Kind); 1162 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1163 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1164 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1165 AsmToken::TokenKind getTokenKind() const; 1166 bool parseExpr(int64_t &Imm); 1167 StringRef getTokenStr() const; 1168 AsmToken peekToken(); 1169 AsmToken getToken() const; 1170 SMLoc getLoc() const; 1171 void lex(); 1172 1173 public: 1174 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1175 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1176 1177 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1178 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1179 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1180 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1181 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1182 1183 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1184 const unsigned MinVal, 1185 const unsigned MaxVal, 1186 const StringRef ErrMsg); 1187 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1188 bool parseSwizzleOffset(int64_t &Imm); 1189 bool parseSwizzleMacro(int64_t &Imm); 1190 bool parseSwizzleQuadPerm(int64_t &Imm); 1191 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1192 bool parseSwizzleBroadcast(int64_t 
&Imm); 1193 bool parseSwizzleSwap(int64_t &Imm); 1194 bool parseSwizzleReverse(int64_t &Imm); 1195 1196 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1197 int64_t parseGPRIdxMacro(); 1198 1199 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1200 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1201 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1202 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1203 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1204 1205 AMDGPUOperand::Ptr defaultDLC() const; 1206 AMDGPUOperand::Ptr defaultGLC() const; 1207 AMDGPUOperand::Ptr defaultSLC() const; 1208 1209 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1210 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1211 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1212 AMDGPUOperand::Ptr defaultOffsetU12() const; 1213 AMDGPUOperand::Ptr defaultOffsetS13() const; 1214 1215 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1216 1217 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1218 OptionalImmIndexMap &OptionalIdx); 1219 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1220 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1221 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1222 1223 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1224 1225 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1226 bool IsAtomic = false); 1227 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1228 1229 OperandMatchResultTy parseDim(OperandVector &Operands); 1230 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1231 AMDGPUOperand::Ptr defaultRowMask() const; 1232 AMDGPUOperand::Ptr defaultBankMask() const; 
1233 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1234 void cvtDPP(MCInst &Inst, const OperandVector &Operands); 1235 1236 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1237 AMDGPUOperand::ImmTy Type); 1238 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1239 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1240 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1241 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1242 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1243 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1244 uint64_t BasicInstType, bool skipVcc = false); 1245 1246 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1247 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1248 }; 1249 1250 struct OptionalOperand { 1251 const char *Name; 1252 AMDGPUOperand::ImmTy Type; 1253 bool IsBit; 1254 bool (*ConvertResult)(int64_t&); 1255 }; 1256 1257 } // end anonymous namespace 1258 1259 // May be called with integer type with equivalent bitwidth. 
1260 static const fltSemantics *getFltSemantics(unsigned Size) { 1261 switch (Size) { 1262 case 4: 1263 return &APFloat::IEEEsingle(); 1264 case 8: 1265 return &APFloat::IEEEdouble(); 1266 case 2: 1267 return &APFloat::IEEEhalf(); 1268 default: 1269 llvm_unreachable("unsupported fp type"); 1270 } 1271 } 1272 1273 static const fltSemantics *getFltSemantics(MVT VT) { 1274 return getFltSemantics(VT.getSizeInBits() / 8); 1275 } 1276 1277 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1278 switch (OperandType) { 1279 case AMDGPU::OPERAND_REG_IMM_INT32: 1280 case AMDGPU::OPERAND_REG_IMM_FP32: 1281 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1282 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1283 return &APFloat::IEEEsingle(); 1284 case AMDGPU::OPERAND_REG_IMM_INT64: 1285 case AMDGPU::OPERAND_REG_IMM_FP64: 1286 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1287 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1288 return &APFloat::IEEEdouble(); 1289 case AMDGPU::OPERAND_REG_IMM_INT16: 1290 case AMDGPU::OPERAND_REG_IMM_FP16: 1291 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1292 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1293 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1294 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1295 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1296 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1297 return &APFloat::IEEEhalf(); 1298 default: 1299 llvm_unreachable("unsupported fp type"); 1300 } 1301 } 1302 1303 //===----------------------------------------------------------------------===// 1304 // Operand 1305 //===----------------------------------------------------------------------===// 1306 1307 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1308 bool Lost; 1309 1310 // Convert literal to single precision 1311 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1312 APFloat::rmNearestTiesToEven, 1313 &Lost); 1314 // We allow precision lost but not overflow or underflow 1315 if (Status != APFloat::opOK && 1316 Lost && 1317 ((Status & 
APFloat::opOverflow) != 0 || 1318 (Status & APFloat::opUnderflow) != 0)) { 1319 return false; 1320 } 1321 1322 return true; 1323 } 1324 1325 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1326 return isUIntN(Size, Val) || isIntN(Size, Val); 1327 } 1328 1329 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1330 1331 // This is a hack to enable named inline values like 1332 // shared_base with both 32-bit and 64-bit operands. 1333 // Note that these values are defined as 1334 // 32-bit operands only. 1335 if (isInlineValue()) { 1336 return true; 1337 } 1338 1339 if (!isImmTy(ImmTyNone)) { 1340 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1341 return false; 1342 } 1343 // TODO: We should avoid using host float here. It would be better to 1344 // check the float bit values which is what a few other places do. 1345 // We've had bot failures before due to weird NaN support on mips hosts. 1346 1347 APInt Literal(64, Imm.Val); 1348 1349 if (Imm.IsFPImm) { // We got fp literal token 1350 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1351 return AMDGPU::isInlinableLiteral64(Imm.Val, 1352 AsmParser->hasInv2PiInlineImm()); 1353 } 1354 1355 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1356 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1357 return false; 1358 1359 if (type.getScalarSizeInBits() == 16) { 1360 return AMDGPU::isInlinableLiteral16( 1361 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1362 AsmParser->hasInv2PiInlineImm()); 1363 } 1364 1365 // Check if single precision literal is inlinable 1366 return AMDGPU::isInlinableLiteral32( 1367 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1368 AsmParser->hasInv2PiInlineImm()); 1369 } 1370 1371 // We got int literal token. 
1372 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1373 return AMDGPU::isInlinableLiteral64(Imm.Val, 1374 AsmParser->hasInv2PiInlineImm()); 1375 } 1376 1377 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1378 return false; 1379 } 1380 1381 if (type.getScalarSizeInBits() == 16) { 1382 return AMDGPU::isInlinableLiteral16( 1383 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1384 AsmParser->hasInv2PiInlineImm()); 1385 } 1386 1387 return AMDGPU::isInlinableLiteral32( 1388 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1389 AsmParser->hasInv2PiInlineImm()); 1390 } 1391 1392 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1393 // Check that this immediate can be added as literal 1394 if (!isImmTy(ImmTyNone)) { 1395 return false; 1396 } 1397 1398 if (!Imm.IsFPImm) { 1399 // We got int literal token. 1400 1401 if (type == MVT::f64 && hasFPModifiers()) { 1402 // Cannot apply fp modifiers to int literals preserving the same semantics 1403 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1404 // disable these cases. 1405 return false; 1406 } 1407 1408 unsigned Size = type.getSizeInBits(); 1409 if (Size == 64) 1410 Size = 32; 1411 1412 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1413 // types. 1414 return isSafeTruncation(Imm.Val, Size); 1415 } 1416 1417 // We got fp literal token 1418 if (type == MVT::f64) { // Expected 64-bit fp operand 1419 // We would set low 64-bits of literal to zeroes but we accept this literals 1420 return true; 1421 } 1422 1423 if (type == MVT::i64) { // Expected 64-bit int operand 1424 // We don't allow fp literals in 64-bit integer instructions. It is 1425 // unclear how we should encode them. 1426 return false; 1427 } 1428 1429 // We allow fp literals with f16x2 operands assuming that the specified 1430 // literal goes into the lower half and the upper half is zero. 
We also 1431 // require that the literal may be losslesly converted to f16. 1432 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1433 (type == MVT::v2i16)? MVT::i16 : type; 1434 1435 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1436 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1437 } 1438 1439 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1440 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1441 } 1442 1443 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1444 if (AsmParser->isVI()) 1445 return isVReg32(); 1446 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1447 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1448 else 1449 return false; 1450 } 1451 1452 bool AMDGPUOperand::isSDWAFP16Operand() const { 1453 return isSDWAOperand(MVT::f16); 1454 } 1455 1456 bool AMDGPUOperand::isSDWAFP32Operand() const { 1457 return isSDWAOperand(MVT::f32); 1458 } 1459 1460 bool AMDGPUOperand::isSDWAInt16Operand() const { 1461 return isSDWAOperand(MVT::i16); 1462 } 1463 1464 bool AMDGPUOperand::isSDWAInt32Operand() const { 1465 return isSDWAOperand(MVT::i32); 1466 } 1467 1468 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1469 { 1470 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1471 assert(Size == 2 || Size == 4 || Size == 8); 1472 1473 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1474 1475 if (Imm.Mods.Abs) { 1476 Val &= ~FpSignMask; 1477 } 1478 if (Imm.Mods.Neg) { 1479 Val ^= FpSignMask; 1480 } 1481 1482 return Val; 1483 } 1484 1485 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1486 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1487 Inst.getNumOperands())) { 1488 addLiteralImmOperand(Inst, Imm.Val, 1489 ApplyModifiers & 1490 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1491 } else { 1492 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1493 
Inst.addOperand(MCOperand::createImm(Imm.Val)); 1494 } 1495 } 1496 1497 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1498 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1499 auto OpNum = Inst.getNumOperands(); 1500 // Check that this operand accepts literals 1501 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1502 1503 if (ApplyModifiers) { 1504 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1505 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 1506 Val = applyInputFPModifiers(Val, Size); 1507 } 1508 1509 APInt Literal(64, Val); 1510 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1511 1512 if (Imm.IsFPImm) { // We got fp literal token 1513 switch (OpTy) { 1514 case AMDGPU::OPERAND_REG_IMM_INT64: 1515 case AMDGPU::OPERAND_REG_IMM_FP64: 1516 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1517 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1518 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1519 AsmParser->hasInv2PiInlineImm())) { 1520 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1521 return; 1522 } 1523 1524 // Non-inlineable 1525 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1526 // For fp operands we check if low 32 bits are zeros 1527 if (Literal.getLoBits(32) != 0) { 1528 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1529 "Can't encode literal as exact 64-bit floating-point operand. " 1530 "Low 32-bits will be set to zero"); 1531 } 1532 1533 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1534 return; 1535 } 1536 1537 // We don't allow fp literals in 64-bit integer instructions. It is 1538 // unclear how we should encode them. 
This case should be checked earlier 1539 // in predicate methods (isLiteralImm()) 1540 llvm_unreachable("fp literal in 64-bit integer instruction."); 1541 1542 case AMDGPU::OPERAND_REG_IMM_INT32: 1543 case AMDGPU::OPERAND_REG_IMM_FP32: 1544 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1545 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1546 case AMDGPU::OPERAND_REG_IMM_INT16: 1547 case AMDGPU::OPERAND_REG_IMM_FP16: 1548 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1549 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1550 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1551 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1552 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1553 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1554 bool lost; 1555 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1556 // Convert literal to single precision 1557 FPLiteral.convert(*getOpFltSemantics(OpTy), 1558 APFloat::rmNearestTiesToEven, &lost); 1559 // We allow precision lost but not overflow or underflow. This should be 1560 // checked earlier in isLiteralImm() 1561 1562 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1563 Inst.addOperand(MCOperand::createImm(ImmVal)); 1564 return; 1565 } 1566 default: 1567 llvm_unreachable("invalid operand size"); 1568 } 1569 1570 return; 1571 } 1572 1573 // We got int literal token. 1574 // Only sign extend inline immediates. 
1575 switch (OpTy) { 1576 case AMDGPU::OPERAND_REG_IMM_INT32: 1577 case AMDGPU::OPERAND_REG_IMM_FP32: 1578 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1579 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1580 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1581 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1582 if (isSafeTruncation(Val, 32) && 1583 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1584 AsmParser->hasInv2PiInlineImm())) { 1585 Inst.addOperand(MCOperand::createImm(Val)); 1586 return; 1587 } 1588 1589 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1590 return; 1591 1592 case AMDGPU::OPERAND_REG_IMM_INT64: 1593 case AMDGPU::OPERAND_REG_IMM_FP64: 1594 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1595 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1596 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1597 Inst.addOperand(MCOperand::createImm(Val)); 1598 return; 1599 } 1600 1601 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1602 return; 1603 1604 case AMDGPU::OPERAND_REG_IMM_INT16: 1605 case AMDGPU::OPERAND_REG_IMM_FP16: 1606 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1607 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1608 if (isSafeTruncation(Val, 16) && 1609 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1610 AsmParser->hasInv2PiInlineImm())) { 1611 Inst.addOperand(MCOperand::createImm(Val)); 1612 return; 1613 } 1614 1615 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1616 return; 1617 1618 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1619 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { 1620 assert(isSafeTruncation(Val, 16)); 1621 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1622 AsmParser->hasInv2PiInlineImm())); 1623 1624 Inst.addOperand(MCOperand::createImm(Val)); 1625 return; 1626 } 1627 default: 1628 llvm_unreachable("invalid operand size"); 1629 } 1630 } 1631 1632 template <unsigned Bitwidth> 1633 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1634 APInt Literal(64, Imm.Val); 1635 1636 if 
(!Imm.IsFPImm) { 1637 // We got int literal token. 1638 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1639 return; 1640 } 1641 1642 bool Lost; 1643 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1644 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1645 APFloat::rmNearestTiesToEven, &Lost); 1646 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1647 } 1648 1649 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1650 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1651 } 1652 1653 static bool isInlineValue(unsigned Reg) { 1654 switch (Reg) { 1655 case AMDGPU::SRC_SHARED_BASE: 1656 case AMDGPU::SRC_SHARED_LIMIT: 1657 case AMDGPU::SRC_PRIVATE_BASE: 1658 case AMDGPU::SRC_PRIVATE_LIMIT: 1659 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1660 return true; 1661 default: 1662 return false; 1663 } 1664 } 1665 1666 bool AMDGPUOperand::isInlineValue() const { 1667 return isRegKind() && ::isInlineValue(getReg()); 1668 } 1669 1670 //===----------------------------------------------------------------------===// 1671 // AsmParser 1672 //===----------------------------------------------------------------------===// 1673 1674 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1675 if (Is == IS_VGPR) { 1676 switch (RegWidth) { 1677 default: return -1; 1678 case 1: return AMDGPU::VGPR_32RegClassID; 1679 case 2: return AMDGPU::VReg_64RegClassID; 1680 case 3: return AMDGPU::VReg_96RegClassID; 1681 case 4: return AMDGPU::VReg_128RegClassID; 1682 case 8: return AMDGPU::VReg_256RegClassID; 1683 case 16: return AMDGPU::VReg_512RegClassID; 1684 } 1685 } else if (Is == IS_TTMP) { 1686 switch (RegWidth) { 1687 default: return -1; 1688 case 1: return AMDGPU::TTMP_32RegClassID; 1689 case 2: return AMDGPU::TTMP_64RegClassID; 1690 case 4: return AMDGPU::TTMP_128RegClassID; 1691 case 8: return AMDGPU::TTMP_256RegClassID; 1692 case 16: return 
AMDGPU::TTMP_512RegClassID; 1693 } 1694 } else if (Is == IS_SGPR) { 1695 switch (RegWidth) { 1696 default: return -1; 1697 case 1: return AMDGPU::SGPR_32RegClassID; 1698 case 2: return AMDGPU::SGPR_64RegClassID; 1699 case 4: return AMDGPU::SGPR_128RegClassID; 1700 case 8: return AMDGPU::SGPR_256RegClassID; 1701 case 16: return AMDGPU::SGPR_512RegClassID; 1702 } 1703 } 1704 return -1; 1705 } 1706 1707 static unsigned getSpecialRegForName(StringRef RegName) { 1708 return StringSwitch<unsigned>(RegName) 1709 .Case("exec", AMDGPU::EXEC) 1710 .Case("vcc", AMDGPU::VCC) 1711 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1712 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1713 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1714 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1715 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1716 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1717 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1718 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1719 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1720 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1721 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1722 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1723 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1724 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1725 .Case("m0", AMDGPU::M0) 1726 .Case("scc", AMDGPU::SCC) 1727 .Case("tba", AMDGPU::TBA) 1728 .Case("tma", AMDGPU::TMA) 1729 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1730 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1731 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1732 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1733 .Case("vcc_lo", AMDGPU::VCC_LO) 1734 .Case("vcc_hi", AMDGPU::VCC_HI) 1735 .Case("exec_lo", AMDGPU::EXEC_LO) 1736 .Case("exec_hi", AMDGPU::EXEC_HI) 1737 .Case("tma_lo", AMDGPU::TMA_LO) 1738 .Case("tma_hi", AMDGPU::TMA_HI) 1739 .Case("tba_lo", AMDGPU::TBA_LO) 1740 .Case("tba_hi", AMDGPU::TBA_HI) 1741 .Case("null", AMDGPU::SGPR_NULL) 
1742 .Default(0); 1743 } 1744 1745 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1746 SMLoc &EndLoc) { 1747 auto R = parseRegister(); 1748 if (!R) return true; 1749 assert(R->isReg()); 1750 RegNo = R->getReg(); 1751 StartLoc = R->getStartLoc(); 1752 EndLoc = R->getEndLoc(); 1753 return false; 1754 } 1755 1756 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1757 RegisterKind RegKind, unsigned Reg1, 1758 unsigned RegNum) { 1759 switch (RegKind) { 1760 case IS_SPECIAL: 1761 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1762 Reg = AMDGPU::EXEC; 1763 RegWidth = 2; 1764 return true; 1765 } 1766 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1767 Reg = AMDGPU::FLAT_SCR; 1768 RegWidth = 2; 1769 return true; 1770 } 1771 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1772 Reg = AMDGPU::XNACK_MASK; 1773 RegWidth = 2; 1774 return true; 1775 } 1776 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1777 Reg = AMDGPU::VCC; 1778 RegWidth = 2; 1779 return true; 1780 } 1781 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1782 Reg = AMDGPU::TBA; 1783 RegWidth = 2; 1784 return true; 1785 } 1786 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 1787 Reg = AMDGPU::TMA; 1788 RegWidth = 2; 1789 return true; 1790 } 1791 return false; 1792 case IS_VGPR: 1793 case IS_SGPR: 1794 case IS_TTMP: 1795 if (Reg1 != Reg + RegWidth) { 1796 return false; 1797 } 1798 RegWidth++; 1799 return true; 1800 default: 1801 llvm_unreachable("unexpected register kind"); 1802 } 1803 } 1804 1805 static const StringRef Registers[] = { 1806 { "v" }, 1807 { "s" }, 1808 { "ttmp" }, 1809 }; 1810 1811 bool 1812 AMDGPUAsmParser::isRegister(const AsmToken &Token, 1813 const AsmToken &NextToken) const { 1814 1815 // A list of consecutive registers: [s0,s1,s2,s3] 1816 if (Token.is(AsmToken::LBrac)) 1817 return true; 1818 1819 if (!Token.is(AsmToken::Identifier)) 1820 return false; 1821 1822 // A single 
register like s0 or a range of registers like s[0:1] 1823 1824 StringRef RegName = Token.getString(); 1825 1826 for (StringRef Reg : Registers) { 1827 if (RegName.startswith(Reg)) { 1828 if (Reg.size() < RegName.size()) { 1829 unsigned RegNum; 1830 // A single register with an index: rXX 1831 if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum)) 1832 return true; 1833 } else { 1834 // A range of registers: r[XX:YY]. 1835 if (NextToken.is(AsmToken::LBrac)) 1836 return true; 1837 } 1838 } 1839 } 1840 1841 return getSpecialRegForName(RegName); 1842 } 1843 1844 bool 1845 AMDGPUAsmParser::isRegister() 1846 { 1847 return isRegister(getToken(), peekToken()); 1848 } 1849 1850 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1851 unsigned &RegNum, unsigned &RegWidth, 1852 unsigned *DwordRegIndex) { 1853 if (DwordRegIndex) { *DwordRegIndex = 0; } 1854 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 1855 if (getLexer().is(AsmToken::Identifier)) { 1856 StringRef RegName = Parser.getTok().getString(); 1857 if ((Reg = getSpecialRegForName(RegName))) { 1858 Parser.Lex(); 1859 RegKind = IS_SPECIAL; 1860 } else { 1861 unsigned RegNumIndex = 0; 1862 if (RegName[0] == 'v') { 1863 RegNumIndex = 1; 1864 RegKind = IS_VGPR; 1865 } else if (RegName[0] == 's') { 1866 RegNumIndex = 1; 1867 RegKind = IS_SGPR; 1868 } else if (RegName.startswith("ttmp")) { 1869 RegNumIndex = strlen("ttmp"); 1870 RegKind = IS_TTMP; 1871 } else { 1872 return false; 1873 } 1874 if (RegName.size() > RegNumIndex) { 1875 // Single 32-bit register: vXX. 1876 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 1877 return false; 1878 Parser.Lex(); 1879 RegWidth = 1; 1880 } else { 1881 // Range of registers: v[XX:YY]. ":YY" is optional. 
1882 Parser.Lex(); 1883 int64_t RegLo, RegHi; 1884 if (getLexer().isNot(AsmToken::LBrac)) 1885 return false; 1886 Parser.Lex(); 1887 1888 if (getParser().parseAbsoluteExpression(RegLo)) 1889 return false; 1890 1891 const bool isRBrace = getLexer().is(AsmToken::RBrac); 1892 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 1893 return false; 1894 Parser.Lex(); 1895 1896 if (isRBrace) { 1897 RegHi = RegLo; 1898 } else { 1899 if (getParser().parseAbsoluteExpression(RegHi)) 1900 return false; 1901 1902 if (getLexer().isNot(AsmToken::RBrac)) 1903 return false; 1904 Parser.Lex(); 1905 } 1906 RegNum = (unsigned) RegLo; 1907 RegWidth = (RegHi - RegLo) + 1; 1908 } 1909 } 1910 } else if (getLexer().is(AsmToken::LBrac)) { 1911 // List of consecutive registers: [s0,s1,s2,s3] 1912 Parser.Lex(); 1913 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 1914 return false; 1915 if (RegWidth != 1) 1916 return false; 1917 RegisterKind RegKind1; 1918 unsigned Reg1, RegNum1, RegWidth1; 1919 do { 1920 if (getLexer().is(AsmToken::Comma)) { 1921 Parser.Lex(); 1922 } else if (getLexer().is(AsmToken::RBrac)) { 1923 Parser.Lex(); 1924 break; 1925 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 1926 if (RegWidth1 != 1) { 1927 return false; 1928 } 1929 if (RegKind1 != RegKind) { 1930 return false; 1931 } 1932 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 1933 return false; 1934 } 1935 } else { 1936 return false; 1937 } 1938 } while (true); 1939 } else { 1940 return false; 1941 } 1942 switch (RegKind) { 1943 case IS_SPECIAL: 1944 RegNum = 0; 1945 RegWidth = 1; 1946 break; 1947 case IS_VGPR: 1948 case IS_SGPR: 1949 case IS_TTMP: 1950 { 1951 unsigned Size = 1; 1952 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 1953 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
1954 Size = std::min(RegWidth, 4u); 1955 } 1956 if (RegNum % Size != 0) 1957 return false; 1958 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 1959 RegNum = RegNum / Size; 1960 int RCID = getRegClass(RegKind, RegWidth); 1961 if (RCID == -1) 1962 return false; 1963 const MCRegisterClass RC = TRI->getRegClass(RCID); 1964 if (RegNum >= RC.getNumRegs()) 1965 return false; 1966 Reg = RC.getRegister(RegNum); 1967 break; 1968 } 1969 1970 default: 1971 llvm_unreachable("unexpected register kind"); 1972 } 1973 1974 if (!subtargetHasRegister(*TRI, Reg)) 1975 return false; 1976 return true; 1977 } 1978 1979 Optional<StringRef> 1980 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 1981 switch (RegKind) { 1982 case IS_VGPR: 1983 return StringRef(".amdgcn.next_free_vgpr"); 1984 case IS_SGPR: 1985 return StringRef(".amdgcn.next_free_sgpr"); 1986 default: 1987 return None; 1988 } 1989 } 1990 1991 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 1992 auto SymbolName = getGprCountSymbolName(RegKind); 1993 assert(SymbolName && "initializing invalid register kind"); 1994 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1995 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 1996 } 1997 1998 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 1999 unsigned DwordRegIndex, 2000 unsigned RegWidth) { 2001 // Symbols are only defined for GCN targets 2002 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2003 return true; 2004 2005 auto SymbolName = getGprCountSymbolName(RegKind); 2006 if (!SymbolName) 2007 return true; 2008 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2009 2010 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2011 int64_t OldCount; 2012 2013 if (!Sym->isVariable()) 2014 return !Error(getParser().getTok().getLoc(), 2015 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2016 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2017 return !Error( 2018 
getParser().getTok().getLoc(), 2019 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2020 2021 if (OldCount <= NewMax) 2022 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2023 2024 return true; 2025 } 2026 2027 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 2028 const auto &Tok = Parser.getTok(); 2029 SMLoc StartLoc = Tok.getLoc(); 2030 SMLoc EndLoc = Tok.getEndLoc(); 2031 RegisterKind RegKind; 2032 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 2033 2034 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 2035 //FIXME: improve error messages (bug 41303). 2036 Error(StartLoc, "not a valid operand."); 2037 return nullptr; 2038 } 2039 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2040 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 2041 return nullptr; 2042 } else 2043 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 2044 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2045 } 2046 2047 OperandMatchResultTy 2048 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2049 // TODO: add syntactic sugar for 1/(2*PI) 2050 2051 assert(!isRegister()); 2052 assert(!isModifier()); 2053 2054 const auto& Tok = getToken(); 2055 const auto& NextTok = peekToken(); 2056 bool IsReal = Tok.is(AsmToken::Real); 2057 SMLoc S = getLoc(); 2058 bool Negate = false; 2059 2060 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2061 lex(); 2062 IsReal = true; 2063 Negate = true; 2064 } 2065 2066 if (IsReal) { 2067 // Floating-point expressions are not supported. 2068 // Can only allow floating-point literals with an 2069 // optional sign. 
2070 2071 StringRef Num = getTokenStr(); 2072 lex(); 2073 2074 APFloat RealVal(APFloat::IEEEdouble()); 2075 auto roundMode = APFloat::rmNearestTiesToEven; 2076 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { 2077 return MatchOperand_ParseFail; 2078 } 2079 if (Negate) 2080 RealVal.changeSign(); 2081 2082 Operands.push_back( 2083 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2084 AMDGPUOperand::ImmTyNone, true)); 2085 2086 return MatchOperand_Success; 2087 2088 } else { 2089 int64_t IntVal; 2090 const MCExpr *Expr; 2091 SMLoc S = getLoc(); 2092 2093 if (HasSP3AbsModifier) { 2094 // This is a workaround for handling expressions 2095 // as arguments of SP3 'abs' modifier, for example: 2096 // |1.0| 2097 // |-1| 2098 // |1+x| 2099 // This syntax is not compatible with syntax of standard 2100 // MC expressions (due to the trailing '|'). 2101 SMLoc EndLoc; 2102 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2103 return MatchOperand_ParseFail; 2104 } else { 2105 if (Parser.parseExpression(Expr)) 2106 return MatchOperand_ParseFail; 2107 } 2108 2109 if (Expr->evaluateAsAbsolute(IntVal)) { 2110 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2111 } else { 2112 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2113 } 2114 2115 return MatchOperand_Success; 2116 } 2117 2118 return MatchOperand_NoMatch; 2119 } 2120 2121 OperandMatchResultTy 2122 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2123 if (!isRegister()) 2124 return MatchOperand_NoMatch; 2125 2126 if (auto R = parseRegister()) { 2127 assert(R->isReg()); 2128 Operands.push_back(std::move(R)); 2129 return MatchOperand_Success; 2130 } 2131 return MatchOperand_ParseFail; 2132 } 2133 2134 OperandMatchResultTy 2135 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2136 auto res = parseReg(Operands); 2137 if (res != MatchOperand_NoMatch) { 2138 return res; 2139 } else if (isModifier()) { 2140 return 
MatchOperand_NoMatch; 2141 } else { 2142 return parseImm(Operands, HasSP3AbsMod); 2143 } 2144 } 2145 2146 bool 2147 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2148 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2149 const auto &str = Token.getString(); 2150 return str == "abs" || str == "neg" || str == "sext"; 2151 } 2152 return false; 2153 } 2154 2155 bool 2156 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2157 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2158 } 2159 2160 bool 2161 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2162 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2163 } 2164 2165 bool 2166 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2167 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2168 } 2169 2170 // Check if this is an operand modifier or an opcode modifier 2171 // which may look like an expression but it is not. We should 2172 // avoid parsing these modifiers as expressions. Currently 2173 // recognized sequences are: 2174 // |...| 2175 // abs(...) 2176 // neg(...) 2177 // sext(...) 2178 // -reg 2179 // -|...| 2180 // -abs(...) 2181 // name:... 2182 // Note that simple opcode modifiers like 'gds' may be parsed as 2183 // expressions; this is a special case. See getExpressionAsToken. 2184 // 2185 bool 2186 AMDGPUAsmParser::isModifier() { 2187 2188 AsmToken Tok = getToken(); 2189 AsmToken NextToken[2]; 2190 peekTokens(NextToken); 2191 2192 return isOperandModifier(Tok, NextToken[0]) || 2193 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2194 isOpcodeModifierWithVal(Tok, NextToken[0]); 2195 } 2196 2197 // Check if the current token is an SP3 'neg' modifier. 
2198 // Currently this modifier is allowed in the following context: 2199 // 2200 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2201 // 2. Before an 'abs' modifier: -abs(...) 2202 // 3. Before an SP3 'abs' modifier: -|...| 2203 // 2204 // In all other cases "-" is handled as a part 2205 // of an expression that follows the sign. 2206 // 2207 // Note: When "-" is followed by an integer literal, 2208 // this is interpreted as integer negation rather 2209 // than a floating-point NEG modifier applied to N. 2210 // Beside being contr-intuitive, such use of floating-point 2211 // NEG modifier would have resulted in different meaning 2212 // of integer literals used with VOP1/2/C and VOP3, 2213 // for example: 2214 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2215 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2216 // Negative fp literals with preceding "-" are 2217 // handled likewise for unifomtity 2218 // 2219 bool 2220 AMDGPUAsmParser::parseSP3NegModifier() { 2221 2222 AsmToken NextToken[2]; 2223 peekTokens(NextToken); 2224 2225 if (isToken(AsmToken::Minus) && 2226 (isRegister(NextToken[0], NextToken[1]) || 2227 NextToken[0].is(AsmToken::Pipe) || 2228 isId(NextToken[0], "abs"))) { 2229 lex(); 2230 return true; 2231 } 2232 2233 return false; 2234 } 2235 2236 OperandMatchResultTy 2237 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2238 bool AllowImm) { 2239 bool Neg, SP3Neg; 2240 bool Abs, SP3Abs; 2241 SMLoc Loc; 2242 2243 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 
2244 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2245 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2246 return MatchOperand_ParseFail; 2247 } 2248 2249 SP3Neg = parseSP3NegModifier(); 2250 2251 Loc = getLoc(); 2252 Neg = trySkipId("neg"); 2253 if (Neg && SP3Neg) { 2254 Error(Loc, "expected register or immediate"); 2255 return MatchOperand_ParseFail; 2256 } 2257 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2258 return MatchOperand_ParseFail; 2259 2260 Abs = trySkipId("abs"); 2261 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2262 return MatchOperand_ParseFail; 2263 2264 Loc = getLoc(); 2265 SP3Abs = trySkipToken(AsmToken::Pipe); 2266 if (Abs && SP3Abs) { 2267 Error(Loc, "expected register or immediate"); 2268 return MatchOperand_ParseFail; 2269 } 2270 2271 OperandMatchResultTy Res; 2272 if (AllowImm) { 2273 Res = parseRegOrImm(Operands, SP3Abs); 2274 } else { 2275 Res = parseReg(Operands); 2276 } 2277 if (Res != MatchOperand_Success) { 2278 return (SP3Neg || Neg || SP3Abs || Abs)? 
MatchOperand_ParseFail : Res; 2279 } 2280 2281 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2282 return MatchOperand_ParseFail; 2283 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2284 return MatchOperand_ParseFail; 2285 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2286 return MatchOperand_ParseFail; 2287 2288 AMDGPUOperand::Modifiers Mods; 2289 Mods.Abs = Abs || SP3Abs; 2290 Mods.Neg = Neg || SP3Neg; 2291 2292 if (Mods.hasFPModifiers()) { 2293 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2294 if (Op.isExpr()) { 2295 Error(Op.getStartLoc(), "expected an absolute expression"); 2296 return MatchOperand_ParseFail; 2297 } 2298 Op.setModifiers(Mods); 2299 } 2300 return MatchOperand_Success; 2301 } 2302 2303 OperandMatchResultTy 2304 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2305 bool AllowImm) { 2306 bool Sext = trySkipId("sext"); 2307 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2308 return MatchOperand_ParseFail; 2309 2310 OperandMatchResultTy Res; 2311 if (AllowImm) { 2312 Res = parseRegOrImm(Operands); 2313 } else { 2314 Res = parseReg(Operands); 2315 } 2316 if (Res != MatchOperand_Success) { 2317 return Sext? 
MatchOperand_ParseFail : Res; 2318 } 2319 2320 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2321 return MatchOperand_ParseFail; 2322 2323 AMDGPUOperand::Modifiers Mods; 2324 Mods.Sext = Sext; 2325 2326 if (Mods.hasIntModifiers()) { 2327 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2328 if (Op.isExpr()) { 2329 Error(Op.getStartLoc(), "expected an absolute expression"); 2330 return MatchOperand_ParseFail; 2331 } 2332 Op.setModifiers(Mods); 2333 } 2334 2335 return MatchOperand_Success; 2336 } 2337 2338 OperandMatchResultTy 2339 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2340 return parseRegOrImmWithFPInputMods(Operands, false); 2341 } 2342 2343 OperandMatchResultTy 2344 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2345 return parseRegOrImmWithIntInputMods(Operands, false); 2346 } 2347 2348 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2349 auto Loc = getLoc(); 2350 if (trySkipId("off")) { 2351 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2352 AMDGPUOperand::ImmTyOff, false)); 2353 return MatchOperand_Success; 2354 } 2355 2356 if (!isRegister()) 2357 return MatchOperand_NoMatch; 2358 2359 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2360 if (Reg) { 2361 Operands.push_back(std::move(Reg)); 2362 return MatchOperand_Success; 2363 } 2364 2365 return MatchOperand_ParseFail; 2366 2367 } 2368 2369 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2370 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2371 2372 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2373 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2374 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2375 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2376 return Match_InvalidOperand; 2377 2378 if ((TSFlags & SIInstrFlags::VOP3) && 2379 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) 
&& 2380 getForcedEncodingSize() != 64) 2381 return Match_PreferE32; 2382 2383 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2384 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2385 // v_mac_f32/16 allow only dst_sel == DWORD; 2386 auto OpNum = 2387 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2388 const auto &Op = Inst.getOperand(OpNum); 2389 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2390 return Match_InvalidOperand; 2391 } 2392 } 2393 2394 if (TSFlags & SIInstrFlags::FLAT) { 2395 // FIXME: Produces error without correct column reported. 2396 auto Opcode = Inst.getOpcode(); 2397 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 2398 2399 const auto &Op = Inst.getOperand(OpNum); 2400 if (!hasFlatOffsets() && Op.getImm() != 0) 2401 return Match_InvalidOperand; 2402 2403 // GFX10: Address offset is 12-bit signed byte offset. Must be positive for 2404 // FLAT segment. For FLAT segment MSB is ignored and forced to zero. 
2405 if (isGFX10()) { 2406 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 2407 if (!isInt<12>(Op.getImm())) 2408 return Match_InvalidOperand; 2409 } else { 2410 if (!isUInt<11>(Op.getImm())) 2411 return Match_InvalidOperand; 2412 } 2413 } 2414 } 2415 2416 return Match_Success; 2417 } 2418 2419 // What asm variants we should check 2420 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2421 if (getForcedEncodingSize() == 32) { 2422 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2423 return makeArrayRef(Variants); 2424 } 2425 2426 if (isForcedVOP3()) { 2427 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2428 return makeArrayRef(Variants); 2429 } 2430 2431 if (isForcedSDWA()) { 2432 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2433 AMDGPUAsmVariants::SDWA9}; 2434 return makeArrayRef(Variants); 2435 } 2436 2437 if (isForcedDPP()) { 2438 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2439 return makeArrayRef(Variants); 2440 } 2441 2442 static const unsigned Variants[] = { 2443 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2444 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2445 }; 2446 2447 return makeArrayRef(Variants); 2448 } 2449 2450 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2451 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2452 const unsigned Num = Desc.getNumImplicitUses(); 2453 for (unsigned i = 0; i < Num; ++i) { 2454 unsigned Reg = Desc.ImplicitUses[i]; 2455 switch (Reg) { 2456 case AMDGPU::FLAT_SCR: 2457 case AMDGPU::VCC: 2458 case AMDGPU::VCC_LO: 2459 case AMDGPU::VCC_HI: 2460 case AMDGPU::M0: 2461 case AMDGPU::SGPR_NULL: 2462 return Reg; 2463 default: 2464 break; 2465 } 2466 } 2467 return AMDGPU::NoRegister; 2468 } 2469 2470 // NB: This code is correct only when used to check constant 2471 // bus limitations because GFX7 support no f16 inline constants. 
2472 // Note that there are no cases when a GFX7 opcode violates 2473 // constant bus limitations due to the use of an f16 constant. 2474 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2475 unsigned OpIdx) const { 2476 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2477 2478 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2479 return false; 2480 } 2481 2482 const MCOperand &MO = Inst.getOperand(OpIdx); 2483 2484 int64_t Val = MO.getImm(); 2485 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2486 2487 switch (OpSize) { // expected operand size 2488 case 8: 2489 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2490 case 4: 2491 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2492 case 2: { 2493 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2494 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2495 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2496 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2497 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2498 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2499 } else { 2500 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2501 } 2502 } 2503 default: 2504 llvm_unreachable("invalid operand size"); 2505 } 2506 } 2507 2508 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2509 const MCOperand &MO = Inst.getOperand(OpIdx); 2510 if (MO.isImm()) { 2511 return !isInlineConstant(Inst, OpIdx); 2512 } 2513 return !MO.isReg() || 2514 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2515 } 2516 2517 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2518 const unsigned Opcode = Inst.getOpcode(); 2519 const MCInstrDesc &Desc = MII.get(Opcode); 2520 unsigned ConstantBusUseCount = 0; 2521 unsigned NumLiterals = 0; 2522 unsigned LiteralSize; 2523 2524 if (Desc.TSFlags & 2525 (SIInstrFlags::VOPC | 2526 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2527 
SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2528 SIInstrFlags::SDWA)) { 2529 // Check special imm operands (used by madmk, etc) 2530 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2531 ++ConstantBusUseCount; 2532 } 2533 2534 SmallDenseSet<unsigned> SGPRsUsed; 2535 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2536 if (SGPRUsed != AMDGPU::NoRegister) { 2537 SGPRsUsed.insert(SGPRUsed); 2538 ++ConstantBusUseCount; 2539 } 2540 2541 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2542 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2543 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2544 2545 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2546 2547 for (int OpIdx : OpIndices) { 2548 if (OpIdx == -1) break; 2549 2550 const MCOperand &MO = Inst.getOperand(OpIdx); 2551 if (usesConstantBus(Inst, OpIdx)) { 2552 if (MO.isReg()) { 2553 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2554 // Pairs of registers with a partial intersections like these 2555 // s0, s[0:1] 2556 // flat_scratch_lo, flat_scratch 2557 // flat_scratch_lo, flat_scratch_hi 2558 // are theoretically valid but they are disabled anyway. 2559 // Note that this code mimics SIInstrInfo::verifyInstruction 2560 if (!SGPRsUsed.count(Reg)) { 2561 SGPRsUsed.insert(Reg); 2562 ++ConstantBusUseCount; 2563 } 2564 SGPRUsed = Reg; 2565 } else { // Expression or a literal 2566 2567 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2568 continue; // special operand like VINTERP attr_chan 2569 2570 // An instruction may use only one literal. 2571 // This has been validated on the previous step. 2572 // See validateVOP3Literal. 2573 // This literal may be used as more than one operand. 2574 // If all these operands are of the same size, 2575 // this literal counts as one scalar value. 2576 // Otherwise it counts as 2 scalar values. 2577 // See "GFX10 Shader Programming", section 3.6.2.3. 
2578 2579 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2580 if (Size < 4) Size = 4; 2581 2582 if (NumLiterals == 0) { 2583 NumLiterals = 1; 2584 LiteralSize = Size; 2585 } else if (LiteralSize != Size) { 2586 NumLiterals = 2; 2587 } 2588 } 2589 } 2590 } 2591 } 2592 ConstantBusUseCount += NumLiterals; 2593 2594 if (isGFX10()) 2595 return ConstantBusUseCount <= 2; 2596 2597 return ConstantBusUseCount <= 1; 2598 } 2599 2600 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2601 const unsigned Opcode = Inst.getOpcode(); 2602 const MCInstrDesc &Desc = MII.get(Opcode); 2603 2604 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2605 if (DstIdx == -1 || 2606 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2607 return true; 2608 } 2609 2610 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2611 2612 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2613 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2614 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2615 2616 assert(DstIdx != -1); 2617 const MCOperand &Dst = Inst.getOperand(DstIdx); 2618 assert(Dst.isReg()); 2619 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2620 2621 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2622 2623 for (int SrcIdx : SrcIndices) { 2624 if (SrcIdx == -1) break; 2625 const MCOperand &Src = Inst.getOperand(SrcIdx); 2626 if (Src.isReg()) { 2627 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2628 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2629 return false; 2630 } 2631 } 2632 } 2633 2634 return true; 2635 } 2636 2637 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2638 2639 const unsigned Opc = Inst.getOpcode(); 2640 const MCInstrDesc &Desc = MII.get(Opc); 2641 2642 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2643 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, 
AMDGPU::OpName::clamp); 2644 assert(ClampIdx != -1); 2645 return Inst.getOperand(ClampIdx).getImm() == 0; 2646 } 2647 2648 return true; 2649 } 2650 2651 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2652 2653 const unsigned Opc = Inst.getOpcode(); 2654 const MCInstrDesc &Desc = MII.get(Opc); 2655 2656 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2657 return true; 2658 2659 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2660 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2661 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2662 2663 assert(VDataIdx != -1); 2664 assert(DMaskIdx != -1); 2665 assert(TFEIdx != -1); 2666 2667 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2668 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2669 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2670 if (DMask == 0) 2671 DMask = 1; 2672 2673 unsigned DataSize = 2674 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 2675 if (hasPackedD16()) { 2676 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2677 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2678 DataSize = (DataSize + 1) / 2; 2679 } 2680 2681 return (VDataSize / 4) == DataSize + TFESize; 2682 } 2683 2684 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 2685 const unsigned Opc = Inst.getOpcode(); 2686 const MCInstrDesc &Desc = MII.get(Opc); 2687 2688 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 2689 return true; 2690 2691 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 2692 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 2693 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 2694 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 2695 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 2696 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2697 2698 assert(VAddr0Idx != -1); 2699 assert(SrsrcIdx != -1); 2700 assert(DimIdx != -1); 2701 assert(SrsrcIdx > VAddr0Idx); 2702 2703 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 2704 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 2705 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 2706 unsigned VAddrSize = 2707 IsNSA ? SrsrcIdx - VAddr0Idx 2708 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 2709 2710 unsigned AddrSize = BaseOpcode->NumExtraArgs + 2711 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 2712 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 2713 (BaseOpcode->LodOrClampOrMip ? 
1 : 0); 2714 if (!IsNSA) { 2715 if (AddrSize > 8) 2716 AddrSize = 16; 2717 else if (AddrSize > 4) 2718 AddrSize = 8; 2719 } 2720 2721 return VAddrSize == AddrSize; 2722 } 2723 2724 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2725 2726 const unsigned Opc = Inst.getOpcode(); 2727 const MCInstrDesc &Desc = MII.get(Opc); 2728 2729 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2730 return true; 2731 if (!Desc.mayLoad() || !Desc.mayStore()) 2732 return true; // Not atomic 2733 2734 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2735 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2736 2737 // This is an incomplete check because image_atomic_cmpswap 2738 // may only use 0x3 and 0xf while other atomic operations 2739 // may use 0x1 and 0x3. However these limitations are 2740 // verified when we check that dmask matches dst size. 2741 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2742 } 2743 2744 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2745 2746 const unsigned Opc = Inst.getOpcode(); 2747 const MCInstrDesc &Desc = MII.get(Opc); 2748 2749 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2750 return true; 2751 2752 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2753 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2754 2755 // GATHER4 instructions use dmask in a different fashion compared to 2756 // other MIMG instructions. The only useful DMASK values are 2757 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2758 // (red,red,red,red) etc.) The ISA document doesn't mention 2759 // this. 
2760 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2761 } 2762 2763 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2764 2765 const unsigned Opc = Inst.getOpcode(); 2766 const MCInstrDesc &Desc = MII.get(Opc); 2767 2768 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2769 return true; 2770 2771 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2772 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2773 if (isCI() || isSI()) 2774 return false; 2775 } 2776 2777 return true; 2778 } 2779 2780 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 2781 const unsigned Opc = Inst.getOpcode(); 2782 const MCInstrDesc &Desc = MII.get(Opc); 2783 2784 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2785 return true; 2786 2787 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2788 if (DimIdx < 0) 2789 return true; 2790 2791 long Imm = Inst.getOperand(DimIdx).getImm(); 2792 if (Imm < 0 || Imm >= 8) 2793 return false; 2794 2795 return true; 2796 } 2797 2798 static bool IsRevOpcode(const unsigned Opcode) 2799 { 2800 switch (Opcode) { 2801 case AMDGPU::V_SUBREV_F32_e32: 2802 case AMDGPU::V_SUBREV_F32_e64: 2803 case AMDGPU::V_SUBREV_F32_e32_gfx10: 2804 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 2805 case AMDGPU::V_SUBREV_F32_e32_vi: 2806 case AMDGPU::V_SUBREV_F32_e64_gfx10: 2807 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 2808 case AMDGPU::V_SUBREV_F32_e64_vi: 2809 2810 case AMDGPU::V_SUBREV_I32_e32: 2811 case AMDGPU::V_SUBREV_I32_e64: 2812 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 2813 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 2814 2815 case AMDGPU::V_SUBBREV_U32_e32: 2816 case AMDGPU::V_SUBBREV_U32_e64: 2817 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 2818 case AMDGPU::V_SUBBREV_U32_e32_vi: 2819 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 2820 case AMDGPU::V_SUBBREV_U32_e64_vi: 2821 2822 case AMDGPU::V_SUBREV_U32_e32: 2823 case AMDGPU::V_SUBREV_U32_e64: 2824 case AMDGPU::V_SUBREV_U32_e32_gfx9: 2825 case 
AMDGPU::V_SUBREV_U32_e32_vi: 2826 case AMDGPU::V_SUBREV_U32_e64_gfx9: 2827 case AMDGPU::V_SUBREV_U32_e64_vi: 2828 2829 case AMDGPU::V_SUBREV_F16_e32: 2830 case AMDGPU::V_SUBREV_F16_e64: 2831 case AMDGPU::V_SUBREV_F16_e32_gfx10: 2832 case AMDGPU::V_SUBREV_F16_e32_vi: 2833 case AMDGPU::V_SUBREV_F16_e64_gfx10: 2834 case AMDGPU::V_SUBREV_F16_e64_vi: 2835 2836 case AMDGPU::V_SUBREV_U16_e32: 2837 case AMDGPU::V_SUBREV_U16_e64: 2838 case AMDGPU::V_SUBREV_U16_e32_vi: 2839 case AMDGPU::V_SUBREV_U16_e64_vi: 2840 2841 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 2842 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 2843 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 2844 2845 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 2846 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 2847 2848 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 2849 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 2850 2851 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 2852 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 2853 2854 case AMDGPU::V_LSHRREV_B32_e32: 2855 case AMDGPU::V_LSHRREV_B32_e64: 2856 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 2857 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 2858 case AMDGPU::V_LSHRREV_B32_e32_vi: 2859 case AMDGPU::V_LSHRREV_B32_e64_vi: 2860 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 2861 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 2862 2863 case AMDGPU::V_ASHRREV_I32_e32: 2864 case AMDGPU::V_ASHRREV_I32_e64: 2865 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 2866 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 2867 case AMDGPU::V_ASHRREV_I32_e32_vi: 2868 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 2869 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 2870 case AMDGPU::V_ASHRREV_I32_e64_vi: 2871 2872 case AMDGPU::V_LSHLREV_B32_e32: 2873 case AMDGPU::V_LSHLREV_B32_e64: 2874 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 2875 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 2876 case AMDGPU::V_LSHLREV_B32_e32_vi: 2877 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 2878 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 2879 case AMDGPU::V_LSHLREV_B32_e64_vi: 2880 2881 case AMDGPU::V_LSHLREV_B16_e32: 2882 
case AMDGPU::V_LSHLREV_B16_e64: 2883 case AMDGPU::V_LSHLREV_B16_e32_vi: 2884 case AMDGPU::V_LSHLREV_B16_e64_vi: 2885 case AMDGPU::V_LSHLREV_B16_gfx10: 2886 2887 case AMDGPU::V_LSHRREV_B16_e32: 2888 case AMDGPU::V_LSHRREV_B16_e64: 2889 case AMDGPU::V_LSHRREV_B16_e32_vi: 2890 case AMDGPU::V_LSHRREV_B16_e64_vi: 2891 case AMDGPU::V_LSHRREV_B16_gfx10: 2892 2893 case AMDGPU::V_ASHRREV_I16_e32: 2894 case AMDGPU::V_ASHRREV_I16_e64: 2895 case AMDGPU::V_ASHRREV_I16_e32_vi: 2896 case AMDGPU::V_ASHRREV_I16_e64_vi: 2897 case AMDGPU::V_ASHRREV_I16_gfx10: 2898 2899 case AMDGPU::V_LSHLREV_B64: 2900 case AMDGPU::V_LSHLREV_B64_gfx10: 2901 case AMDGPU::V_LSHLREV_B64_vi: 2902 2903 case AMDGPU::V_LSHRREV_B64: 2904 case AMDGPU::V_LSHRREV_B64_gfx10: 2905 case AMDGPU::V_LSHRREV_B64_vi: 2906 2907 case AMDGPU::V_ASHRREV_I64: 2908 case AMDGPU::V_ASHRREV_I64_gfx10: 2909 case AMDGPU::V_ASHRREV_I64_vi: 2910 2911 case AMDGPU::V_PK_LSHLREV_B16: 2912 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 2913 case AMDGPU::V_PK_LSHLREV_B16_vi: 2914 2915 case AMDGPU::V_PK_LSHRREV_B16: 2916 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 2917 case AMDGPU::V_PK_LSHRREV_B16_vi: 2918 case AMDGPU::V_PK_ASHRREV_I16: 2919 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 2920 case AMDGPU::V_PK_ASHRREV_I16_vi: 2921 return true; 2922 default: 2923 return false; 2924 } 2925 } 2926 2927 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 2928 2929 using namespace SIInstrFlags; 2930 const unsigned Opcode = Inst.getOpcode(); 2931 const MCInstrDesc &Desc = MII.get(Opcode); 2932 2933 // lds_direct register is defined so that it can be used 2934 // with 9-bit operands only. Ignore encodings which do not accept these. 
2935 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 2936 return true; 2937 2938 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2939 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2940 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2941 2942 const int SrcIndices[] = { Src1Idx, Src2Idx }; 2943 2944 // lds_direct cannot be specified as either src1 or src2. 2945 for (int SrcIdx : SrcIndices) { 2946 if (SrcIdx == -1) break; 2947 const MCOperand &Src = Inst.getOperand(SrcIdx); 2948 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 2949 return false; 2950 } 2951 } 2952 2953 if (Src0Idx == -1) 2954 return true; 2955 2956 const MCOperand &Src = Inst.getOperand(Src0Idx); 2957 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 2958 return true; 2959 2960 // lds_direct is specified as src0. Check additional limitations. 2961 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 2962 } 2963 2964 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 2965 unsigned Opcode = Inst.getOpcode(); 2966 const MCInstrDesc &Desc = MII.get(Opcode); 2967 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 2968 return true; 2969 2970 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2971 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2972 2973 const int OpIndices[] = { Src0Idx, Src1Idx }; 2974 2975 unsigned NumLiterals = 0; 2976 uint32_t LiteralValue; 2977 2978 for (int OpIdx : OpIndices) { 2979 if (OpIdx == -1) break; 2980 2981 const MCOperand &MO = Inst.getOperand(OpIdx); 2982 if (MO.isImm() && 2983 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 2984 AMDGPU::isSISrcOperand(Desc, OpIdx) && 2985 !isInlineConstant(Inst, OpIdx)) { 2986 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 2987 if (NumLiterals == 0 || LiteralValue != Value) { 2988 
LiteralValue = Value; 2989 ++NumLiterals; 2990 } 2991 } 2992 } 2993 2994 return NumLiterals <= 1; 2995 } 2996 2997 // VOP3 literal is only allowed in GFX10+ and only one can be used 2998 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 2999 unsigned Opcode = Inst.getOpcode(); 3000 const MCInstrDesc &Desc = MII.get(Opcode); 3001 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3002 return true; 3003 3004 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3005 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3006 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3007 3008 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3009 3010 unsigned NumLiterals = 0; 3011 uint32_t LiteralValue; 3012 3013 for (int OpIdx : OpIndices) { 3014 if (OpIdx == -1) break; 3015 3016 const MCOperand &MO = Inst.getOperand(OpIdx); 3017 if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx)) 3018 continue; 3019 3020 if (!isInlineConstant(Inst, OpIdx)) { 3021 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3022 if (NumLiterals == 0 || LiteralValue != Value) { 3023 LiteralValue = Value; 3024 ++NumLiterals; 3025 } 3026 } 3027 } 3028 3029 return !NumLiterals || 3030 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3031 } 3032 3033 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3034 const SMLoc &IDLoc) { 3035 if (!validateLdsDirect(Inst)) { 3036 Error(IDLoc, 3037 "invalid use of lds_direct"); 3038 return false; 3039 } 3040 if (!validateSOPLiteral(Inst)) { 3041 Error(IDLoc, 3042 "only one literal operand is allowed"); 3043 return false; 3044 } 3045 if (!validateVOP3Literal(Inst)) { 3046 Error(IDLoc, 3047 "invalid literal operand"); 3048 return false; 3049 } 3050 if (!validateConstantBusLimitations(Inst)) { 3051 Error(IDLoc, 3052 "invalid operand (violates constant bus restrictions)"); 3053 return false; 3054 } 3055 if 
(!validateEarlyClobberLimitations(Inst)) {
    Error(IDLoc,
      "destination must be different than all sources");
    return false;
  }
  if (!validateIntClampSupported(Inst)) {
    Error(IDLoc,
      "integer clamping is not supported on this GPU");
    return false;
  }
  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
  if (!validateMIMGD16(Inst)) {
    Error(IDLoc,
      "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGDim(Inst)) {
    Error(IDLoc, "dim modifier is required on this GPU");
    return false;
  }
  if (!validateMIMGDataSize(Inst)) {
    Error(IDLoc,
      "image data size does not match dmask and tfe");
    return false;
  }
  if (!validateMIMGAddrSize(Inst)) {
    Error(IDLoc,
      "image address size does not match dim and a16");
    return false;
  }
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(IDLoc,
      "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(IDLoc,
      "invalid image_gather dmask: only one bit must be set");
    return false;
  }

  // All checks passed: the instruction is valid for the current subtarget.
  return true;
}

// Forward declaration of the tablegen-generated mnemonic spell checker; used
// below to suggest a near-miss mnemonic on Match_MnemonicFail.
static std::string AMDGPUMnemonicSpellCheck(StringRef S,
                                            const FeatureBitset &FBS,
                                            unsigned VariantID = 0);

// Try every matcher variant for this parser state and emit the first
// successful match; otherwise report the most specific failure observed.
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific.
    // We use the most specific status seen so far as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail && Result != Match_InvalidOperand
                                 && Result != Match_MissingFeature
                                 && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  switch (Result) {
  default: break;
  case Match_Success:
    // Run target-specific validation before emitting; validateInstruction
    // reports its own diagnostic on failure.
    if (!validateInstruction(Inst, IDLoc)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.EmitInstruction(Inst, getSTI());
    return false;

  case Match_MissingFeature:
    return Error(IDLoc, "instruction not supported on this GPU");

  case Match_MnemonicFail: {
    // Offer a "did you mean ...?" suggestion based on available features.
    FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
    std::string Suggestion = AMDGPUMnemonicSpellCheck(
        ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
    return Error(IDLoc, "invalid instruction" + Suggestion,
                 ((AMDGPUOperand &)*Operands[0]).getLocRange());
  }

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  }
  llvm_unreachable("Implement any new match types added!");
}

// Parse an absolute expression into Ret (truncated to 32 bits). Returns true
// on failure, false on success.
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  int64_t Tmp = -1;
  // Only an integer or a symbol may begin an absolute expression here.
  if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
    return true;
  }
  if (getParser().parseAbsoluteExpression(Tmp)) {
    return true;
  }
  Ret = static_cast<uint32_t>(Tmp);
  return false;
}

// Parse a "<major>, <minor>" version pair. Returns true (after emitting a
// diagnostic) on malformed input.
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("minor version number required, comma expected");
  Lex();

  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");

  return false;
}

// Handle .amdgcn_target: the quoted target string must match the ISA version
// string derived from the current subtarget options.
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  std::string Target;

  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(Target))
    return true;
  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());

  std::string ExpectedTarget;
  raw_string_ostream ExpectedTargetOS(ExpectedTarget);
  IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);

  if (Target != ExpectedTargetOS.str())
    return getParser().Error(TargetRange.Start, "target must match options",
                             TargetRange);

  getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
  return false;
}

// Emit a "value out of range" diagnostic covering the given source range.
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return getParser().Error(Range.Start, "value out of range", Range);
}

// Convert raw VGPR/SGPR counts into the encoded "block" granularity used by
// compute_pgm_rsrc1, accounting for VCC / flat-scratch / XNACK usage and
// subtarget quirks. Returns true (with a diagnostic) on out-of-range counts.
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
    unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
    unsigned &SGPRBlocks) {
  // TODO(scott.linder): These
  // calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());

  unsigned NumVGPRs = NextFreeVGPR;
  unsigned NumSGPRs = NextFreeSGPR;

  if (Version.Major >= 10)
    // GFX10+: the SGPR count is not encoded; force zero blocks.
    NumSGPRs = 0;
  else {
    unsigned MaxAddressableNumSGPRs =
        IsaInfo::getAddressableNumSGPRs(&getSTI());

    // Without the SGPR-init bug, gfx8+ range-checks the user count before the
    // extra (VCC/flat_scratch/XNACK) SGPRs are added.
    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    NumSGPRs +=
        IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);

    // Older targets (or those with the init bug) check after the addition.
    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
  }

  VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);

  return false;
}

// Parse the .amdhsa_kernel directive: consume .amdhsa_* sub-directives until
// .end_amdhsa_kernel, then emit the assembled kernel descriptor.
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());

  // Track which sub-directives have already appeared; repeats are rejected.
  StringSet<> Seen;

  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());

  SMRange VGPRRange;
  uint64_t NextFreeVGPR = 0;
  SMRange SGPRRange;
  uint64_t NextFreeSGPR = 0;
  unsigned UserSGPRCount = 0;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  bool ReserveXNACK = hasXNACK();

  while (true) {
    while
(getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");

    StringRef ID = getTok().getIdentifier();
    SMRange IDRange = getTok().getLocRange();
    Lex();

    if (ID == ".end_amdhsa_kernel")
      break;

    if (Seen.find(ID) != Seen.end())
      return TokError(".amdhsa_ directives cannot be repeated");
    Seen.insert(ID);

    SMLoc ValStart = getTok().getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getTok().getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    // All .amdhsa_ values are unsigned; negatives are out of range.
    if (IVal < 0)
      return OutOfRangeError(ValRange);

    uint64_t Val = IVal;

// Range-check VALUE against ENTRY's bit width, then store it into FIELD.
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);

    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      // Each enabled user-SGPR feature consumes a fixed number of SGPRs.
      UserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      UserSGPRCount += 1;
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
          ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return getParser().Error(IDRange.Start, "directive requires gfx7+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return getParser().Error(IDRange.Start, "directive requires gfx8+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveXNACK = Val;
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return getParser().Error(IDRange.Start, "directive requires gfx9+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return getParser().Error(IDRange.Start, "directive requires gfx10+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
                       ValRange);
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return getParser().Error(IDRange.Start, "directive requires gfx10+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
                       ValRange);
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return getParser().Error(IDRange.Start, "directive requires gfx10+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return getParser().Error(IDRange.Start,
                               "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  // The register counts have no usable defaults; both directives are
  // mandatory.
  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
                         SGPRRange, VGPRBlocks, SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr, ReserveXNACK);
  return false;
}

// Handle .hsa_code_object_version: parse "major, minor" and forward it to the
// target streamer.
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
  uint32_t Major;
  uint32_t Minor;

  if
     (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
  return false;
}

// Handle .hsa_code_object_isa: either no arguments (derive the ISA from the
// subtarget) or "major, minor, stepping, \"vendor\", \"arch\"".
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no arguments, then use the ISA version for the
  // targeted GPU.
  if (getLexer().is(AsmToken::EndOfStatement)) {
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
                                                      ISA.Stepping,
                                                      "AMD", "AMDGPU");
    return false;
  }

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("stepping version number required, comma expected");
  Lex();

  if (ParseAsAbsoluteExpression(Stepping))
    return TokError("invalid stepping version");

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("vendor name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid vendor name");

  VendorName = getLexer().getTok().getStringContents();
  Lex();

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("arch name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid arch name");

  ArchName = getLexer().getTok().getStringContents();
  Lex();

  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
                                                    VendorName, ArchName);
  return false;
}

// Parse a single "<field> = <value>" entry of an .amd_kernel_code_t block
// into Header. Returns true (with a diagnostic) on error.
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();

  // The following compute_pgm_resource_registers bits are GFX10-only; reject
  // them when set while targeting an older subtarget.
  if (ID == "enable_wgp_mode") {
    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
  }

  if (ID == "enable_mem_ordered") {
    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
  }

  if (ID == "enable_fwd_progress") {
    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
  }

  return false;
}

// Parse an .amd_kernel_code_t block: field assignments terminated by
// .end_amd_kernel_code_t, then emit the populated header.
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  amd_kernel_code_t Header;
  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());

  while (true) {
    // Lex EndOfStatement. This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.
    while(getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected value identifier or .end_amd_kernel_code_t");

    StringRef ID = getLexer().getTok().getIdentifier();
    Lex();

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, Header))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(Header);

  return false;
}

// Mark the following symbol as an HSA kernel (code object v2 flow) and start
// a new kernel scope for register tracking when applicable.
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  if (getLexer().isNot(AsmToken::Identifier))
    return TokError("expected symbol name");

  StringRef KernelName = Parser.getTok().getString();

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);
  Lex();
  if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
    KernelScope.initialize(getContext());
  return false;
}

// Handle .amd_amdgpu_isa: the ISA string given in the assembly must match the
// one derived from the subtarget (triple + mcpu).
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  // NOTE(review): assumes the current token is a string literal — there is no
  // explicit AsmToken::String check before getStringContents(); confirm this
  // is guaranteed by the directive grammar.
  auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();

  std::string ISAVersionStringFromSTI;
  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);

  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
                 "arguments specified through the command line");
  }

  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
  Lex();

  return false;
}

// Parse an HSA metadata block delimited by the code-object-version-specific
// begin/end directives and hand the collected text to the streamer.
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  const char *AssemblerDirectiveBegin;
  const char
      *AssemblerDirectiveEnd;
  // Select the V3 or V2 directive pair based on the code object version.
  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
      AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
          ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
                            HSAMD::V3::AssemblerDirectiveEnd)
          : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
                            HSAMD::AssemblerDirectiveEnd);

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
                 "not available on non-amdhsa OSes")).str());
  }

  std::string HSAMetadataString;
  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
                          HSAMetadataString))
    return true;

  // The V2/V3 emitters validate the collected metadata; a false return means
  // it was rejected.
  if (IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
  } else {
    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
  }

  return false;
}

/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  // Preserve whitespace so the collected text round-trips exactly.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!getLexer().is(AsmToken::Eof)) {
    // Copy leading space tokens through verbatim.
    while (getLexer().is(AsmToken::Space)) {
      CollectStream << getLexer().getTok().getString();
      Lex();
    }

    if (getLexer().is(AsmToken::Identifier)) {
      StringRef ID = getLexer().getTok().getIdentifier();
      if (ID == AssemblerDirectiveEnd) {
        Lex();
        FoundEnd = true;
        break;
      }
    }

    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  // Restore default lexer behavior before returning.
  getLexer().setSkipSpace(true);

  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  CollectStream.flush();
  return false;
}

/// Parse the assembler directive for new MsgPack-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
  std::string String;
  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
                          AMDGPU::PALMD::AssemblerDirectiveEnd, String))
    return true;

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  if (!PALMetadata->setFromString(String))
    return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
  return false;
}

/// Parse the assembler directive for old linear-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  // The directive carries a comma-separated list of key/value register pairs,
  // hence the "even number of values" requirement below.
  for (;;) {
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (getLexer().isNot(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    Lex();
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    if (getLexer().isNot(AsmToken::Comma))
      break;
    Lex();
  }
  return false;
}

// Top-level directive dispatcher; returns true when the directive is not
// handled here (or on error), per MCTargetAsmParser convention.
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  // Code object v3 and pre-v3 use disjoint directive sets.
  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (IDVal == ".amdgcn_target")
      return ParseDirectiveAMDGCNTarget();

    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  // PAL metadata directives are accepted for either code object version.
  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  // Not one of ours.
  return true;
}

// Report whether RegNo exists on the current subtarget.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) const {

  // TTMP12..TTMP15 (and any register aliasing them) exist only on GFX9/GFX10.
  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return isGFX9() || isGFX10();
  }

  // GFX10 has 2 more SGPRs 104 and 105.
  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR104_SGPR105();
  }

  switch (RegNo) {
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    // TBA/TMA are not available on GFX9/GFX10.
    return !isGFX9() && !isGFX10();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    // XNACK_MASK requires the XNACK feature and a VI-era subtarget.
    return !isCI() && !isSI() && !isGFX10() && hasXNACK();
  case AMDGPU::SGPR_NULL:
    return isGFX10();
  default:
    break;
  }

  if (isInlineValue(RegNo))
    return !isCI() && !isSI() && !isVI();

  if (isCI())
    return true;

  if (isSI() || isGFX10()) {
    // No flat_scr on SI.
    // On GFX10 flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR102_SGPR103();
  }

  return true;
}

// Parse one instruction operand, optionally accepting GFX10 MIMG NSA syntax
// ("[reg, reg, ...]") when Mode is OperandMode_NSA.
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
3941 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 3942 getLexer().is(AsmToken::EndOfStatement)) 3943 return ResTy; 3944 3945 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 3946 unsigned Prefix = Operands.size(); 3947 SMLoc LBraceLoc = getTok().getLoc(); 3948 Parser.Lex(); // eat the '[' 3949 3950 for (;;) { 3951 ResTy = parseReg(Operands); 3952 if (ResTy != MatchOperand_Success) 3953 return ResTy; 3954 3955 if (getLexer().is(AsmToken::RBrac)) 3956 break; 3957 3958 if (getLexer().isNot(AsmToken::Comma)) 3959 return MatchOperand_ParseFail; 3960 Parser.Lex(); 3961 } 3962 3963 if (Operands.size() - Prefix > 1) { 3964 Operands.insert(Operands.begin() + Prefix, 3965 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 3966 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 3967 getTok().getLoc())); 3968 } 3969 3970 Parser.Lex(); // eat the ']' 3971 return MatchOperand_Success; 3972 } 3973 3974 return parseRegOrImm(Operands); 3975 } 3976 3977 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 3978 // Clear any forced encodings from the previous instruction. 
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.endswith("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_dpp")) {
    setForcedDPP(true);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  return Name;
}

// Top-level per-instruction parse: push the mnemonic token, then parse
// operands until end of statement. On a failed operand the remainder of the
// statement is consumed so the lexer is left in a consistent state.
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.startswith("image_");

  while (!getLexer().is(AsmToken::EndOfStatement)) {
    OperandMode Mode = OperandMode_Default;
    // On GFX10 a MIMG address (the operand parsed when only the mnemonic and
    // one operand are present) may use the bracketed NSA register-list form.
    if (IsMIMG && isGFX10() && Operands.size() == 2)
      Mode = OperandMode_NSA;
    OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);

    // Eat the comma or space if there is one.
    if (getLexer().is(AsmToken::Comma))
      Parser.Lex();

    switch (Res) {
    case MatchOperand_Success: break;
    case MatchOperand_ParseFail:
      // FIXME: use real operand location rather than the current location.
      Error(getLexer().getLoc(), "failed parsing operand.");
      // Skip to the end of the statement to recover.
      while (!getLexer().is(AsmToken::EndOfStatement)) {
        Parser.Lex();
      }
      return true;
    case MatchOperand_NoMatch:
      // FIXME: use real operand location rather than the current location.
      Error(getLexer().getLoc(), "not a valid operand.");
      while (!getLexer().is(AsmToken::EndOfStatement)) {
        Parser.Lex();
      }
      return true;
    }
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Utility functions
//===----------------------------------------------------------------------===//

// Parse "<Prefix>:<expr>" into IntVal. Returns NoMatch when the prefix (with
// a following colon) is not present; ParseFail when the expression is bad.
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {

  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
}

// As above, but wraps the parsed value in an immediate operand of type ImmTy.
// ConvertResult, if non-null, may validate/adjust the value; its failure
// reports an error but the operand is still created.
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                                    AMDGPUOperand::ImmTy ImmTy,
                                    bool (*ConvertResult)(int64_t&)) {
  SMLoc S = getLoc();
  int64_t Value = 0;

  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
  if (Res != MatchOperand_Success)
    return Res;

  if (ConvertResult && !ConvertResult(Value)) {
    Error(S, "invalid " + StringRef(Prefix) + " value.");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
  return MatchOperand_Success;
}

// Parse "<Prefix>:[v,v,...]" where each element must be 0 or 1; the bits are
// packed LSB-first into a single immediate operand of type ImmTy.
OperandMatchResultTy
AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
                                             OperandVector &Operands,
                                             AMDGPUOperand::ImmTy ImmTy,
                                             bool (*ConvertResult)(int64_t&)) {
  SMLoc S = getLoc();
  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
    return MatchOperand_ParseFail;

  unsigned Val = 0;
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Op))
      return MatchOperand_ParseFail;

    if (Op != 0 && Op != 1) {
      Error(Loc, "invalid " + StringRef(Prefix) + " value.");
      return MatchOperand_ParseFail;
    }

    Val |= (Op << I);

    if (trySkipToken(AsmToken::RBrac))
      break;

    // No more than MaxSize elements are accepted.
    if (I + 1 == MaxSize) {
      Error(getLoc(), "expected a closing square bracket");
      return MatchOperand_ParseFail;
    }

    if (!skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return MatchOperand_Success;
}

// Parse an on/off flag modifier: the identifier "Name" sets the bit,
// "no<Name>" clears it, and anything else is NoMatch. At end of statement
// the default (0) is used.
OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy) {
  int64_t Bit = 0;
  SMLoc S = Parser.getTok().getLoc();

  // We are at the end of the statement, and this is a default argument, so
  // use a default value.
  if (getLexer().isNot(AsmToken::EndOfStatement)) {
    switch(getLexer().getKind()) {
    case AsmToken::Identifier: {
      StringRef Tok = Parser.getTok().getString();
      if (Tok == Name) {
        // Diagnose per-subtarget restrictions on specific modifiers.
        if (Tok == "r128" && isGFX9())
          Error(S, "r128 modifier is not supported on this GPU");
        if (Tok == "a16" && !isGFX9())
          Error(S, "a16 modifier is not supported on this GPU");
        Bit = 1;
        Parser.Lex();
      } else if (Tok.startswith("no") && Tok.endswith(Name)) {
        Bit = 0;
        Parser.Lex();
      } else {
        return MatchOperand_NoMatch;
      }
      break;
    }
    default:
      return MatchOperand_NoMatch;
    }
  }

  // The DLC bit only exists on GFX10.
  if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
    return MatchOperand_ParseFail;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}

// Append an optional immediate to Inst: use the parsed operand recorded in
// OptionalIdx when present, otherwise the supplied default value.
static void addOptionalImmOperand(
  MCInst& Inst, const OperandVector& Operands,
  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
  AMDGPUOperand::ImmTy ImmT,
  int64_t Default = 0) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
  } else {
    Inst.addOperand(MCOperand::createImm(Default));
  }
}

// Parse "<Prefix>:<identifier>" into Value. NoMatch when the prefix is
// absent; ParseFail when the colon or trailing identifier is missing.
// Note: the trailing identifier is not consumed here.
OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
  if (getLexer().isNot(AsmToken::Identifier)) {
    return MatchOperand_NoMatch;
  }
  StringRef Tok = Parser.getTok().getString();
  if (Tok != Prefix) {
    return MatchOperand_NoMatch;
  }

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon)) {
    return MatchOperand_ParseFail;
  }

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Identifier)) {
    return MatchOperand_ParseFail;
  }

  Value = Parser.getTok().getString();
  return MatchOperand_Success;
}

// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Dfmt = 0, Nfmt = 0;
  // dfmt and nfmt can appear in either order, and each is optional.
  bool GotDfmt = false, GotNfmt = false;
  while (!GotDfmt || !GotNfmt) {
    if (!GotDfmt) {
      auto Res = parseIntWithPrefix("dfmt", Dfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        if (Dfmt >= 16) {
          Error(Parser.getTok().getLoc(), "out of range dfmt");
          return MatchOperand_ParseFail;
        }
        GotDfmt = true;
        Parser.Lex();
        continue;
      }
    }
    if (!GotNfmt) {
      auto Res = parseIntWithPrefix("nfmt", Nfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        if (Nfmt >= 8) {
          Error(Parser.getTok().getLoc(), "out of range nfmt");
          return MatchOperand_ParseFail;
        }
        GotNfmt = true;
        Parser.Lex();
        continue;
      }
    }
    break;
  }
  if (!GotDfmt && !GotNfmt)
    return MatchOperand_NoMatch;
  // Pack dfmt into the low 4 bits and nfmt above it.
  auto Format = Dfmt | Nfmt << 4;
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//

// Convert a DS instruction that takes the offset0/offset1 pair: registers
// first, then the optional offset0, offset1 and gds immediates, then the
// implicit m0 operand.
void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);

  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

// Common DS conversion: registers, then the offset (a swizzle pattern for
// ds_swizzle_b32), the optional gds flag, and the implicit m0 operand.
// IsGdsHardcoded is true when the mnemonic itself implies gds.
void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                                bool IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // An explicit "gds" token also forces the gds form.
    if (Op.isToken() && Op.getToken() == "gds") {
      IsGdsHardcoded = true;
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // ds_swizzle_b32 carries a swizzle pattern instead of a plain offset.
  AMDGPUOperand::ImmTy OffsetType =
    (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ?
    AMDGPUOperand::ImmTySwizzle : AMDGPUOperand::ImmTyOffset;

  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);

  if (!IsGdsHardcoded) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
  }
  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

// Convert an exp instruction: collect up to four source registers (or "off"
// placeholders), derive the enable mask, and append the vm/compr flags and
// the mask itself.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // "off" sources become NoRegister placeholders.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    Compr = true;
    // Compressed exports use slots 0 and 1: move src2 into the src1 slot
    // and clear the now-unused upper slots.
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // Each live source enables one bit (two when compressed).
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr?
          (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}

//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//

// Encode one counter value into the packed waitcnt immediate IntVal.
// Returns true on failure, i.e. when the value does not fit in its field and
// saturation was not requested.
static bool
encodeCnt(
  const AMDGPU::IsaVersion ISA,
  int64_t &IntVal,
  int64_t CntVal,
  bool Saturate,
  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
  unsigned (*decode)(const IsaVersion &Version, unsigned))
{
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  // A decode round-trip mismatch means CntVal overflowed the field.
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}

// Parse one "name(value)" waitcnt component (vmcnt/expcnt/lgkmcnt, each with
// an optional _sat suffix) and merge it into IntVal. Returns true on error.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  StringRef CntName = Parser.getTok().getString();
  int64_t CntVal;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LParen))
    return true;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Integer))
    return true;

  SMLoc ValLoc = Parser.getTok().getLoc();
  if (getParser().parseAbsoluteExpression(CntVal))
    return true;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return true;
  }

  if (getLexer().isNot(AsmToken::RParen)) {
    return true;
  }

  Parser.Lex();
  // Consume an optional '&' or ',' separator, but only when another counter
  // name follows it.
  if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
    const AsmToken NextToken = getLexer().peekTok();
    if (NextToken.is(AsmToken::Identifier)) {
      Parser.Lex();
    }
  }

  return false;
}

// Parse the full s_waitcnt operand: either a raw integer or a sequence of
// named counter components, merged into the default all-ones mask.
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default: return MatchOperand_ParseFail;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Waitcnt))
      return MatchOperand_ParseFail;
    break;

  case AsmToken::Identifier:
    do {
      if (parseCnt(Waitcnt))
        return MatchOperand_ParseFail;
    } while(getLexer().isNot(AsmToken::EndOfStatement));
    break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return MatchOperand_Success;
}

// Parse the syntactic "hwreg(id[, offset, width])" construct. Returns true
// on a syntax error; range validation happens in parseHwreg.
bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
                                          int64_t &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (Parser.getTok().getString() != "hwreg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    HwReg.IsSymbolic = true;
    HwReg.Id = ID_UNKNOWN_;
    const StringRef tok = Parser.getTok().getString();
    // Limit the symbolic name table to what this subtarget supports.
    int Last = ID_SYMBOLIC_LAST_;
    if (isSI() || isCI() || isVI())
      Last = ID_SYMBOLIC_FIRST_GFX9_;
    else if (isGFX9())
      Last = ID_SYMBOLIC_FIRST_GFX10_;
    for (int i =
         ID_SYMBOLIC_FIRST_; i < Last; ++i) {
      if (tok == IdSymbolic[i]) {
        HwReg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    HwReg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(HwReg.Id))
      return true;
  }

  if (getLexer().is(AsmToken::RParen)) {
    Parser.Lex();
    return false;
  }

  // optional params
  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Offset))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Width))
    return true;

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();

  return false;
}

// Parse an s_setreg/s_getreg operand: a raw 16-bit immediate or a
// hwreg(...) construct, packed into the simm16 encoding.
OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default: return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return error code, but create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unneccessary error messages.
    }
    break;

  case AsmToken::Identifier: {
    OperandInfoTy HwReg(ID_UNKNOWN_);
    int64_t Offset = OFFSET_DEFAULT_;
    int64_t Width = WIDTH_M1_DEFAULT_ + 1;
    if (parseHwregConstruct(HwReg, Offset, Width))
      return MatchOperand_ParseFail;
    // Range-check each field; errors are reported but the operand is still
    // created (same rationale as above).
    if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
      if (HwReg.IsSymbolic)
        Error(S, "invalid symbolic name of hardware register");
      else
        Error(S, "invalid code of hardware register: only 6-bit values are legal");
    }
    if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
      Error(S, "invalid bit offset: only 5-bit values are legal");
    if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
      Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
    // Pack id, offset and width-1 into the simm16 encoding.
    Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
  }
  break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}

// Any immediate can serve as an s_waitcnt operand.
bool AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}

// Parse the syntactic "sendmsg(msg[, op[, stream]])" construct. Returns true
// on a syntax error; semantic validation happens in parseSendMsgOp.
bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
  using namespace llvm::AMDGPU::SendMsg;

  if (Parser.getTok().getString() != "sendmsg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    Msg.IsSymbolic = true;
    Msg.Id = ID_UNKNOWN_;
    const std::string tok = Parser.getTok().getString();
    for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
      switch(i) {
      default: continue; // Omit gaps.
      case ID_GS_ALLOC_REQ:
        // GS_ALLOC_REQ is not accepted on SI/CI/VI.
        if (isSI() || isCI() || isVI())
          continue;
        break;
      case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:
      case ID_SYSMSG: break;
      }
      if (tok == IdSymbolic[i]) {
        Msg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    Msg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Msg.Id))
      return true;
    if (getLexer().is(AsmToken::Integer))
      if (getParser().parseAbsoluteExpression(Msg.Id))
        Msg.Id = ID_UNKNOWN_;
  }
  if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
    return false;

  // Messages other than GS/GS_DONE/SYSMSG take no further arguments.
  if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
    if (getLexer().isNot(AsmToken::RParen))
      return true;
    Parser.Lex();
    return false;
  }

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
  Operation.Id = ID_UNKNOWN_;
  if (getLexer().is(AsmToken::Identifier)) {
    Operation.IsSymbolic = true;
    // Select the operation name table matching the message type.
    const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
    const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
    const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
    const StringRef Tok = Parser.getTok().getString();
    for (int i = F; i < L; ++i) {
      if (Tok == S[i]) {
        Operation.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    Operation.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Operation.Id))
      return true;
  }

  if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
    // Stream id is optional.
    if (getLexer().is(AsmToken::RParen)) {
      Parser.Lex();
      return false;
    }

    if (getLexer().isNot(AsmToken::Comma))
      return true;
    Parser.Lex();

    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(StreamId))
      return true;
  }

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();
  return false;
}

// Parse the v_interp_* slot operand (p10/p20/p0).
OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  SMLoc S = Parser.getTok().getLoc();
  if (Slot == -1)
    return MatchOperand_ParseFail;

  Parser.Lex();
  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}

// Parse an interpolation attribute of the form "attr<N>.<chan>" into
// separate attribute and channel immediates.
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1)
    return MatchOperand_ParseFail;

  // Strip the ".chan" suffix and the "attr" prefix, leaving the number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return MatchOperand_ParseFail;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();
  if (Attr > 63) {
    Error(S, "out of bounds attr");
    return MatchOperand_Success;
  }

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}

// Report an invalid exp target at the current token location.
void AMDGPUAsmParser::errorExpTgt() {
  Error(Parser.getTok().getLoc(), "invalid exp target");
}

// Decode an exp target name (null/mrt<N>/mrtz/pos<N>/prim/param<N>) into its
// hardware target index Val. Out-of-range values are diagnosed via
// errorExpTgt but still return Success so parsing can continue.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
                                                      uint8_t &Val) {
  if (Str == "null") {
    Val = 9;
    return MatchOperand_Success;
  }

  if (Str.startswith("mrt")) {
    Str = Str.drop_front(3);
    if (Str == "z") { // == mrtz
      Val = 8;
      return MatchOperand_Success;
    }

    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 7)
      errorExpTgt();

    return MatchOperand_Success;
  }

  if (Str.startswith("pos")) {
    Str = Str.drop_front(3);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    // pos4 is only accepted on GFX10.
    if (Val > 4 || (Val == 4 && !isGFX10()))
      errorExpTgt();

    Val += 12;
    return MatchOperand_Success;
  }

  if (isGFX10() && Str == "prim") {
    Val = 20;
    return MatchOperand_Success;
  }

  if (Str.startswith("param")) {
    Str = Str.drop_front(5);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val >= 32)
      errorExpTgt();

    Val += 32;
    return MatchOperand_Success;
  }

  if (Str.startswith("invalid_target_")) {
    Str = Str.drop_front(15);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    errorExpTgt();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

// Parse an exp target operand using parseExpTgtImpl and wrap it in an
// immediate operand.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  uint8_t Val;
  StringRef Str = Parser.getTok().getString();

  auto Res = parseExpTgtImpl(Str, Val);
  if (Res != MatchOperand_Success)
    return Res;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return MatchOperand_Success;
}

// Parse the s_sendmsg operand: a raw 16-bit immediate or a sendmsg(...)
// construct, validated and packed into the simm16 encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default:
    return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return error code, but create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unneccessary error messages.
    }
    break;
  case AsmToken::Identifier: {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Operation(OP_UNKNOWN_);
    int64_t StreamId = STREAM_ID_DEFAULT_;
    if (parseSendMsgConstruct(Msg, Operation, StreamId))
      return MatchOperand_ParseFail;
    do {
      // Validate and encode message ID.
      if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
              || (Msg.Id == ID_GS_ALLOC_REQ && !isSI() && !isCI() && !isVI())
              || Msg.Id == ID_SYSMSG)) {
        if (Msg.IsSymbolic)
          Error(S, "invalid/unsupported symbolic name of message");
        else
          Error(S, "invalid/unsupported code of message");
        break;
      }
      Imm16Val = (Msg.Id << ID_SHIFT_);
      // Validate and encode operation ID.
      if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
        if (!
            (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
          if (Operation.IsSymbolic)
            Error(S, "invalid symbolic name of GS_OP");
          else
            Error(S, "invalid code of GS_OP: only 2-bit values are legal");
          break;
        }
        if (Operation.Id == OP_GS_NOP
            && Msg.Id != ID_GS_DONE) {
          Error(S, "invalid GS_OP: NOP is for GS_DONE only");
          break;
        }
        Imm16Val |= (Operation.Id << OP_SHIFT_);
      }
      if (Msg.Id == ID_SYSMSG) {
        if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
          if (Operation.IsSymbolic)
            Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
          else
            Error(S, "invalid/unsupported code of SYSMSG_OP");
          break;
        }
        Imm16Val |= (Operation.Id << OP_SHIFT_);
      }
      // Validate and encode stream ID.
      if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
        if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
          Error(S, "invalid stream id: only 2-bit values are legal");
          break;
        }
        Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
      }
    } while (false);
  }
  break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}

//===----------------------------------------------------------------------===//
// parser helpers
//===----------------------------------------------------------------------===//

// True if Token is an identifier with the given spelling.
bool
AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
}

// True if the current token is the identifier Id.
bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}

bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}

// Consume the identifier Id if it is the current token.
bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (isId(Id)) {
    lex();
    return true;
  }
  return false;
}

// Consume Id and the following token of the given Kind. Nothing is consumed
// unless both match.
bool
AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
  if (isId(Id) && peekToken().is(Kind)) {
    lex();
    lex();
    return true;
  }
  return false;
}

// Consume the current token if it has the given kind.
bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (isToken(Kind)) {
    lex();
    return true;
  }
  return false;
}

// Like trySkipToken, but reports ErrMsg when the expected token is absent.
bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);
    return false;
  }
  return true;
}

// Parse an absolute expression into Imm; true on success.
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
  return !getParser().parseAbsoluteExpression(Imm);
}

// Consume a string token into Val; reports ErrMsg and returns false
// otherwise.
bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::String)) {
    Val = getToken().getStringContents();
    lex();
    return true;
  } else {
    Error(getLoc(), ErrMsg);
    return false;
  }
}

AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}

AsmToken
AMDGPUAsmParser::peekToken() {
  return getLexer().peekTok();
}

// Peek up to Tokens.size() tokens ahead; slots past the end of what the
// lexer can provide are filled with Error tokens.
void
AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
  auto TokCount = getLexer().peekTokens(Tokens);

  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
    Tokens[Idx] = AsmToken(AsmToken::Error, "");
}

AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}

SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}

StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}

void
AMDGPUAsmParser::lex() {
5024 Parser.Lex(); 5025 } 5026 5027 //===----------------------------------------------------------------------===// 5028 // swizzle 5029 //===----------------------------------------------------------------------===// 5030 5031 LLVM_READNONE 5032 static unsigned 5033 encodeBitmaskPerm(const unsigned AndMask, 5034 const unsigned OrMask, 5035 const unsigned XorMask) { 5036 using namespace llvm::AMDGPU::Swizzle; 5037 5038 return BITMASK_PERM_ENC | 5039 (AndMask << BITMASK_AND_SHIFT) | 5040 (OrMask << BITMASK_OR_SHIFT) | 5041 (XorMask << BITMASK_XOR_SHIFT); 5042 } 5043 5044 bool 5045 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5046 const unsigned MinVal, 5047 const unsigned MaxVal, 5048 const StringRef ErrMsg) { 5049 for (unsigned i = 0; i < OpNum; ++i) { 5050 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5051 return false; 5052 } 5053 SMLoc ExprLoc = Parser.getTok().getLoc(); 5054 if (!parseExpr(Op[i])) { 5055 return false; 5056 } 5057 if (Op[i] < MinVal || Op[i] > MaxVal) { 5058 Error(ExprLoc, ErrMsg); 5059 return false; 5060 } 5061 } 5062 5063 return true; 5064 } 5065 5066 bool 5067 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5068 using namespace llvm::AMDGPU::Swizzle; 5069 5070 int64_t Lane[LANE_NUM]; 5071 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5072 "expected a 2-bit lane id")) { 5073 Imm = QUAD_PERM_ENC; 5074 for (unsigned I = 0; I < LANE_NUM; ++I) { 5075 Imm |= Lane[I] << (LANE_SHIFT * I); 5076 } 5077 return true; 5078 } 5079 return false; 5080 } 5081 5082 bool 5083 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5084 using namespace llvm::AMDGPU::Swizzle; 5085 5086 SMLoc S = Parser.getTok().getLoc(); 5087 int64_t GroupSize; 5088 int64_t LaneIdx; 5089 5090 if (!parseSwizzleOperands(1, &GroupSize, 5091 2, 32, 5092 "group size must be in the interval [2,32]")) { 5093 return false; 5094 } 5095 if (!isPowerOf2_64(GroupSize)) { 5096 Error(S, "group size must be a power of two"); 5097 return false; 
5098 } 5099 if (parseSwizzleOperands(1, &LaneIdx, 5100 0, GroupSize - 1, 5101 "lane id must be in the interval [0,group size - 1]")) { 5102 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5103 return true; 5104 } 5105 return false; 5106 } 5107 5108 bool 5109 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5110 using namespace llvm::AMDGPU::Swizzle; 5111 5112 SMLoc S = Parser.getTok().getLoc(); 5113 int64_t GroupSize; 5114 5115 if (!parseSwizzleOperands(1, &GroupSize, 5116 2, 32, "group size must be in the interval [2,32]")) { 5117 return false; 5118 } 5119 if (!isPowerOf2_64(GroupSize)) { 5120 Error(S, "group size must be a power of two"); 5121 return false; 5122 } 5123 5124 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 5125 return true; 5126 } 5127 5128 bool 5129 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5130 using namespace llvm::AMDGPU::Swizzle; 5131 5132 SMLoc S = Parser.getTok().getLoc(); 5133 int64_t GroupSize; 5134 5135 if (!parseSwizzleOperands(1, &GroupSize, 5136 1, 16, "group size must be in the interval [1,16]")) { 5137 return false; 5138 } 5139 if (!isPowerOf2_64(GroupSize)) { 5140 Error(S, "group size must be a power of two"); 5141 return false; 5142 } 5143 5144 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5145 return true; 5146 } 5147 5148 bool 5149 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5150 using namespace llvm::AMDGPU::Swizzle; 5151 5152 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5153 return false; 5154 } 5155 5156 StringRef Ctl; 5157 SMLoc StrLoc = Parser.getTok().getLoc(); 5158 if (!parseString(Ctl)) { 5159 return false; 5160 } 5161 if (Ctl.size() != BITMASK_WIDTH) { 5162 Error(StrLoc, "expected a 5-character mask"); 5163 return false; 5164 } 5165 5166 unsigned AndMask = 0; 5167 unsigned OrMask = 0; 5168 unsigned XorMask = 0; 5169 5170 for (size_t i = 0; i < Ctl.size(); ++i) { 5171 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5172 switch(Ctl[i]) { 5173 default: 5174 
Error(StrLoc, "invalid mask"); 5175 return false; 5176 case '0': 5177 break; 5178 case '1': 5179 OrMask |= Mask; 5180 break; 5181 case 'p': 5182 AndMask |= Mask; 5183 break; 5184 case 'i': 5185 AndMask |= Mask; 5186 XorMask |= Mask; 5187 break; 5188 } 5189 } 5190 5191 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5192 return true; 5193 } 5194 5195 bool 5196 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5197 5198 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5199 5200 if (!parseExpr(Imm)) { 5201 return false; 5202 } 5203 if (!isUInt<16>(Imm)) { 5204 Error(OffsetLoc, "expected a 16-bit offset"); 5205 return false; 5206 } 5207 return true; 5208 } 5209 5210 bool 5211 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5212 using namespace llvm::AMDGPU::Swizzle; 5213 5214 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5215 5216 SMLoc ModeLoc = Parser.getTok().getLoc(); 5217 bool Ok = false; 5218 5219 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5220 Ok = parseSwizzleQuadPerm(Imm); 5221 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5222 Ok = parseSwizzleBitmaskPerm(Imm); 5223 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5224 Ok = parseSwizzleBroadcast(Imm); 5225 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5226 Ok = parseSwizzleSwap(Imm); 5227 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5228 Ok = parseSwizzleReverse(Imm); 5229 } else { 5230 Error(ModeLoc, "expected a swizzle mode"); 5231 } 5232 5233 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5234 } 5235 5236 return false; 5237 } 5238 5239 OperandMatchResultTy 5240 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5241 SMLoc S = Parser.getTok().getLoc(); 5242 int64_t Imm = 0; 5243 5244 if (trySkipId("offset")) { 5245 5246 bool Ok = false; 5247 if (skipToken(AsmToken::Colon, "expected a colon")) { 5248 if (trySkipId("swizzle")) { 5249 Ok = parseSwizzleMacro(Imm); 5250 } else { 5251 Ok = parseSwizzleOffset(Imm); 5252 } 5253 } 5254 5255 
Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5256 5257 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5258 } else { 5259 // Swizzle "offset" operand is optional. 5260 // If it is omitted, try parsing other optional operands. 5261 return parseOptionalOpr(Operands); 5262 } 5263 } 5264 5265 bool 5266 AMDGPUOperand::isSwizzle() const { 5267 return isImmTy(ImmTySwizzle); 5268 } 5269 5270 //===----------------------------------------------------------------------===// 5271 // VGPR Index Mode 5272 //===----------------------------------------------------------------------===// 5273 5274 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5275 5276 using namespace llvm::AMDGPU::VGPRIndexMode; 5277 5278 if (trySkipToken(AsmToken::RParen)) { 5279 return OFF; 5280 } 5281 5282 int64_t Imm = 0; 5283 5284 while (true) { 5285 unsigned Mode = 0; 5286 SMLoc S = Parser.getTok().getLoc(); 5287 5288 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5289 if (trySkipId(IdSymbolic[ModeId])) { 5290 Mode = 1 << ModeId; 5291 break; 5292 } 5293 } 5294 5295 if (Mode == 0) { 5296 Error(S, (Imm == 0)? 
5297 "expected a VGPR index mode or a closing parenthesis" : 5298 "expected a VGPR index mode"); 5299 break; 5300 } 5301 5302 if (Imm & Mode) { 5303 Error(S, "duplicate VGPR index mode"); 5304 break; 5305 } 5306 Imm |= Mode; 5307 5308 if (trySkipToken(AsmToken::RParen)) 5309 break; 5310 if (!skipToken(AsmToken::Comma, 5311 "expected a comma or a closing parenthesis")) 5312 break; 5313 } 5314 5315 return Imm; 5316 } 5317 5318 OperandMatchResultTy 5319 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5320 5321 int64_t Imm = 0; 5322 SMLoc S = Parser.getTok().getLoc(); 5323 5324 if (getLexer().getKind() == AsmToken::Identifier && 5325 Parser.getTok().getString() == "gpr_idx" && 5326 getLexer().peekTok().is(AsmToken::LParen)) { 5327 5328 Parser.Lex(); 5329 Parser.Lex(); 5330 5331 // If parse failed, trigger an error but do not return error code 5332 // to avoid excessive error messages. 5333 Imm = parseGPRIdxMacro(); 5334 5335 } else { 5336 if (getParser().parseAbsoluteExpression(Imm)) 5337 return MatchOperand_NoMatch; 5338 if (Imm < 0 || !isUInt<4>(Imm)) { 5339 Error(S, "invalid immediate: only 4-bit values are legal"); 5340 } 5341 } 5342 5343 Operands.push_back( 5344 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5345 return MatchOperand_Success; 5346 } 5347 5348 bool AMDGPUOperand::isGPRIdxMode() const { 5349 return isImmTy(ImmTyGprIdxMode); 5350 } 5351 5352 //===----------------------------------------------------------------------===// 5353 // sopp branch targets 5354 //===----------------------------------------------------------------------===// 5355 5356 OperandMatchResultTy 5357 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5358 SMLoc S = Parser.getTok().getLoc(); 5359 5360 switch (getLexer().getKind()) { 5361 default: return MatchOperand_ParseFail; 5362 case AsmToken::Integer: { 5363 int64_t Imm; 5364 if (getParser().parseAbsoluteExpression(Imm)) 5365 return MatchOperand_ParseFail; 5366 
Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 5367 return MatchOperand_Success; 5368 } 5369 5370 case AsmToken::Identifier: 5371 Operands.push_back(AMDGPUOperand::CreateExpr(this, 5372 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 5373 Parser.getTok().getString()), getContext()), S)); 5374 Parser.Lex(); 5375 return MatchOperand_Success; 5376 } 5377 } 5378 5379 //===----------------------------------------------------------------------===// 5380 // mubuf 5381 //===----------------------------------------------------------------------===// 5382 5383 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5384 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5385 } 5386 5387 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5388 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5389 } 5390 5391 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5392 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5393 } 5394 5395 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5396 const OperandVector &Operands, 5397 bool IsAtomic, 5398 bool IsAtomicReturn, 5399 bool IsLds) { 5400 bool IsLdsOpcode = IsLds; 5401 bool HasLdsModifier = false; 5402 OptionalImmIndexMap OptionalIdx; 5403 assert(IsAtomicReturn ? IsAtomic : true); 5404 unsigned FirstOperandIdx = 1; 5405 5406 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5407 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5408 5409 // Add the register arguments 5410 if (Op.isReg()) { 5411 Op.addRegOperands(Inst, 1); 5412 // Insert a tied src for atomic return dst. 5413 // This cannot be postponed as subsequent calls to 5414 // addImmOperands rely on correct number of MC operands. 
5415 if (IsAtomicReturn && i == FirstOperandIdx) 5416 Op.addRegOperands(Inst, 1); 5417 continue; 5418 } 5419 5420 // Handle the case where soffset is an immediate 5421 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5422 Op.addImmOperands(Inst, 1); 5423 continue; 5424 } 5425 5426 HasLdsModifier |= Op.isLDS(); 5427 5428 // Handle tokens like 'offen' which are sometimes hard-coded into the 5429 // asm string. There are no MCInst operands for these. 5430 if (Op.isToken()) { 5431 continue; 5432 } 5433 assert(Op.isImm()); 5434 5435 // Handle optional arguments 5436 OptionalIdx[Op.getImmTy()] = i; 5437 } 5438 5439 // This is a workaround for an llvm quirk which may result in an 5440 // incorrect instruction selection. Lds and non-lds versions of 5441 // MUBUF instructions are identical except that lds versions 5442 // have mandatory 'lds' modifier. However this modifier follows 5443 // optional modifiers and llvm asm matcher regards this 'lds' 5444 // modifier as an optional one. As a result, an lds version 5445 // of opcode may be selected even if it has no 'lds' modifier. 5446 if (IsLdsOpcode && !HasLdsModifier) { 5447 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5448 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5449 Inst.setOpcode(NoLdsOpcode); 5450 IsLdsOpcode = false; 5451 } 5452 } 5453 5454 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5455 if (!IsAtomic) { // glc is hard-coded. 
5456 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5457 } 5458 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5459 5460 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5461 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5462 } 5463 5464 if (isGFX10()) 5465 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5466 } 5467 5468 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5469 OptionalImmIndexMap OptionalIdx; 5470 5471 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5472 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5473 5474 // Add the register arguments 5475 if (Op.isReg()) { 5476 Op.addRegOperands(Inst, 1); 5477 continue; 5478 } 5479 5480 // Handle the case where soffset is an immediate 5481 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5482 Op.addImmOperands(Inst, 1); 5483 continue; 5484 } 5485 5486 // Handle tokens like 'offen' which are sometimes hard-coded into the 5487 // asm string. There are no MCInst operands for these. 
5488 if (Op.isToken()) { 5489 continue; 5490 } 5491 assert(Op.isImm()); 5492 5493 // Handle optional arguments 5494 OptionalIdx[Op.getImmTy()] = i; 5495 } 5496 5497 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5498 AMDGPUOperand::ImmTyOffset); 5499 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5500 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5501 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5502 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5503 5504 if (isGFX10()) 5505 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5506 } 5507 5508 //===----------------------------------------------------------------------===// 5509 // mimg 5510 //===----------------------------------------------------------------------===// 5511 5512 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5513 bool IsAtomic) { 5514 unsigned I = 1; 5515 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5516 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5517 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5518 } 5519 5520 if (IsAtomic) { 5521 // Add src, same as dst 5522 assert(Desc.getNumDefs() == 1); 5523 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5524 } 5525 5526 OptionalImmIndexMap OptionalIdx; 5527 5528 for (unsigned E = Operands.size(); I != E; ++I) { 5529 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5530 5531 // Add the register arguments 5532 if (Op.isReg()) { 5533 Op.addRegOperands(Inst, 1); 5534 } else if (Op.isImmModifier()) { 5535 OptionalIdx[Op.getImmTy()] = I; 5536 } else if (!Op.isToken()) { 5537 llvm_unreachable("unexpected operand type"); 5538 } 5539 } 5540 5541 bool IsGFX10 = isGFX10(); 5542 5543 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5544 if (IsGFX10) 5545 addOptionalImmOperand(Inst, Operands, OptionalIdx, 
AMDGPUOperand::ImmTyDim, -1); 5546 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5547 if (IsGFX10) 5548 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5549 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5550 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5551 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5552 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5553 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5554 if (!IsGFX10) 5555 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5556 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5557 } 5558 5559 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5560 cvtMIMG(Inst, Operands, true); 5561 } 5562 5563 //===----------------------------------------------------------------------===// 5564 // smrd 5565 //===----------------------------------------------------------------------===// 5566 5567 bool AMDGPUOperand::isSMRDOffset8() const { 5568 return isImm() && isUInt<8>(getImm()); 5569 } 5570 5571 bool AMDGPUOperand::isSMRDOffset20() const { 5572 return isImm() && isUInt<20>(getImm()); 5573 } 5574 5575 bool AMDGPUOperand::isSMRDLiteralOffset() const { 5576 // 32-bit literals are only supported on CI and we only want to use them 5577 // when the offset is > 8-bits. 
5578 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 5579 } 5580 5581 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 5582 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5583 } 5584 5585 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 5586 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5587 } 5588 5589 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5590 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5591 } 5592 5593 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { 5594 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5595 } 5596 5597 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { 5598 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5599 } 5600 5601 //===----------------------------------------------------------------------===// 5602 // vop3 5603 //===----------------------------------------------------------------------===// 5604 5605 static bool ConvertOmodMul(int64_t &Mul) { 5606 if (Mul != 1 && Mul != 2 && Mul != 4) 5607 return false; 5608 5609 Mul >>= 1; 5610 return true; 5611 } 5612 5613 static bool ConvertOmodDiv(int64_t &Div) { 5614 if (Div == 1) { 5615 Div = 0; 5616 return true; 5617 } 5618 5619 if (Div == 2) { 5620 Div = 3; 5621 return true; 5622 } 5623 5624 return false; 5625 } 5626 5627 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 5628 if (BoundCtrl == 0) { 5629 BoundCtrl = 1; 5630 return true; 5631 } 5632 5633 if (BoundCtrl == -1) { 5634 BoundCtrl = 0; 5635 return true; 5636 } 5637 5638 return false; 5639 } 5640 5641 // Note: the order in this table matches the order of operands in AsmString. 
5642 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 5643 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 5644 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 5645 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 5646 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 5647 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 5648 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 5649 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 5650 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 5651 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 5652 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 5653 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 5654 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 5655 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 5656 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 5657 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5658 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 5659 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 5660 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 5661 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 5662 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 5663 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5664 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5665 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 5666 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5667 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 5668 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 5669 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 5670 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 5671 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 5672 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 5673 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 5674 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 5675 
{"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 5676 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 5677 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 5678 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 5679 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 5680 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 5681 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr} 5682 }; 5683 5684 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 5685 unsigned size = Operands.size(); 5686 assert(size > 0); 5687 5688 OperandMatchResultTy res = parseOptionalOpr(Operands); 5689 5690 // This is a hack to enable hardcoded mandatory operands which follow 5691 // optional operands. 5692 // 5693 // Current design assumes that all operands after the first optional operand 5694 // are also optional. However implementation of some instructions violates 5695 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 5696 // 5697 // To alleviate this problem, we have to (implicitly) parse extra operands 5698 // to make sure autogenerated parser of custom operands never hit hardcoded 5699 // mandatory operands. 5700 5701 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 5702 5703 // We have parsed the first optional operand. 5704 // Parse as many operands as necessary to skip all mandatory operands. 
5705 5706 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 5707 if (res != MatchOperand_Success || 5708 getLexer().is(AsmToken::EndOfStatement)) break; 5709 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 5710 res = parseOptionalOpr(Operands); 5711 } 5712 } 5713 5714 return res; 5715 } 5716 5717 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 5718 OperandMatchResultTy res; 5719 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 5720 // try to parse any optional operand here 5721 if (Op.IsBit) { 5722 res = parseNamedBit(Op.Name, Operands, Op.Type); 5723 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 5724 res = parseOModOperand(Operands); 5725 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 5726 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 5727 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 5728 res = parseSDWASel(Operands, Op.Name, Op.Type); 5729 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 5730 res = parseSDWADstUnused(Operands); 5731 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 5732 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 5733 Op.Type == AMDGPUOperand::ImmTyNegLo || 5734 Op.Type == AMDGPUOperand::ImmTyNegHi) { 5735 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 5736 Op.ConvertResult); 5737 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 5738 res = parseDim(Operands); 5739 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 5740 res = parseDfmtNfmt(Operands); 5741 } else { 5742 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 5743 } 5744 if (res != MatchOperand_NoMatch) { 5745 return res; 5746 } 5747 } 5748 return MatchOperand_NoMatch; 5749 } 5750 5751 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 5752 StringRef Name = Parser.getTok().getString(); 5753 if (Name == "mul") { 5754 return parseIntWithPrefix("mul", Operands, 5755 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 5756 } 5757 5758 if (Name 
== "div") { 5759 return parseIntWithPrefix("div", Operands, 5760 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 5761 } 5762 5763 return MatchOperand_NoMatch; 5764 } 5765 5766 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 5767 cvtVOP3P(Inst, Operands); 5768 5769 int Opc = Inst.getOpcode(); 5770 5771 int SrcNum; 5772 const int Ops[] = { AMDGPU::OpName::src0, 5773 AMDGPU::OpName::src1, 5774 AMDGPU::OpName::src2 }; 5775 for (SrcNum = 0; 5776 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 5777 ++SrcNum); 5778 assert(SrcNum > 0); 5779 5780 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5781 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5782 5783 if ((OpSel & (1 << SrcNum)) != 0) { 5784 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 5785 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 5786 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 5787 } 5788 } 5789 5790 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 5791 // 1. This operand is input modifiers 5792 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 5793 // 2. This is not last operand 5794 && Desc.NumOperands > (OpNum + 1) 5795 // 3. Next operand is register class 5796 && Desc.OpInfo[OpNum + 1].RegClass != -1 5797 // 4. 
Next register is not tied to any other operand 5798 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 5799 } 5800 5801 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 5802 { 5803 OptionalImmIndexMap OptionalIdx; 5804 unsigned Opc = Inst.getOpcode(); 5805 5806 unsigned I = 1; 5807 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5808 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5809 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5810 } 5811 5812 for (unsigned E = Operands.size(); I != E; ++I) { 5813 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5814 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5815 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5816 } else if (Op.isInterpSlot() || 5817 Op.isInterpAttr() || 5818 Op.isAttrChan()) { 5819 Inst.addOperand(MCOperand::createImm(Op.getImm())); 5820 } else if (Op.isImmModifier()) { 5821 OptionalIdx[Op.getImmTy()] = I; 5822 } else { 5823 llvm_unreachable("unhandled operand type"); 5824 } 5825 } 5826 5827 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 5828 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 5829 } 5830 5831 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5832 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5833 } 5834 5835 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5836 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5837 } 5838 } 5839 5840 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 5841 OptionalImmIndexMap &OptionalIdx) { 5842 unsigned Opc = Inst.getOpcode(); 5843 5844 unsigned I = 1; 5845 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5846 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5847 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5848 } 5849 5850 if (AMDGPU::getNamedOperandIdx(Opc, 
AMDGPU::OpName::src0_modifiers) != -1) { 5851 // This instruction has src modifiers 5852 for (unsigned E = Operands.size(); I != E; ++I) { 5853 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5854 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5855 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5856 } else if (Op.isImmModifier()) { 5857 OptionalIdx[Op.getImmTy()] = I; 5858 } else if (Op.isRegOrImm()) { 5859 Op.addRegOrImmOperands(Inst, 1); 5860 } else { 5861 llvm_unreachable("unhandled operand type"); 5862 } 5863 } 5864 } else { 5865 // No src modifiers 5866 for (unsigned E = Operands.size(); I != E; ++I) { 5867 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5868 if (Op.isMod()) { 5869 OptionalIdx[Op.getImmTy()] = I; 5870 } else { 5871 Op.addRegOrImmOperands(Inst, 1); 5872 } 5873 } 5874 } 5875 5876 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5877 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5878 } 5879 5880 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5881 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5882 } 5883 5884 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 5885 // it has src2 register operand that is tied to dst operand 5886 // we don't allow modifiers for this operand in assembler so src2_modifiers 5887 // should be 0. 
5888 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 5889 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 5890 Opc == AMDGPU::V_MAC_F32_e64_vi || 5891 Opc == AMDGPU::V_MAC_F16_e64_vi || 5892 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 5893 Opc == AMDGPU::V_FMAC_F32_e64_vi || 5894 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 5895 auto it = Inst.begin(); 5896 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 5897 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 5898 ++it; 5899 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 5900 } 5901 } 5902 5903 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 5904 OptionalImmIndexMap OptionalIdx; 5905 cvtVOP3(Inst, Operands, OptionalIdx); 5906 } 5907 5908 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 5909 const OperandVector &Operands) { 5910 OptionalImmIndexMap OptIdx; 5911 const int Opc = Inst.getOpcode(); 5912 const MCInstrDesc &Desc = MII.get(Opc); 5913 5914 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 5915 5916 cvtVOP3(Inst, Operands, OptIdx); 5917 5918 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 5919 assert(!IsPacked); 5920 Inst.addOperand(Inst.getOperand(0)); 5921 } 5922 5923 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 5924 // instruction, and then figure out where to actually put the modifiers 5925 5926 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 5927 5928 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 5929 if (OpSelHiIdx != -1) { 5930 int DefaultVal = IsPacked ? 
-1 : 0; 5931 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 5932 DefaultVal); 5933 } 5934 5935 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 5936 if (NegLoIdx != -1) { 5937 assert(IsPacked); 5938 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 5939 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 5940 } 5941 5942 const int Ops[] = { AMDGPU::OpName::src0, 5943 AMDGPU::OpName::src1, 5944 AMDGPU::OpName::src2 }; 5945 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 5946 AMDGPU::OpName::src1_modifiers, 5947 AMDGPU::OpName::src2_modifiers }; 5948 5949 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5950 5951 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5952 unsigned OpSelHi = 0; 5953 unsigned NegLo = 0; 5954 unsigned NegHi = 0; 5955 5956 if (OpSelHiIdx != -1) { 5957 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 5958 } 5959 5960 if (NegLoIdx != -1) { 5961 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 5962 NegLo = Inst.getOperand(NegLoIdx).getImm(); 5963 NegHi = Inst.getOperand(NegHiIdx).getImm(); 5964 } 5965 5966 for (int J = 0; J < 3; ++J) { 5967 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 5968 if (OpIdx == -1) 5969 break; 5970 5971 uint32_t ModVal = 0; 5972 5973 if ((OpSel & (1 << J)) != 0) 5974 ModVal |= SISrcMods::OP_SEL_0; 5975 5976 if ((OpSelHi & (1 << J)) != 0) 5977 ModVal |= SISrcMods::OP_SEL_1; 5978 5979 if ((NegLo & (1 << J)) != 0) 5980 ModVal |= SISrcMods::NEG; 5981 5982 if ((NegHi & (1 << J)) != 0) 5983 ModVal |= SISrcMods::NEG_HI; 5984 5985 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 5986 5987 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 5988 } 5989 } 5990 5991 //===----------------------------------------------------------------------===// 5992 // dpp 5993 //===----------------------------------------------------------------------===// 
/// Check that this operand is a valid dpp_ctrl immediate: it must fall inside
/// one of the encodable DppCtrl ranges (quad_perm, row_shl/shr/ror) or be one
/// of the discrete control codes (wave shifts/rotates, mirrors, broadcasts).
bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  // Must be an immediate of the right type and fit the 9-bit encoding field.
  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31);
  }
  return false;
}

// True if the immediate fits in 16 bits interpreted as either signed or
// unsigned (i.e. any value in [-32768, 65535]).
bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

// True if the immediate fits in 16 unsigned bits.
bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

/// Parse a MIMG "dim:<name>" operand (GFX10 only), e.g. "dim:2D" or
/// "dim:SQ_RSRC_IMG_2D", and push the dimension's encoding as an immediate.
OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10())
    return MatchOperand_NoMatch;

  SMLoc S = Parser.getTok().getLoc();

  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_NoMatch;
  if (getLexer().getTok().getString() != "dim")
    return MatchOperand_NoMatch;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();

  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
  // integer.
  std::string Token;
  if (getLexer().is(AsmToken::Integer)) {
    SMLoc Loc = getLexer().getTok().getEndLoc();
    Token = getLexer().getTok().getString();
    Parser.Lex();
    // Reject whitespace between the integer and the identifier: the next
    // token must begin exactly where the integer ended.
    if (getLexer().getTok().getLoc() != Loc)
      return MatchOperand_ParseFail;
  }
  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_ParseFail;
  Token += getLexer().getTok().getString();

  // Accept the long "SQ_RSRC_IMG_*" spelling by stripping the prefix.
  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.substr(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return MatchOperand_ParseFail;

  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

/// Parse a DPP control operand.  Accepted forms:
///   row_mirror / row_half_mirror                  (bare keywords)
///   quad_perm:[a,b,c,d]   with each lane selector in [0,3]
///   row_shl:n / row_shr:n / row_ror:n             with n in [1,15]
///   wave_shl:1 / wave_rol:1 / wave_shr:1 / wave_ror:1
///   row_bcast:15 / row_bcast:31
/// The parsed control value is pushed as an ImmTyDppCtrl immediate.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      // First selector occupies bits [1:0] of the control value.
      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
        return MatchOperand_ParseFail;

      // Remaining three selectors occupy 2-bit fields at bits 2, 4, 6.
      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
          return MatchOperand_ParseFail;
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      // Map the (prefix, count) pair onto the DppCtrl code space; any value
      // outside the documented range for the prefix is a parse failure.
      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

// Default row_mask is 0xf: all rows enabled.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}
// Default endpgm immediate is 0 (no trailing immediate given).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

// Default bank_mask is 0xf: all banks enabled.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

// Default bound_ctrl is 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

/// Convert parsed operands into a DPP MCInst: register defs first, then the
/// explicit sources (with FP input modifiers where the descriptor expects
/// them), the dpp_ctrl immediate, and finally the optional row_mask /
/// bank_mask / bound_ctrl immediates with their defaults.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  // Operand 0 is the mnemonic token; copy each def register first.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    // If the slot about to be filled is tied to an earlier operand, copy that
    // operand in first (tied old/src2 for MAC instructions).
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && Op.getReg() == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegWithFPInputModsOperands(Inst, 2); // modifiers imm + register
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

/// Parse an SDWA sub-register selector of the form "<Prefix>:<SEL>" where SEL
/// is one of BYTE_0..BYTE_3, WORD_0, WORD_1 or DWORD, and push it as an
/// immediate of the given type (dst_sel / src0_sel / src1_sel).
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff); // sentinel: unrecognized selector name
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

/// Parse an SDWA "dst_unused:<MODE>" operand (UNUSED_PAD / UNUSED_SEXT /
/// UNUSED_PRESERVE) and push it as an ImmTySdwaDstUnused immediate.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff); // sentinel: unrecognized mode name
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

// Per-encoding entry points: each forwards to cvtSDWA with the basic
// instruction class and whether a "vcc" token may need to be skipped.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  // On VI, VOPC SDWA writes vcc implicitly, so the textual "vcc" is skipped.
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

/// Convert parsed operands into an SDWA MCInst.
///
/// \param BasicInstType the underlying encoding class (VOP1/VOP2/VOPC),
///        which determines the set and order of optional sdwa immediates.
/// \param skipVcc whether a textual "vcc" operand is implicit in the encoding
///        and must not be materialized as an MCInst operand.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  // Operand 0 is the mnemonic token; copy each def register first.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2); // modifiers imm + value
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_gfx10/gfx9/vi has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      // Trailing order: clamp, [omod], dst_sel, dst_unused, src0_sel.
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      // Like VOP1 but with an additional src1_sel at the end.
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      // VOPC has no dst_sel/dst_unused (dst is vcc); clamp only if present.
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
// Registers this asm parser for both the R600 and GCN targets; called from
// LLVM's target-registration machinery.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand. This method checks if we are given immediate operand but expect to
  // get corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

/// Parse the optional immediate operand of s_endpgm.  If no expression
/// follows, it defaults to 0; an out-of-range (non-16-bit) value is an error.
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional, if not present default to 0
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

// True if this operand was parsed as an s_endpgm immediate.
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }