//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyEndpgm,
    ImmTyHigh
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
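    // For example, a bare identifier operand such as 'gds' may reach us as an
    // MCSymbolRefExpr; getToken() then falls back to the referenced symbol's
    // name.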
    return isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }
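  // The is*Src* predicates below encode, roughly, the accepted operand kinds
  // in their names: "SCSrc" accepts an SGPR or an inline constant, "SSrc"
  // additionally accepts a literal, "VCSrc" accepts a VGPR, an SGPR or an
  // inline constant, and "VSrc" additionally accepts a literal. The suffix
  // (B16/B32/F64/V2F16/...) is the expected element type.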
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }
  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyEndpgm:
      OS << "Endpgm";
      break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }
  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
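      // These symbols let assembly code test the target in expressions; for
      // example, for gfx900 with code object v3 this defines
      // .amdgcn.gfx_generation_number = 9, .amdgcn.gfx_generation_minor = 0
      // and .amdgcn.gfx_generation_stepping = 0.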
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  void validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultOffsetU12() const;
  AMDGPUOperand::Ptr defaultOffsetS13() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType, bool skipVcc = false);

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to the type's floating-point semantics.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.
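  // The checks below accept the values the hardware can encode as an inline
  // constant: the integers -16..64 and a small set of fp constants (0.0,
  // +-0.5, +-1.0, +-2.0, +-4.0, plus 1/(2*pi) on subtargets with
  // FeatureInv2PiInlineImm); anything else has to be emitted as a literal.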
  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept these literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 :
                     (type == MVT::v2i16) ? MVT::i16 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
    isSCSrcB64() : isSCSrcB32();
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
            "Can't encode literal as exact 64-bit floating-point operand. "
" 1567 "Low 32-bits will be set to zero"); 1568 } 1569 1570 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1571 return; 1572 } 1573 1574 // We don't allow fp literals in 64-bit integer instructions. It is 1575 // unclear how we should encode them. This case should be checked earlier 1576 // in predicate methods (isLiteralImm()) 1577 llvm_unreachable("fp literal in 64-bit integer instruction."); 1578 1579 case AMDGPU::OPERAND_REG_IMM_INT32: 1580 case AMDGPU::OPERAND_REG_IMM_FP32: 1581 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1582 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1583 case AMDGPU::OPERAND_REG_IMM_INT16: 1584 case AMDGPU::OPERAND_REG_IMM_FP16: 1585 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1586 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1587 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1588 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1589 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1590 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1591 bool lost; 1592 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1593 // Convert literal to single precision 1594 FPLiteral.convert(*getOpFltSemantics(OpTy), 1595 APFloat::rmNearestTiesToEven, &lost); 1596 // We allow precision lost but not overflow or underflow. This should be 1597 // checked earlier in isLiteralImm() 1598 1599 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1600 Inst.addOperand(MCOperand::createImm(ImmVal)); 1601 return; 1602 } 1603 default: 1604 llvm_unreachable("invalid operand size"); 1605 } 1606 1607 return; 1608 } 1609 1610 // We got int literal token. 1611 // Only sign extend inline immediates. 1612 switch (OpTy) { 1613 case AMDGPU::OPERAND_REG_IMM_INT32: 1614 case AMDGPU::OPERAND_REG_IMM_FP32: 1615 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1616 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1617 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1618 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1619 if (isSafeTruncation(Val, 32) && 1620 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1621 AsmParser->hasInv2PiInlineImm())) { 1622 Inst.addOperand(MCOperand::createImm(Val)); 1623 return; 1624 } 1625 1626 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1627 return; 1628 1629 case AMDGPU::OPERAND_REG_IMM_INT64: 1630 case AMDGPU::OPERAND_REG_IMM_FP64: 1631 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1632 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1633 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1634 Inst.addOperand(MCOperand::createImm(Val)); 1635 return; 1636 } 1637 1638 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1639 return; 1640 1641 case AMDGPU::OPERAND_REG_IMM_INT16: 1642 case AMDGPU::OPERAND_REG_IMM_FP16: 1643 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1644 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1645 if (isSafeTruncation(Val, 16) && 1646 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1647 AsmParser->hasInv2PiInlineImm())) { 1648 Inst.addOperand(MCOperand::createImm(Val)); 1649 return; 1650 } 1651 1652 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1653 return; 1654 1655 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1656 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { 1657 assert(isSafeTruncation(Val, 16)); 1658 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1659 AsmParser->hasInv2PiInlineImm())); 1660 1661 Inst.addOperand(MCOperand::createImm(Val)); 1662 return; 1663 } 1664 default: 1665 llvm_unreachable("invalid operand size"); 1666 } 1667 } 1668 1669 template <unsigned Bitwidth> 1670 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1671 APInt 
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}

static bool isInlineValue(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return true;
  case AMDGPU::SRC_VCCZ:
  case AMDGPU::SRC_EXECZ:
  case AMDGPU::SRC_SCC:
    return true;
  default:
    return false;
  }
}

bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  }
  return -1;
}
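// Map register names that do not follow the vN/sN/ttmpN pattern (vcc, exec,
// m0, flat_scratch, ...) to the corresponding MC register. Returns 0, i.e.
// "no register", if the name is not one of the special registers.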
AMDGPU::SRC_VCCZ) 1769 .Case("execz", AMDGPU::SRC_EXECZ) 1770 .Case("src_execz", AMDGPU::SRC_EXECZ) 1771 .Case("scc", AMDGPU::SRC_SCC) 1772 .Case("src_scc", AMDGPU::SRC_SCC) 1773 .Case("tba", AMDGPU::TBA) 1774 .Case("tma", AMDGPU::TMA) 1775 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1776 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1777 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1778 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1779 .Case("vcc_lo", AMDGPU::VCC_LO) 1780 .Case("vcc_hi", AMDGPU::VCC_HI) 1781 .Case("exec_lo", AMDGPU::EXEC_LO) 1782 .Case("exec_hi", AMDGPU::EXEC_HI) 1783 .Case("tma_lo", AMDGPU::TMA_LO) 1784 .Case("tma_hi", AMDGPU::TMA_HI) 1785 .Case("tba_lo", AMDGPU::TBA_LO) 1786 .Case("tba_hi", AMDGPU::TBA_HI) 1787 .Case("null", AMDGPU::SGPR_NULL) 1788 .Default(0); 1789 } 1790 1791 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1792 SMLoc &EndLoc) { 1793 auto R = parseRegister(); 1794 if (!R) return true; 1795 assert(R->isReg()); 1796 RegNo = R->getReg(); 1797 StartLoc = R->getStartLoc(); 1798 EndLoc = R->getEndLoc(); 1799 return false; 1800 } 1801 1802 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1803 RegisterKind RegKind, unsigned Reg1, 1804 unsigned RegNum) { 1805 switch (RegKind) { 1806 case IS_SPECIAL: 1807 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1808 Reg = AMDGPU::EXEC; 1809 RegWidth = 2; 1810 return true; 1811 } 1812 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1813 Reg = AMDGPU::FLAT_SCR; 1814 RegWidth = 2; 1815 return true; 1816 } 1817 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1818 Reg = AMDGPU::XNACK_MASK; 1819 RegWidth = 2; 1820 return true; 1821 } 1822 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1823 Reg = AMDGPU::VCC; 1824 RegWidth = 2; 1825 return true; 1826 } 1827 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1828 Reg = AMDGPU::TBA; 1829 RegWidth = 2; 1830 return true; 1831 } 1832 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 1833 Reg = AMDGPU::TMA; 1834 RegWidth = 2; 1835 return true; 1836 } 1837 return false; 1838 case IS_VGPR: 1839 case IS_SGPR: 1840 case IS_TTMP: 1841 if (Reg1 != Reg + RegWidth) { 1842 return false; 1843 } 1844 RegWidth++; 1845 return true; 1846 default: 1847 llvm_unreachable("unexpected register kind"); 1848 } 1849 } 1850 1851 static const StringRef Registers[] = { 1852 { "v" }, 1853 { "s" }, 1854 { "ttmp" }, 1855 }; 1856 1857 bool 1858 AMDGPUAsmParser::isRegister(const AsmToken &Token, 1859 const AsmToken &NextToken) const { 1860 1861 // A list of consecutive registers: [s0,s1,s2,s3] 1862 if (Token.is(AsmToken::LBrac)) 1863 return true; 1864 1865 if (!Token.is(AsmToken::Identifier)) 1866 return false; 1867 1868 // A single register like s0 or a range of registers like s[0:1] 1869 1870 StringRef RegName = Token.getString(); 1871 1872 for (StringRef Reg : Registers) { 1873 if (RegName.startswith(Reg)) { 1874 if (Reg.size() < RegName.size()) { 1875 unsigned RegNum; 1876 // A single register with an index: rXX 1877 if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum)) 1878 return true; 1879 } else { 1880 // A range of registers: r[XX:YY]. 
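// Illustrative note: for an operand written as "v[8:11]" the lexer yields the
// identifier "v" with '[' as the next token, so checking for a following
// left bracket below is enough to classify the name as a register reference.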
1881 if (NextToken.is(AsmToken::LBrac)) 1882 return true; 1883 } 1884 } 1885 } 1886 1887 return getSpecialRegForName(RegName); 1888 } 1889 1890 bool 1891 AMDGPUAsmParser::isRegister() 1892 { 1893 return isRegister(getToken(), peekToken()); 1894 } 1895 1896 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1897 unsigned &RegNum, unsigned &RegWidth, 1898 unsigned *DwordRegIndex) { 1899 if (DwordRegIndex) { *DwordRegIndex = 0; } 1900 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 1901 if (getLexer().is(AsmToken::Identifier)) { 1902 StringRef RegName = Parser.getTok().getString(); 1903 if ((Reg = getSpecialRegForName(RegName))) { 1904 Parser.Lex(); 1905 RegKind = IS_SPECIAL; 1906 } else { 1907 unsigned RegNumIndex = 0; 1908 if (RegName[0] == 'v') { 1909 RegNumIndex = 1; 1910 RegKind = IS_VGPR; 1911 } else if (RegName[0] == 's') { 1912 RegNumIndex = 1; 1913 RegKind = IS_SGPR; 1914 } else if (RegName.startswith("ttmp")) { 1915 RegNumIndex = strlen("ttmp"); 1916 RegKind = IS_TTMP; 1917 } else { 1918 return false; 1919 } 1920 if (RegName.size() > RegNumIndex) { 1921 // Single 32-bit register: vXX. 1922 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 1923 return false; 1924 Parser.Lex(); 1925 RegWidth = 1; 1926 } else { 1927 // Range of registers: v[XX:YY]. ":YY" is optional. 1928 Parser.Lex(); 1929 int64_t RegLo, RegHi; 1930 if (getLexer().isNot(AsmToken::LBrac)) 1931 return false; 1932 Parser.Lex(); 1933 1934 if (getParser().parseAbsoluteExpression(RegLo)) 1935 return false; 1936 1937 const bool isRBrace = getLexer().is(AsmToken::RBrac); 1938 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 1939 return false; 1940 Parser.Lex(); 1941 1942 if (isRBrace) { 1943 RegHi = RegLo; 1944 } else { 1945 if (getParser().parseAbsoluteExpression(RegHi)) 1946 return false; 1947 1948 if (getLexer().isNot(AsmToken::RBrac)) 1949 return false; 1950 Parser.Lex(); 1951 } 1952 RegNum = (unsigned) RegLo; 1953 RegWidth = (RegHi - RegLo) + 1; 1954 } 1955 } 1956 } else if (getLexer().is(AsmToken::LBrac)) { 1957 // List of consecutive registers: [s0,s1,s2,s3] 1958 Parser.Lex(); 1959 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 1960 return false; 1961 if (RegWidth != 1) 1962 return false; 1963 RegisterKind RegKind1; 1964 unsigned Reg1, RegNum1, RegWidth1; 1965 do { 1966 if (getLexer().is(AsmToken::Comma)) { 1967 Parser.Lex(); 1968 } else if (getLexer().is(AsmToken::RBrac)) { 1969 Parser.Lex(); 1970 break; 1971 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 1972 if (RegWidth1 != 1) { 1973 return false; 1974 } 1975 if (RegKind1 != RegKind) { 1976 return false; 1977 } 1978 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 1979 return false; 1980 } 1981 } else { 1982 return false; 1983 } 1984 } while (true); 1985 } else { 1986 return false; 1987 } 1988 switch (RegKind) { 1989 case IS_SPECIAL: 1990 RegNum = 0; 1991 RegWidth = 1; 1992 break; 1993 case IS_VGPR: 1994 case IS_SGPR: 1995 case IS_TTMP: 1996 { 1997 unsigned Size = 1; 1998 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 1999 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
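// Worked example (illustrative): s[2:3] gives RegNum == 2 and RegWidth == 2, so
// Size == 2 and the alignment check below passes; s[3:4] gives RegNum == 3,
// which fails 3 % 2 == 0 and is rejected as a misaligned SGPR pair.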
2000 Size = std::min(RegWidth, 4u); 2001 } 2002 if (RegNum % Size != 0) 2003 return false; 2004 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 2005 RegNum = RegNum / Size; 2006 int RCID = getRegClass(RegKind, RegWidth); 2007 if (RCID == -1) 2008 return false; 2009 const MCRegisterClass RC = TRI->getRegClass(RCID); 2010 if (RegNum >= RC.getNumRegs()) 2011 return false; 2012 Reg = RC.getRegister(RegNum); 2013 break; 2014 } 2015 2016 default: 2017 llvm_unreachable("unexpected register kind"); 2018 } 2019 2020 if (!subtargetHasRegister(*TRI, Reg)) 2021 return false; 2022 return true; 2023 } 2024 2025 Optional<StringRef> 2026 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2027 switch (RegKind) { 2028 case IS_VGPR: 2029 return StringRef(".amdgcn.next_free_vgpr"); 2030 case IS_SGPR: 2031 return StringRef(".amdgcn.next_free_sgpr"); 2032 default: 2033 return None; 2034 } 2035 } 2036 2037 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2038 auto SymbolName = getGprCountSymbolName(RegKind); 2039 assert(SymbolName && "initializing invalid register kind"); 2040 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2041 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2042 } 2043 2044 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2045 unsigned DwordRegIndex, 2046 unsigned RegWidth) { 2047 // Symbols are only defined for GCN targets 2048 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2049 return true; 2050 2051 auto SymbolName = getGprCountSymbolName(RegKind); 2052 if (!SymbolName) 2053 return true; 2054 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2055 2056 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2057 int64_t OldCount; 2058 2059 if (!Sym->isVariable()) 2060 return !Error(getParser().getTok().getLoc(), 2061 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2062 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2063 return !Error( 2064 getParser().getTok().getLoc(), 2065 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2066 2067 if (OldCount <= NewMax) 2068 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2069 2070 return true; 2071 } 2072 2073 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 2074 const auto &Tok = Parser.getTok(); 2075 SMLoc StartLoc = Tok.getLoc(); 2076 SMLoc EndLoc = Tok.getEndLoc(); 2077 RegisterKind RegKind; 2078 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 2079 2080 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 2081 //FIXME: improve error messages (bug 41303). 
2082 Error(StartLoc, "not a valid operand."); 2083 return nullptr; 2084 } 2085 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2086 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 2087 return nullptr; 2088 } else 2089 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 2090 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2091 } 2092 2093 OperandMatchResultTy 2094 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2095 // TODO: add syntactic sugar for 1/(2*PI) 2096 2097 assert(!isRegister()); 2098 assert(!isModifier()); 2099 2100 const auto& Tok = getToken(); 2101 const auto& NextTok = peekToken(); 2102 bool IsReal = Tok.is(AsmToken::Real); 2103 SMLoc S = getLoc(); 2104 bool Negate = false; 2105 2106 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2107 lex(); 2108 IsReal = true; 2109 Negate = true; 2110 } 2111 2112 if (IsReal) { 2113 // Floating-point expressions are not supported. 2114 // Can only allow floating-point literals with an 2115 // optional sign. 2116 2117 StringRef Num = getTokenStr(); 2118 lex(); 2119 2120 APFloat RealVal(APFloat::IEEEdouble()); 2121 auto roundMode = APFloat::rmNearestTiesToEven; 2122 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { 2123 return MatchOperand_ParseFail; 2124 } 2125 if (Negate) 2126 RealVal.changeSign(); 2127 2128 Operands.push_back( 2129 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2130 AMDGPUOperand::ImmTyNone, true)); 2131 2132 return MatchOperand_Success; 2133 2134 } else { 2135 int64_t IntVal; 2136 const MCExpr *Expr; 2137 SMLoc S = getLoc(); 2138 2139 if (HasSP3AbsModifier) { 2140 // This is a workaround for handling expressions 2141 // as arguments of SP3 'abs' modifier, for example: 2142 // |1.0| 2143 // |-1| 2144 // |1+x| 2145 // This syntax is not compatible with syntax of standard 2146 // MC expressions (due to the trailing '|'). 
2147 SMLoc EndLoc; 2148 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2149 return MatchOperand_ParseFail; 2150 } else { 2151 if (Parser.parseExpression(Expr)) 2152 return MatchOperand_ParseFail; 2153 } 2154 2155 if (Expr->evaluateAsAbsolute(IntVal)) { 2156 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2157 } else { 2158 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2159 } 2160 2161 return MatchOperand_Success; 2162 } 2163 2164 return MatchOperand_NoMatch; 2165 } 2166 2167 OperandMatchResultTy 2168 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2169 if (!isRegister()) 2170 return MatchOperand_NoMatch; 2171 2172 if (auto R = parseRegister()) { 2173 assert(R->isReg()); 2174 Operands.push_back(std::move(R)); 2175 return MatchOperand_Success; 2176 } 2177 return MatchOperand_ParseFail; 2178 } 2179 2180 OperandMatchResultTy 2181 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2182 auto res = parseReg(Operands); 2183 if (res != MatchOperand_NoMatch) { 2184 return res; 2185 } else if (isModifier()) { 2186 return MatchOperand_NoMatch; 2187 } else { 2188 return parseImm(Operands, HasSP3AbsMod); 2189 } 2190 } 2191 2192 bool 2193 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2194 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2195 const auto &str = Token.getString(); 2196 return str == "abs" || str == "neg" || str == "sext"; 2197 } 2198 return false; 2199 } 2200 2201 bool 2202 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2203 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2204 } 2205 2206 bool 2207 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2208 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2209 } 2210 2211 bool 2212 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2213 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2214 } 2215 2216 // Check if this is an operand modifier or an opcode modifier 2217 // which may look like an expression but it is not. We should 2218 // avoid parsing these modifiers as expressions. Currently 2219 // recognized sequences are: 2220 // |...| 2221 // abs(...) 2222 // neg(...) 2223 // sext(...) 2224 // -reg 2225 // -|...| 2226 // -abs(...) 2227 // name:... 2228 // Note that simple opcode modifiers like 'gds' may be parsed as 2229 // expressions; this is a special case. See getExpressionAsToken. 2230 // 2231 bool 2232 AMDGPUAsmParser::isModifier() { 2233 2234 AsmToken Tok = getToken(); 2235 AsmToken NextToken[2]; 2236 peekTokens(NextToken); 2237 2238 return isOperandModifier(Tok, NextToken[0]) || 2239 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2240 isOpcodeModifierWithVal(Tok, NextToken[0]); 2241 } 2242 2243 // Check if the current token is an SP3 'neg' modifier. 2244 // Currently this modifier is allowed in the following context: 2245 // 2246 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2247 // 2. Before an 'abs' modifier: -abs(...) 2248 // 3. Before an SP3 'abs' modifier: -|...| 2249 // 2250 // In all other cases "-" is handled as a part 2251 // of an expression that follows the sign. 
2252 // 2253 // Note: When "-" is followed by an integer literal, 2254 // this is interpreted as integer negation rather 2255 // than a floating-point NEG modifier applied to the literal. 2256 // Besides being counter-intuitive, such use of the floating-point 2257 // NEG modifier would result in different meanings 2258 // of integer literals used with VOP1/2/C and VOP3, 2259 // for example: 2260 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2261 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2262 // Negative fp literals with a preceding "-" are 2263 // handled likewise for uniformity. 2264 // 2265 bool 2266 AMDGPUAsmParser::parseSP3NegModifier() { 2267 2268 AsmToken NextToken[2]; 2269 peekTokens(NextToken); 2270 2271 if (isToken(AsmToken::Minus) && 2272 (isRegister(NextToken[0], NextToken[1]) || 2273 NextToken[0].is(AsmToken::Pipe) || 2274 isId(NextToken[0], "abs"))) { 2275 lex(); 2276 return true; 2277 } 2278 2279 return false; 2280 } 2281 2282 OperandMatchResultTy 2283 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2284 bool AllowImm) { 2285 bool Neg, SP3Neg; 2286 bool Abs, SP3Abs; 2287 SMLoc Loc; 2288 2289 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2290 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2291 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2292 return MatchOperand_ParseFail; 2293 } 2294 2295 SP3Neg = parseSP3NegModifier(); 2296 2297 Loc = getLoc(); 2298 Neg = trySkipId("neg"); 2299 if (Neg && SP3Neg) { 2300 Error(Loc, "expected register or immediate"); 2301 return MatchOperand_ParseFail; 2302 } 2303 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2304 return MatchOperand_ParseFail; 2305 2306 Abs = trySkipId("abs"); 2307 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2308 return MatchOperand_ParseFail; 2309 2310 Loc = getLoc(); 2311 SP3Abs = trySkipToken(AsmToken::Pipe); 2312 if (Abs && SP3Abs) { 2313 Error(Loc, "expected register or immediate"); 2314 return MatchOperand_ParseFail; 2315 } 2316 2317 OperandMatchResultTy Res; 2318 if (AllowImm) { 2319 Res = parseRegOrImm(Operands, SP3Abs); 2320 } else { 2321 Res = parseReg(Operands); 2322 } 2323 if (Res != MatchOperand_Success) { 2324 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2325 } 2326 2327 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2328 return MatchOperand_ParseFail; 2329 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2330 return MatchOperand_ParseFail; 2331 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2332 return MatchOperand_ParseFail; 2333 2334 AMDGPUOperand::Modifiers Mods; 2335 Mods.Abs = Abs || SP3Abs; 2336 Mods.Neg = Neg || SP3Neg; 2337 2338 if (Mods.hasFPModifiers()) { 2339 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2340 if (Op.isExpr()) { 2341 Error(Op.getStartLoc(), "expected an absolute expression"); 2342 return MatchOperand_ParseFail; 2343 } 2344 Op.setModifiers(Mods); 2345 } 2346 return MatchOperand_Success; 2347 } 2348 2349 OperandMatchResultTy 2350 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2351 bool AllowImm) { 2352 bool Sext = trySkipId("sext"); 2353 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2354 return MatchOperand_ParseFail; 2355 2356 OperandMatchResultTy Res; 2357 if (AllowImm) { 2358 Res = parseRegOrImm(Operands); 2359 } else { 2360 Res = parseReg(Operands); 2361 } 2362 if (Res != MatchOperand_Success) { 2363 return Sext? MatchOperand_ParseFail : Res; 2364 } 2365 2366 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2367 return MatchOperand_ParseFail; 2368 2369 AMDGPUOperand::Modifiers Mods; 2370 Mods.Sext = Sext; 2371 2372 if (Mods.hasIntModifiers()) { 2373 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2374 if (Op.isExpr()) { 2375 Error(Op.getStartLoc(), "expected an absolute expression"); 2376 return MatchOperand_ParseFail; 2377 } 2378 Op.setModifiers(Mods); 2379 } 2380 2381 return MatchOperand_Success; 2382 } 2383 2384 OperandMatchResultTy 2385 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2386 return parseRegOrImmWithFPInputMods(Operands, false); 2387 } 2388 2389 OperandMatchResultTy 2390 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2391 return parseRegOrImmWithIntInputMods(Operands, false); 2392 } 2393 2394 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2395 auto Loc = getLoc(); 2396 if (trySkipId("off")) { 2397 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2398 AMDGPUOperand::ImmTyOff, false)); 2399 return MatchOperand_Success; 2400 } 2401 2402 if (!isRegister()) 2403 return MatchOperand_NoMatch; 2404 2405 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2406 if (Reg) { 2407 Operands.push_back(std::move(Reg)); 2408 return MatchOperand_Success; 2409 } 2410 2411 return MatchOperand_ParseFail; 2412 2413 } 2414 2415 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2416 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2417 2418 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2419 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2420 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2421 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2422 return Match_InvalidOperand; 2423 2424 if ((TSFlags & SIInstrFlags::VOP3) && 2425 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2426 getForcedEncodingSize() != 64) 2427 return Match_PreferE32; 2428 2429 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2430 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2431 // v_mac_f32/16 allow only dst_sel == DWORD; 2432 auto OpNum = 2433 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2434 const auto &Op = Inst.getOperand(OpNum); 2435 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2436 return Match_InvalidOperand; 2437 } 2438 } 2439 2440 if (TSFlags & SIInstrFlags::FLAT) { 2441 // FIXME: Produces error without correct column reported. 2442 auto Opcode = Inst.getOpcode(); 2443 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 2444 2445 const auto &Op = Inst.getOperand(OpNum); 2446 if (!hasFlatOffsets() && Op.getImm() != 0) 2447 return Match_InvalidOperand; 2448 2449 // GFX10: Address offset is 12-bit signed byte offset. Must be positive for 2450 // FLAT segment. For FLAT segment MSB is ignored and forced to zero. 2451 if (isGFX10()) { 2452 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 2453 if (!isInt<12>(Op.getImm())) 2454 return Match_InvalidOperand; 2455 } else { 2456 if (!isUInt<11>(Op.getImm())) 2457 return Match_InvalidOperand; 2458 } 2459 } 2460 } 2461 2462 return Match_Success; 2463 } 2464 2465 // What asm variants we should check 2466 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2467 if (getForcedEncodingSize() == 32) { 2468 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2469 return makeArrayRef(Variants); 2470 } 2471 2472 if (isForcedVOP3()) { 2473 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2474 return makeArrayRef(Variants); 2475 } 2476 2477 if (isForcedSDWA()) { 2478 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2479 AMDGPUAsmVariants::SDWA9}; 2480 return makeArrayRef(Variants); 2481 } 2482 2483 if (isForcedDPP()) { 2484 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2485 return makeArrayRef(Variants); 2486 } 2487 2488 static const unsigned Variants[] = { 2489 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2490 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2491 }; 2492 2493 return makeArrayRef(Variants); 2494 } 2495 2496 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2497 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2498 const unsigned Num = Desc.getNumImplicitUses(); 2499 for (unsigned i = 0; i < Num; ++i) { 2500 unsigned Reg = Desc.ImplicitUses[i]; 2501 switch (Reg) { 2502 case AMDGPU::FLAT_SCR: 2503 case AMDGPU::VCC: 2504 case AMDGPU::VCC_LO: 2505 case AMDGPU::VCC_HI: 2506 case AMDGPU::M0: 2507 case AMDGPU::SGPR_NULL: 2508 return Reg; 2509 default: 2510 break; 2511 } 2512 } 2513 return AMDGPU::NoRegister; 2514 } 2515 2516 // NB: This code is correct only when used to check constant 2517 // bus limitations because GFX7 support no f16 inline constants. 2518 // Note that there are no cases when a GFX7 opcode violates 2519 // constant bus limitations due to the use of an f16 constant. 
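// For reference (not an exhaustive list): the inline constants accepted by the
// helpers called below are the small integers -16..64 plus a few floats
// (+/-0.5, +/-1.0, +/-2.0, +/-4.0 and, where hasInv2PiInlineImm() holds,
// 1/(2*pi)); any other immediate must be encoded as a literal.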
2520 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2521 unsigned OpIdx) const { 2522 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2523 2524 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2525 return false; 2526 } 2527 2528 const MCOperand &MO = Inst.getOperand(OpIdx); 2529 2530 int64_t Val = MO.getImm(); 2531 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2532 2533 switch (OpSize) { // expected operand size 2534 case 8: 2535 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2536 case 4: 2537 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2538 case 2: { 2539 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2540 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2541 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2542 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2543 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2544 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2545 } else { 2546 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2547 } 2548 } 2549 default: 2550 llvm_unreachable("invalid operand size"); 2551 } 2552 } 2553 2554 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2555 const MCOperand &MO = Inst.getOperand(OpIdx); 2556 if (MO.isImm()) { 2557 return !isInlineConstant(Inst, OpIdx); 2558 } 2559 return !MO.isReg() || 2560 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2561 } 2562 2563 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2564 const unsigned Opcode = Inst.getOpcode(); 2565 const MCInstrDesc &Desc = MII.get(Opcode); 2566 unsigned ConstantBusUseCount = 0; 2567 unsigned NumLiterals = 0; 2568 unsigned LiteralSize; 2569 2570 if (Desc.TSFlags & 2571 (SIInstrFlags::VOPC | 2572 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2573 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2574 SIInstrFlags::SDWA)) { 2575 // Check special imm operands (used by madmk, etc) 2576 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2577 ++ConstantBusUseCount; 2578 } 2579 2580 SmallDenseSet<unsigned> SGPRsUsed; 2581 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2582 if (SGPRUsed != AMDGPU::NoRegister) { 2583 SGPRsUsed.insert(SGPRUsed); 2584 ++ConstantBusUseCount; 2585 } 2586 2587 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2588 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2589 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2590 2591 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2592 2593 for (int OpIdx : OpIndices) { 2594 if (OpIdx == -1) break; 2595 2596 const MCOperand &MO = Inst.getOperand(OpIdx); 2597 if (usesConstantBus(Inst, OpIdx)) { 2598 if (MO.isReg()) { 2599 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2600 // Pairs of registers with a partial intersections like these 2601 // s0, s[0:1] 2602 // flat_scratch_lo, flat_scratch 2603 // flat_scratch_lo, flat_scratch_hi 2604 // are theoretically valid but they are disabled anyway. 2605 // Note that this code mimics SIInstrInfo::verifyInstruction 2606 if (!SGPRsUsed.count(Reg)) { 2607 SGPRsUsed.insert(Reg); 2608 ++ConstantBusUseCount; 2609 } 2610 SGPRUsed = Reg; 2611 } else { // Expression or a literal 2612 2613 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2614 continue; // special operand like VINTERP attr_chan 2615 2616 // An instruction may use only one literal. 
2617 // This has been validated on the previous step. 2618 // See validateVOP3Literal. 2619 // This literal may be used as more than one operand. 2620 // If all these operands are of the same size, 2621 // this literal counts as one scalar value. 2622 // Otherwise it counts as 2 scalar values. 2623 // See "GFX10 Shader Programming", section 3.6.2.3. 2624 2625 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2626 if (Size < 4) Size = 4; 2627 2628 if (NumLiterals == 0) { 2629 NumLiterals = 1; 2630 LiteralSize = Size; 2631 } else if (LiteralSize != Size) { 2632 NumLiterals = 2; 2633 } 2634 } 2635 } 2636 } 2637 } 2638 ConstantBusUseCount += NumLiterals; 2639 2640 if (isGFX10()) 2641 return ConstantBusUseCount <= 2; 2642 2643 return ConstantBusUseCount <= 1; 2644 } 2645 2646 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2647 const unsigned Opcode = Inst.getOpcode(); 2648 const MCInstrDesc &Desc = MII.get(Opcode); 2649 2650 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2651 if (DstIdx == -1 || 2652 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2653 return true; 2654 } 2655 2656 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2657 2658 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2659 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2660 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2661 2662 assert(DstIdx != -1); 2663 const MCOperand &Dst = Inst.getOperand(DstIdx); 2664 assert(Dst.isReg()); 2665 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2666 2667 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2668 2669 for (int SrcIdx : SrcIndices) { 2670 if (SrcIdx == -1) break; 2671 const MCOperand &Src = Inst.getOperand(SrcIdx); 2672 if (Src.isReg()) { 2673 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2674 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2675 return false; 2676 } 2677 } 2678 } 2679 2680 return true; 2681 } 2682 2683 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2684 2685 const unsigned Opc = Inst.getOpcode(); 2686 const MCInstrDesc &Desc = MII.get(Opc); 2687 2688 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2689 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2690 assert(ClampIdx != -1); 2691 return Inst.getOperand(ClampIdx).getImm() == 0; 2692 } 2693 2694 return true; 2695 } 2696 2697 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2698 2699 const unsigned Opc = Inst.getOpcode(); 2700 const MCInstrDesc &Desc = MII.get(Opc); 2701 2702 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2703 return true; 2704 2705 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2706 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2707 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2708 2709 assert(VDataIdx != -1); 2710 assert(DMaskIdx != -1); 2711 assert(TFEIdx != -1); 2712 2713 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2714 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2715 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2716 if (DMask == 0) 2717 DMask = 1; 2718 2719 unsigned DataSize = 2720 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 2721 if (hasPackedD16()) { 2722 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2723 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2724 DataSize = (DataSize + 1) / 2; 2725 } 2726 2727 return (VDataSize / 4) == DataSize + TFESize; 2728 } 2729 2730 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 2731 const unsigned Opc = Inst.getOpcode(); 2732 const MCInstrDesc &Desc = MII.get(Opc); 2733 2734 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 2735 return true; 2736 2737 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 2738 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 2739 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 2740 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 2741 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 2742 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2743 2744 assert(VAddr0Idx != -1); 2745 assert(SrsrcIdx != -1); 2746 assert(DimIdx != -1); 2747 assert(SrsrcIdx > VAddr0Idx); 2748 2749 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 2750 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 2751 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 2752 unsigned VAddrSize = 2753 IsNSA ? SrsrcIdx - VAddr0Idx 2754 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 2755 2756 unsigned AddrSize = BaseOpcode->NumExtraArgs + 2757 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 2758 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 2759 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 2760 if (!IsNSA) { 2761 if (AddrSize > 8) 2762 AddrSize = 16; 2763 else if (AddrSize > 4) 2764 AddrSize = 8; 2765 } 2766 2767 return VAddrSize == AddrSize; 2768 } 2769 2770 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2771 2772 const unsigned Opc = Inst.getOpcode(); 2773 const MCInstrDesc &Desc = MII.get(Opc); 2774 2775 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2776 return true; 2777 if (!Desc.mayLoad() || !Desc.mayStore()) 2778 return true; // Not atomic 2779 2780 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2781 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2782 2783 // This is an incomplete check because image_atomic_cmpswap 2784 // may only use 0x3 and 0xf while other atomic operations 2785 // may use 0x1 and 0x3. However these limitations are 2786 // verified when we check that dmask matches dst size. 2787 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2788 } 2789 2790 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2791 2792 const unsigned Opc = Inst.getOpcode(); 2793 const MCInstrDesc &Desc = MII.get(Opc); 2794 2795 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2796 return true; 2797 2798 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2799 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2800 2801 // GATHER4 instructions use dmask in a different fashion compared to 2802 // other MIMG instructions. The only useful DMASK values are 2803 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2804 // (red,red,red,red) etc.) The ISA document doesn't mention 2805 // this. 
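// For example, dmask:0x4 requests only the blue channel and is accepted,
// whereas a value with more than one bit set, such as dmask:0x5, fails the
// single-bit check below.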
2806 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2807 } 2808 2809 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2810 2811 const unsigned Opc = Inst.getOpcode(); 2812 const MCInstrDesc &Desc = MII.get(Opc); 2813 2814 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2815 return true; 2816 2817 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2818 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2819 if (isCI() || isSI()) 2820 return false; 2821 } 2822 2823 return true; 2824 } 2825 2826 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 2827 const unsigned Opc = Inst.getOpcode(); 2828 const MCInstrDesc &Desc = MII.get(Opc); 2829 2830 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2831 return true; 2832 2833 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2834 if (DimIdx < 0) 2835 return true; 2836 2837 long Imm = Inst.getOperand(DimIdx).getImm(); 2838 if (Imm < 0 || Imm >= 8) 2839 return false; 2840 2841 return true; 2842 } 2843 2844 static bool IsRevOpcode(const unsigned Opcode) 2845 { 2846 switch (Opcode) { 2847 case AMDGPU::V_SUBREV_F32_e32: 2848 case AMDGPU::V_SUBREV_F32_e64: 2849 case AMDGPU::V_SUBREV_F32_e32_gfx10: 2850 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 2851 case AMDGPU::V_SUBREV_F32_e32_vi: 2852 case AMDGPU::V_SUBREV_F32_e64_gfx10: 2853 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 2854 case AMDGPU::V_SUBREV_F32_e64_vi: 2855 2856 case AMDGPU::V_SUBREV_I32_e32: 2857 case AMDGPU::V_SUBREV_I32_e64: 2858 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 2859 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 2860 2861 case AMDGPU::V_SUBBREV_U32_e32: 2862 case AMDGPU::V_SUBBREV_U32_e64: 2863 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 2864 case AMDGPU::V_SUBBREV_U32_e32_vi: 2865 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 2866 case AMDGPU::V_SUBBREV_U32_e64_vi: 2867 2868 case AMDGPU::V_SUBREV_U32_e32: 2869 case AMDGPU::V_SUBREV_U32_e64: 2870 case AMDGPU::V_SUBREV_U32_e32_gfx9: 2871 case AMDGPU::V_SUBREV_U32_e32_vi: 2872 case AMDGPU::V_SUBREV_U32_e64_gfx9: 2873 case AMDGPU::V_SUBREV_U32_e64_vi: 2874 2875 case AMDGPU::V_SUBREV_F16_e32: 2876 case AMDGPU::V_SUBREV_F16_e64: 2877 case AMDGPU::V_SUBREV_F16_e32_gfx10: 2878 case AMDGPU::V_SUBREV_F16_e32_vi: 2879 case AMDGPU::V_SUBREV_F16_e64_gfx10: 2880 case AMDGPU::V_SUBREV_F16_e64_vi: 2881 2882 case AMDGPU::V_SUBREV_U16_e32: 2883 case AMDGPU::V_SUBREV_U16_e64: 2884 case AMDGPU::V_SUBREV_U16_e32_vi: 2885 case AMDGPU::V_SUBREV_U16_e64_vi: 2886 2887 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 2888 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 2889 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 2890 2891 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 2892 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 2893 2894 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 2895 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 2896 2897 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 2898 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 2899 2900 case AMDGPU::V_LSHRREV_B32_e32: 2901 case AMDGPU::V_LSHRREV_B32_e64: 2902 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 2903 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 2904 case AMDGPU::V_LSHRREV_B32_e32_vi: 2905 case AMDGPU::V_LSHRREV_B32_e64_vi: 2906 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 2907 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 2908 2909 case AMDGPU::V_ASHRREV_I32_e32: 2910 case AMDGPU::V_ASHRREV_I32_e64: 2911 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 2912 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 2913 case AMDGPU::V_ASHRREV_I32_e32_vi: 2914 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 2915 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 
2916 case AMDGPU::V_ASHRREV_I32_e64_vi: 2917 2918 case AMDGPU::V_LSHLREV_B32_e32: 2919 case AMDGPU::V_LSHLREV_B32_e64: 2920 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 2921 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 2922 case AMDGPU::V_LSHLREV_B32_e32_vi: 2923 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 2924 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 2925 case AMDGPU::V_LSHLREV_B32_e64_vi: 2926 2927 case AMDGPU::V_LSHLREV_B16_e32: 2928 case AMDGPU::V_LSHLREV_B16_e64: 2929 case AMDGPU::V_LSHLREV_B16_e32_vi: 2930 case AMDGPU::V_LSHLREV_B16_e64_vi: 2931 case AMDGPU::V_LSHLREV_B16_gfx10: 2932 2933 case AMDGPU::V_LSHRREV_B16_e32: 2934 case AMDGPU::V_LSHRREV_B16_e64: 2935 case AMDGPU::V_LSHRREV_B16_e32_vi: 2936 case AMDGPU::V_LSHRREV_B16_e64_vi: 2937 case AMDGPU::V_LSHRREV_B16_gfx10: 2938 2939 case AMDGPU::V_ASHRREV_I16_e32: 2940 case AMDGPU::V_ASHRREV_I16_e64: 2941 case AMDGPU::V_ASHRREV_I16_e32_vi: 2942 case AMDGPU::V_ASHRREV_I16_e64_vi: 2943 case AMDGPU::V_ASHRREV_I16_gfx10: 2944 2945 case AMDGPU::V_LSHLREV_B64: 2946 case AMDGPU::V_LSHLREV_B64_gfx10: 2947 case AMDGPU::V_LSHLREV_B64_vi: 2948 2949 case AMDGPU::V_LSHRREV_B64: 2950 case AMDGPU::V_LSHRREV_B64_gfx10: 2951 case AMDGPU::V_LSHRREV_B64_vi: 2952 2953 case AMDGPU::V_ASHRREV_I64: 2954 case AMDGPU::V_ASHRREV_I64_gfx10: 2955 case AMDGPU::V_ASHRREV_I64_vi: 2956 2957 case AMDGPU::V_PK_LSHLREV_B16: 2958 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 2959 case AMDGPU::V_PK_LSHLREV_B16_vi: 2960 2961 case AMDGPU::V_PK_LSHRREV_B16: 2962 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 2963 case AMDGPU::V_PK_LSHRREV_B16_vi: 2964 case AMDGPU::V_PK_ASHRREV_I16: 2965 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 2966 case AMDGPU::V_PK_ASHRREV_I16_vi: 2967 return true; 2968 default: 2969 return false; 2970 } 2971 } 2972 2973 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 2974 2975 using namespace SIInstrFlags; 2976 const unsigned Opcode = Inst.getOpcode(); 2977 const MCInstrDesc &Desc = MII.get(Opcode); 2978 2979 // lds_direct register is defined so that it can be used 2980 // with 9-bit operands only. Ignore encodings which do not accept these. 2981 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 2982 return true; 2983 2984 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2985 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2986 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2987 2988 const int SrcIndices[] = { Src1Idx, Src2Idx }; 2989 2990 // lds_direct cannot be specified as either src1 or src2. 2991 for (int SrcIdx : SrcIndices) { 2992 if (SrcIdx == -1) break; 2993 const MCOperand &Src = Inst.getOperand(SrcIdx); 2994 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 2995 return false; 2996 } 2997 } 2998 2999 if (Src0Idx == -1) 3000 return true; 3001 3002 const MCOperand &Src = Inst.getOperand(Src0Idx); 3003 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3004 return true; 3005 3006 // lds_direct is specified as src0. Check additional limitations. 
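// As encoded in the check below, lds_direct in src0 is also rejected for SDWA
// encodings and for "rev" opcodes such as v_subrev_f32 or v_lshlrev_b32 (see
// IsRevOpcode above).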
3007 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3008 } 3009 3010 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3011 unsigned Opcode = Inst.getOpcode(); 3012 const MCInstrDesc &Desc = MII.get(Opcode); 3013 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3014 return true; 3015 3016 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3017 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3018 3019 const int OpIndices[] = { Src0Idx, Src1Idx }; 3020 3021 unsigned NumLiterals = 0; 3022 uint32_t LiteralValue; 3023 3024 for (int OpIdx : OpIndices) { 3025 if (OpIdx == -1) break; 3026 3027 const MCOperand &MO = Inst.getOperand(OpIdx); 3028 if (MO.isImm() && 3029 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3030 AMDGPU::isSISrcOperand(Desc, OpIdx) && 3031 !isInlineConstant(Inst, OpIdx)) { 3032 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3033 if (NumLiterals == 0 || LiteralValue != Value) { 3034 LiteralValue = Value; 3035 ++NumLiterals; 3036 } 3037 } 3038 } 3039 3040 return NumLiterals <= 1; 3041 } 3042 3043 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3044 const unsigned Opc = Inst.getOpcode(); 3045 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3046 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3047 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3048 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3049 3050 if (OpSel & ~3) 3051 return false; 3052 } 3053 return true; 3054 } 3055 3056 // Check if VCC register matches wavefront size 3057 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3058 auto FB = getFeatureBits(); 3059 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3060 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3061 } 3062 3063 // VOP3 literal is only allowed in GFX10+ and only one can be used 3064 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3065 unsigned Opcode = Inst.getOpcode(); 3066 const MCInstrDesc &Desc = MII.get(Opcode); 3067 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3068 return true; 3069 3070 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3071 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3072 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3073 3074 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3075 3076 unsigned NumLiterals = 0; 3077 uint32_t LiteralValue; 3078 3079 for (int OpIdx : OpIndices) { 3080 if (OpIdx == -1) break; 3081 3082 const MCOperand &MO = Inst.getOperand(OpIdx); 3083 if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx)) 3084 continue; 3085 3086 if (!isInlineConstant(Inst, OpIdx)) { 3087 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3088 if (NumLiterals == 0 || LiteralValue != Value) { 3089 LiteralValue = Value; 3090 ++NumLiterals; 3091 } 3092 } 3093 } 3094 3095 return !NumLiterals || 3096 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3097 } 3098 3099 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3100 const SMLoc &IDLoc) { 3101 if (!validateLdsDirect(Inst)) { 3102 Error(IDLoc, 3103 "invalid use of lds_direct"); 3104 return false; 3105 } 3106 if (!validateSOPLiteral(Inst)) { 3107 Error(IDLoc, 3108 "only one literal operand is allowed"); 3109 return false; 3110 } 3111 if (!validateVOP3Literal(Inst)) { 3112 Error(IDLoc, 3113 
"invalid literal operand"); 3114 return false; 3115 } 3116 if (!validateConstantBusLimitations(Inst)) { 3117 Error(IDLoc, 3118 "invalid operand (violates constant bus restrictions)"); 3119 return false; 3120 } 3121 if (!validateEarlyClobberLimitations(Inst)) { 3122 Error(IDLoc, 3123 "destination must be different than all sources"); 3124 return false; 3125 } 3126 if (!validateIntClampSupported(Inst)) { 3127 Error(IDLoc, 3128 "integer clamping is not supported on this GPU"); 3129 return false; 3130 } 3131 if (!validateOpSel(Inst)) { 3132 Error(IDLoc, 3133 "invalid op_sel operand"); 3134 return false; 3135 } 3136 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3137 if (!validateMIMGD16(Inst)) { 3138 Error(IDLoc, 3139 "d16 modifier is not supported on this GPU"); 3140 return false; 3141 } 3142 if (!validateMIMGDim(Inst)) { 3143 Error(IDLoc, "dim modifier is required on this GPU"); 3144 return false; 3145 } 3146 if (!validateMIMGDataSize(Inst)) { 3147 Error(IDLoc, 3148 "image data size does not match dmask and tfe"); 3149 return false; 3150 } 3151 if (!validateMIMGAddrSize(Inst)) { 3152 Error(IDLoc, 3153 "image address size does not match dim and a16"); 3154 return false; 3155 } 3156 if (!validateMIMGAtomicDMask(Inst)) { 3157 Error(IDLoc, 3158 "invalid atomic image dmask"); 3159 return false; 3160 } 3161 if (!validateMIMGGatherDMask(Inst)) { 3162 Error(IDLoc, 3163 "invalid image_gather dmask: only one bit must be set"); 3164 return false; 3165 } 3166 3167 return true; 3168 } 3169 3170 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3171 const FeatureBitset &FBS, 3172 unsigned VariantID = 0); 3173 3174 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3175 OperandVector &Operands, 3176 MCStreamer &Out, 3177 uint64_t &ErrorInfo, 3178 bool MatchingInlineAsm) { 3179 MCInst Inst; 3180 unsigned Result = Match_Success; 3181 for (auto Variant : getMatchedVariants()) { 3182 uint64_t EI; 3183 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3184 Variant); 3185 // We order match statuses from least to most specific. 
We use most specific 3186 // status as resulting 3187 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3188 if ((R == Match_Success) || 3189 (R == Match_PreferE32) || 3190 (R == Match_MissingFeature && Result != Match_PreferE32) || 3191 (R == Match_InvalidOperand && Result != Match_MissingFeature 3192 && Result != Match_PreferE32) || 3193 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3194 && Result != Match_MissingFeature 3195 && Result != Match_PreferE32)) { 3196 Result = R; 3197 ErrorInfo = EI; 3198 } 3199 if (R == Match_Success) 3200 break; 3201 } 3202 3203 switch (Result) { 3204 default: break; 3205 case Match_Success: 3206 if (!validateInstruction(Inst, IDLoc)) { 3207 return true; 3208 } 3209 Inst.setLoc(IDLoc); 3210 Out.EmitInstruction(Inst, getSTI()); 3211 return false; 3212 3213 case Match_MissingFeature: 3214 return Error(IDLoc, "instruction not supported on this GPU"); 3215 3216 case Match_MnemonicFail: { 3217 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3218 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3219 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3220 return Error(IDLoc, "invalid instruction" + Suggestion, 3221 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3222 } 3223 3224 case Match_InvalidOperand: { 3225 SMLoc ErrorLoc = IDLoc; 3226 if (ErrorInfo != ~0ULL) { 3227 if (ErrorInfo >= Operands.size()) { 3228 return Error(IDLoc, "too few operands for instruction"); 3229 } 3230 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3231 if (ErrorLoc == SMLoc()) 3232 ErrorLoc = IDLoc; 3233 } 3234 return Error(ErrorLoc, "invalid operand for instruction"); 3235 } 3236 3237 case Match_PreferE32: 3238 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3239 "should be encoded as e32"); 3240 } 3241 llvm_unreachable("Implement any new match types added!"); 3242 } 3243 3244 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3245 int64_t Tmp = -1; 3246 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3247 return true; 3248 } 3249 if (getParser().parseAbsoluteExpression(Tmp)) { 3250 return true; 3251 } 3252 Ret = static_cast<uint32_t>(Tmp); 3253 return false; 3254 } 3255 3256 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3257 uint32_t &Minor) { 3258 if (ParseAsAbsoluteExpression(Major)) 3259 return TokError("invalid major version"); 3260 3261 if (getLexer().isNot(AsmToken::Comma)) 3262 return TokError("minor version number required, comma expected"); 3263 Lex(); 3264 3265 if (ParseAsAbsoluteExpression(Minor)) 3266 return TokError("invalid minor version"); 3267 3268 return false; 3269 } 3270 3271 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3272 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3273 return TokError("directive only supported for amdgcn architecture"); 3274 3275 std::string Target; 3276 3277 SMLoc TargetStart = getTok().getLoc(); 3278 if (getParser().parseEscapedString(Target)) 3279 return true; 3280 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3281 3282 std::string ExpectedTarget; 3283 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3284 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3285 3286 if (Target != ExpectedTargetOS.str()) 3287 return getParser().Error(TargetRange.Start, "target must match options", 3288 TargetRange); 3289 3290 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3291 return false; 3292 } 3293 3294 bool 
AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3295 return getParser().Error(Range.Start, "value out of range", Range); 3296 } 3297 3298 bool AMDGPUAsmParser::calculateGPRBlocks( 3299 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3300 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3301 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3302 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3303 // TODO(scott.linder): These calculations are duplicated from 3304 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 3305 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3306 3307 unsigned NumVGPRs = NextFreeVGPR; 3308 unsigned NumSGPRs = NextFreeSGPR; 3309 3310 if (Version.Major >= 10) 3311 NumSGPRs = 0; 3312 else { 3313 unsigned MaxAddressableNumSGPRs = 3314 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3315 3316 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3317 NumSGPRs > MaxAddressableNumSGPRs) 3318 return OutOfRangeError(SGPRRange); 3319 3320 NumSGPRs += 3321 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3322 3323 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3324 NumSGPRs > MaxAddressableNumSGPRs) 3325 return OutOfRangeError(SGPRRange); 3326 3327 if (Features.test(FeatureSGPRInitBug)) 3328 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3329 } 3330 3331 VGPRBlocks = 3332 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3333 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3334 3335 return false; 3336 } 3337 3338 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3339 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3340 return TokError("directive only supported for amdgcn architecture"); 3341 3342 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3343 return TokError("directive only supported for amdhsa OS"); 3344 3345 StringRef KernelName; 3346 if (getParser().parseIdentifier(KernelName)) 3347 return true; 3348 3349 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3350 3351 StringSet<> Seen; 3352 3353 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3354 3355 SMRange VGPRRange; 3356 uint64_t NextFreeVGPR = 0; 3357 SMRange SGPRRange; 3358 uint64_t NextFreeSGPR = 0; 3359 unsigned UserSGPRCount = 0; 3360 bool ReserveVCC = true; 3361 bool ReserveFlatScr = true; 3362 bool ReserveXNACK = hasXNACK(); 3363 Optional<bool> EnableWavefrontSize32; 3364 3365 while (true) { 3366 while (getLexer().is(AsmToken::EndOfStatement)) 3367 Lex(); 3368 3369 if (getLexer().isNot(AsmToken::Identifier)) 3370 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3371 3372 StringRef ID = getTok().getIdentifier(); 3373 SMRange IDRange = getTok().getLocRange(); 3374 Lex(); 3375 3376 if (ID == ".end_amdhsa_kernel") 3377 break; 3378 3379 if (Seen.find(ID) != Seen.end()) 3380 return TokError(".amdhsa_ directives cannot be repeated"); 3381 Seen.insert(ID); 3382 3383 SMLoc ValStart = getTok().getLoc(); 3384 int64_t IVal; 3385 if (getParser().parseAbsoluteExpression(IVal)) 3386 return true; 3387 SMLoc ValEnd = getTok().getLoc(); 3388 SMRange ValRange = SMRange(ValStart, ValEnd); 3389 3390 if (IVal < 0) 3391 return OutOfRangeError(ValRange); 3392 3393 uint64_t Val = IVal; 3394 3395 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3396 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3397 return OutOfRangeError(RANGE); \ 3398 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3399 3400 if (ID == 
".amdhsa_group_segment_fixed_size") { 3401 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3402 return OutOfRangeError(ValRange); 3403 KD.group_segment_fixed_size = Val; 3404 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3405 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3406 return OutOfRangeError(ValRange); 3407 KD.private_segment_fixed_size = Val; 3408 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3409 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3410 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3411 Val, ValRange); 3412 UserSGPRCount += 4; 3413 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3414 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3415 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3416 ValRange); 3417 UserSGPRCount += 2; 3418 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3419 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3420 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3421 ValRange); 3422 UserSGPRCount += 2; 3423 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3424 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3425 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3426 Val, ValRange); 3427 UserSGPRCount += 2; 3428 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3429 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3430 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3431 ValRange); 3432 UserSGPRCount += 2; 3433 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3434 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3435 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3436 ValRange); 3437 UserSGPRCount += 2; 3438 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3439 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3440 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3441 Val, ValRange); 3442 UserSGPRCount += 1; 3443 } else if (ID == ".amdhsa_wavefront_size32") { 3444 if (IVersion.Major < 10) 3445 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3446 IDRange); 3447 EnableWavefrontSize32 = Val; 3448 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3449 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3450 Val, ValRange); 3451 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3452 PARSE_BITS_ENTRY( 3453 KD.compute_pgm_rsrc2, 3454 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3455 ValRange); 3456 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3457 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3458 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3459 ValRange); 3460 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3461 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3462 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3463 ValRange); 3464 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3465 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3466 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3467 ValRange); 3468 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3469 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3470 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3471 ValRange); 3472 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3473 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3474 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3475 ValRange); 3476 } else if (ID == ".amdhsa_next_free_vgpr") { 3477 VGPRRange = ValRange; 3478 NextFreeVGPR = Val; 3479 } else if (ID == ".amdhsa_next_free_sgpr") { 3480 SGPRRange = ValRange; 3481 NextFreeSGPR = Val; 3482 } else if (ID == 
".amdhsa_reserve_vcc") { 3483 if (!isUInt<1>(Val)) 3484 return OutOfRangeError(ValRange); 3485 ReserveVCC = Val; 3486 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3487 if (IVersion.Major < 7) 3488 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3489 IDRange); 3490 if (!isUInt<1>(Val)) 3491 return OutOfRangeError(ValRange); 3492 ReserveFlatScr = Val; 3493 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3494 if (IVersion.Major < 8) 3495 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3496 IDRange); 3497 if (!isUInt<1>(Val)) 3498 return OutOfRangeError(ValRange); 3499 ReserveXNACK = Val; 3500 } else if (ID == ".amdhsa_float_round_mode_32") { 3501 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3502 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3503 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3504 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3505 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3506 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3507 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3508 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3509 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3510 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3511 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3512 ValRange); 3513 } else if (ID == ".amdhsa_dx10_clamp") { 3514 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3515 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3516 } else if (ID == ".amdhsa_ieee_mode") { 3517 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3518 Val, ValRange); 3519 } else if (ID == ".amdhsa_fp16_overflow") { 3520 if (IVersion.Major < 9) 3521 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3522 IDRange); 3523 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3524 ValRange); 3525 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3526 if (IVersion.Major < 10) 3527 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3528 IDRange); 3529 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3530 ValRange); 3531 } else if (ID == ".amdhsa_memory_ordered") { 3532 if (IVersion.Major < 10) 3533 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3534 IDRange); 3535 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3536 ValRange); 3537 } else if (ID == ".amdhsa_forward_progress") { 3538 if (IVersion.Major < 10) 3539 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3540 IDRange); 3541 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3542 ValRange); 3543 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3544 PARSE_BITS_ENTRY( 3545 KD.compute_pgm_rsrc2, 3546 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3547 ValRange); 3548 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3549 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3550 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3551 Val, ValRange); 3552 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3553 PARSE_BITS_ENTRY( 3554 KD.compute_pgm_rsrc2, 3555 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3556 ValRange); 3557 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3558 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3559 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3560 Val, ValRange); 3561 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 3562 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3563 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3564 Val, ValRange); 3565 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3566 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3567 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3568 Val, ValRange); 3569 } else if (ID == ".amdhsa_exception_int_div_zero") { 3570 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3571 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3572 Val, ValRange); 3573 } else { 3574 return getParser().Error(IDRange.Start, 3575 "unknown .amdhsa_kernel directive", IDRange); 3576 } 3577 3578 #undef PARSE_BITS_ENTRY 3579 } 3580 3581 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3582 return TokError(".amdhsa_next_free_vgpr directive is required"); 3583 3584 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3585 return TokError(".amdhsa_next_free_sgpr directive is required"); 3586 3587 unsigned VGPRBlocks; 3588 unsigned SGPRBlocks; 3589 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3590 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 3591 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 3592 SGPRBlocks)) 3593 return true; 3594 3595 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3596 VGPRBlocks)) 3597 return OutOfRangeError(VGPRRange); 3598 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3599 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3600 3601 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3602 SGPRBlocks)) 3603 return OutOfRangeError(SGPRRange); 3604 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3605 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3606 SGPRBlocks); 3607 3608 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3609 return TokError("too many user SGPRs enabled"); 3610 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3611 UserSGPRCount); 3612 3613 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3614 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3615 ReserveFlatScr, ReserveXNACK); 3616 return false; 3617 } 3618 3619 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3620 uint32_t Major; 3621 uint32_t Minor; 3622 3623 if (ParseDirectiveMajorMinor(Major, Minor)) 3624 return true; 3625 3626 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3627 return false; 3628 } 3629 3630 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3631 uint32_t Major; 3632 uint32_t Minor; 3633 uint32_t Stepping; 3634 StringRef VendorName; 3635 StringRef ArchName; 3636 3637 // If this directive has no arguments, then use the ISA version for the 3638 // targeted GPU. 
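  // For example (illustrative): a bare ".hsa_code_object_isa" emits the
  // version derived from the subtarget, while an explicit form such as
  //   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
  // supplies major, minor, stepping, vendor and arch as parsed below.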
3639 if (getLexer().is(AsmToken::EndOfStatement)) { 3640 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3641 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3642 ISA.Stepping, 3643 "AMD", "AMDGPU"); 3644 return false; 3645 } 3646 3647 if (ParseDirectiveMajorMinor(Major, Minor)) 3648 return true; 3649 3650 if (getLexer().isNot(AsmToken::Comma)) 3651 return TokError("stepping version number required, comma expected"); 3652 Lex(); 3653 3654 if (ParseAsAbsoluteExpression(Stepping)) 3655 return TokError("invalid stepping version"); 3656 3657 if (getLexer().isNot(AsmToken::Comma)) 3658 return TokError("vendor name required, comma expected"); 3659 Lex(); 3660 3661 if (getLexer().isNot(AsmToken::String)) 3662 return TokError("invalid vendor name"); 3663 3664 VendorName = getLexer().getTok().getStringContents(); 3665 Lex(); 3666 3667 if (getLexer().isNot(AsmToken::Comma)) 3668 return TokError("arch name required, comma expected"); 3669 Lex(); 3670 3671 if (getLexer().isNot(AsmToken::String)) 3672 return TokError("invalid arch name"); 3673 3674 ArchName = getLexer().getTok().getStringContents(); 3675 Lex(); 3676 3677 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3678 VendorName, ArchName); 3679 return false; 3680 } 3681 3682 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3683 amd_kernel_code_t &Header) { 3684 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3685 // assembly for backwards compatibility. 3686 if (ID == "max_scratch_backing_memory_byte_size") { 3687 Parser.eatToEndOfStatement(); 3688 return false; 3689 } 3690 3691 SmallString<40> ErrStr; 3692 raw_svector_ostream Err(ErrStr); 3693 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 3694 return TokError(Err.str()); 3695 } 3696 Lex(); 3697 3698 if (ID == "enable_wavefront_size32") { 3699 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 3700 if (!isGFX10()) 3701 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 3702 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 3703 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 3704 } else { 3705 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 3706 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 3707 } 3708 } 3709 3710 if (ID == "wavefront_size") { 3711 if (Header.wavefront_size == 5) { 3712 if (!isGFX10()) 3713 return TokError("wavefront_size=5 is only allowed on GFX10+"); 3714 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 3715 return TokError("wavefront_size=5 requires +WavefrontSize32"); 3716 } else if (Header.wavefront_size == 6) { 3717 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 3718 return TokError("wavefront_size=6 requires +WavefrontSize64"); 3719 } 3720 } 3721 3722 if (ID == "enable_wgp_mode") { 3723 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 3724 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 3725 } 3726 3727 if (ID == "enable_mem_ordered") { 3728 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 3729 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 3730 } 3731 3732 if (ID == "enable_fwd_progress") { 3733 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 3734 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 3735 } 3736 3737 return false; 3738 } 3739 3740 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3741 amd_kernel_code_t Header; 3742 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3743 3744 while (true) { 3745 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3746 // will set the current token to EndOfStatement. 3747 while(getLexer().is(AsmToken::EndOfStatement)) 3748 Lex(); 3749 3750 if (getLexer().isNot(AsmToken::Identifier)) 3751 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3752 3753 StringRef ID = getLexer().getTok().getIdentifier(); 3754 Lex(); 3755 3756 if (ID == ".end_amd_kernel_code_t") 3757 break; 3758 3759 if (ParseAMDKernelCodeTValue(ID, Header)) 3760 return true; 3761 } 3762 3763 getTargetStreamer().EmitAMDKernelCodeT(Header); 3764 3765 return false; 3766 } 3767 3768 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 3769 if (getLexer().isNot(AsmToken::Identifier)) 3770 return TokError("expected symbol name"); 3771 3772 StringRef KernelName = Parser.getTok().getString(); 3773 3774 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 3775 ELF::STT_AMDGPU_HSA_KERNEL); 3776 Lex(); 3777 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 3778 KernelScope.initialize(getContext()); 3779 return false; 3780 } 3781 3782 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 3783 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 3784 return Error(getParser().getTok().getLoc(), 3785 ".amd_amdgpu_isa directive is not available on non-amdgcn " 3786 "architectures"); 3787 } 3788 3789 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 3790 3791 std::string ISAVersionStringFromSTI; 3792 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3793 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3794 3795 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3796 return Error(getParser().getTok().getLoc(), 3797 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3798 "arguments specified through the command line"); 3799 } 3800 3801 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 3802 Lex(); 3803 3804 return false; 3805 } 3806 3807 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 3808 const char *AssemblerDirectiveBegin; 3809 const char *AssemblerDirectiveEnd; 3810 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 3811 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 3812 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 3813 HSAMD::V3::AssemblerDirectiveEnd) 3814 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 3815 HSAMD::AssemblerDirectiveEnd); 3816 3817 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 3818 return Error(getParser().getTok().getLoc(), 3819 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 3820 "not available on non-amdhsa OSes")).str()); 3821 } 3822 3823 std::string HSAMetadataString; 3824 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 3825 HSAMetadataString)) 3826 return true; 3827 3828 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 3829 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 3830 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3831 } else { 3832 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 3833 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3834 } 3835 3836 return false; 3837 } 3838 3839 /// Common code to parse out a block of text (typically YAML) between start and 3840 /// end directives. 
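/// Used, for example, by ParseDirectiveHSAMetadata and
/// ParseDirectivePALMetadataBegin below to collect everything between the
/// corresponding begin/end assembler directives into a single string.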
3841 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 3842 const char *AssemblerDirectiveEnd, 3843 std::string &CollectString) { 3844 3845 raw_string_ostream CollectStream(CollectString); 3846 3847 getLexer().setSkipSpace(false); 3848 3849 bool FoundEnd = false; 3850 while (!getLexer().is(AsmToken::Eof)) { 3851 while (getLexer().is(AsmToken::Space)) { 3852 CollectStream << getLexer().getTok().getString(); 3853 Lex(); 3854 } 3855 3856 if (getLexer().is(AsmToken::Identifier)) { 3857 StringRef ID = getLexer().getTok().getIdentifier(); 3858 if (ID == AssemblerDirectiveEnd) { 3859 Lex(); 3860 FoundEnd = true; 3861 break; 3862 } 3863 } 3864 3865 CollectStream << Parser.parseStringToEndOfStatement() 3866 << getContext().getAsmInfo()->getSeparatorString(); 3867 3868 Parser.eatToEndOfStatement(); 3869 } 3870 3871 getLexer().setSkipSpace(true); 3872 3873 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 3874 return TokError(Twine("expected directive ") + 3875 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 3876 } 3877 3878 CollectStream.flush(); 3879 return false; 3880 } 3881 3882 /// Parse the assembler directive for new MsgPack-format PAL metadata. 3883 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 3884 std::string String; 3885 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 3886 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 3887 return true; 3888 3889 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3890 if (!PALMetadata->setFromString(String)) 3891 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 3892 return false; 3893 } 3894 3895 /// Parse the assembler directive for old linear-format PAL metadata. 3896 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 3897 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 3898 return Error(getParser().getTok().getLoc(), 3899 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 3900 "not available on non-amdpal OSes")).str()); 3901 } 3902 3903 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3904 PALMetadata->setLegacy(); 3905 for (;;) { 3906 uint32_t Key, Value; 3907 if (ParseAsAbsoluteExpression(Key)) { 3908 return TokError(Twine("invalid value in ") + 3909 Twine(PALMD::AssemblerDirective)); 3910 } 3911 if (getLexer().isNot(AsmToken::Comma)) { 3912 return TokError(Twine("expected an even number of values in ") + 3913 Twine(PALMD::AssemblerDirective)); 3914 } 3915 Lex(); 3916 if (ParseAsAbsoluteExpression(Value)) { 3917 return TokError(Twine("invalid value in ") + 3918 Twine(PALMD::AssemblerDirective)); 3919 } 3920 PALMetadata->setRegister(Key, Value); 3921 if (getLexer().isNot(AsmToken::Comma)) 3922 break; 3923 Lex(); 3924 } 3925 return false; 3926 } 3927 3928 /// ParseDirectiveAMDGPULDS 3929 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 3930 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 3931 if (getParser().checkForValidSection()) 3932 return true; 3933 3934 StringRef Name; 3935 SMLoc NameLoc = getLexer().getLoc(); 3936 if (getParser().parseIdentifier(Name)) 3937 return TokError("expected identifier in directive"); 3938 3939 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 3940 if (parseToken(AsmToken::Comma, "expected ','")) 3941 return true; 3942 3943 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 3944 3945 int64_t Size; 3946 SMLoc SizeLoc = getLexer().getLoc(); 3947 if (getParser().parseAbsoluteExpression(Size)) 3948 return true; 3949 if (Size < 0) 3950 return 
Error(SizeLoc, "size must be non-negative"); 3951 if (Size > LocalMemorySize) 3952 return Error(SizeLoc, "size is too large"); 3953 3954 int64_t Align = 4; 3955 if (getLexer().is(AsmToken::Comma)) { 3956 Lex(); 3957 SMLoc AlignLoc = getLexer().getLoc(); 3958 if (getParser().parseAbsoluteExpression(Align)) 3959 return true; 3960 if (Align < 0 || !isPowerOf2_64(Align)) 3961 return Error(AlignLoc, "alignment must be a power of two"); 3962 3963 // Alignment larger than the size of LDS is possible in theory, as long 3964 // as the linker manages to place to symbol at address 0, but we do want 3965 // to make sure the alignment fits nicely into a 32-bit integer. 3966 if (Align >= 1u << 31) 3967 return Error(AlignLoc, "alignment is too large"); 3968 } 3969 3970 if (parseToken(AsmToken::EndOfStatement, 3971 "unexpected token in '.amdgpu_lds' directive")) 3972 return true; 3973 3974 Symbol->redefineIfPossible(); 3975 if (!Symbol->isUndefined()) 3976 return Error(NameLoc, "invalid symbol redefinition"); 3977 3978 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align); 3979 return false; 3980 } 3981 3982 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 3983 StringRef IDVal = DirectiveID.getString(); 3984 3985 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 3986 if (IDVal == ".amdgcn_target") 3987 return ParseDirectiveAMDGCNTarget(); 3988 3989 if (IDVal == ".amdhsa_kernel") 3990 return ParseDirectiveAMDHSAKernel(); 3991 3992 // TODO: Restructure/combine with PAL metadata directive. 3993 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 3994 return ParseDirectiveHSAMetadata(); 3995 } else { 3996 if (IDVal == ".hsa_code_object_version") 3997 return ParseDirectiveHSACodeObjectVersion(); 3998 3999 if (IDVal == ".hsa_code_object_isa") 4000 return ParseDirectiveHSACodeObjectISA(); 4001 4002 if (IDVal == ".amd_kernel_code_t") 4003 return ParseDirectiveAMDKernelCodeT(); 4004 4005 if (IDVal == ".amdgpu_hsa_kernel") 4006 return ParseDirectiveAMDGPUHsaKernel(); 4007 4008 if (IDVal == ".amd_amdgpu_isa") 4009 return ParseDirectiveISAVersion(); 4010 4011 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4012 return ParseDirectiveHSAMetadata(); 4013 } 4014 4015 if (IDVal == ".amdgpu_lds") 4016 return ParseDirectiveAMDGPULDS(); 4017 4018 if (IDVal == PALMD::AssemblerDirectiveBegin) 4019 return ParseDirectivePALMetadataBegin(); 4020 4021 if (IDVal == PALMD::AssemblerDirective) 4022 return ParseDirectivePALMetadata(); 4023 4024 return true; 4025 } 4026 4027 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4028 unsigned RegNo) const { 4029 4030 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4031 R.isValid(); ++R) { 4032 if (*R == RegNo) 4033 return isGFX9() || isGFX10(); 4034 } 4035 4036 // GFX10 has 2 more SGPRs 104 and 105. 
  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR104_SGPR105();
  }

  switch (RegNo) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return !isCI() && !isSI() && !isVI();
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    return !isGFX9() && !isGFX10();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return !isCI() && !isSI() && !isGFX10() && hasXNACK();
  case AMDGPU::SGPR_NULL:
    return isGFX10();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10()) {
    // No flat_scr on SI.
    // On GFX10 flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more
  // that SI/CI have.
  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR102_SGPR103();
  }

  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by a custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      getLexer().is(AsmToken::EndOfStatement))
    return ResTy;

  if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();
    SMLoc LBraceLoc = getTok().getLoc();
    Parser.Lex(); // eat the '['

    for (;;) {
      ResTy = parseReg(Operands);
      if (ResTy != MatchOperand_Success)
        return ResTy;

      if (getLexer().is(AsmToken::RBrac))
        break;

      if (getLexer().isNot(AsmToken::Comma))
        return MatchOperand_ParseFail;
      Parser.Lex();
    }

    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
                                                    getTok().getLoc()));
    }

    Parser.Lex(); // eat the ']'
    return MatchOperand_Success;
  }

  return parseRegOrImm(Operands);
}

StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
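  // Recognized suffixes (handled below): "_e64" forces the 64-bit (VOP3)
  // encoding, "_e32" forces the 32-bit encoding, "_dpp" forces DPP and
  // "_sdwa" forces SDWA. For example, "v_add_f32_e32" is matched as
  // "v_add_f32" with a forced 32-bit encoding.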
4145 setForcedEncodingSize(0); 4146 setForcedDPP(false); 4147 setForcedSDWA(false); 4148 4149 if (Name.endswith("_e64")) { 4150 setForcedEncodingSize(64); 4151 return Name.substr(0, Name.size() - 4); 4152 } else if (Name.endswith("_e32")) { 4153 setForcedEncodingSize(32); 4154 return Name.substr(0, Name.size() - 4); 4155 } else if (Name.endswith("_dpp")) { 4156 setForcedDPP(true); 4157 return Name.substr(0, Name.size() - 4); 4158 } else if (Name.endswith("_sdwa")) { 4159 setForcedSDWA(true); 4160 return Name.substr(0, Name.size() - 5); 4161 } 4162 return Name; 4163 } 4164 4165 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4166 StringRef Name, 4167 SMLoc NameLoc, OperandVector &Operands) { 4168 // Add the instruction mnemonic 4169 Name = parseMnemonicSuffix(Name); 4170 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4171 4172 bool IsMIMG = Name.startswith("image_"); 4173 4174 while (!getLexer().is(AsmToken::EndOfStatement)) { 4175 OperandMode Mode = OperandMode_Default; 4176 if (IsMIMG && isGFX10() && Operands.size() == 2) 4177 Mode = OperandMode_NSA; 4178 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4179 4180 // Eat the comma or space if there is one. 4181 if (getLexer().is(AsmToken::Comma)) 4182 Parser.Lex(); 4183 4184 switch (Res) { 4185 case MatchOperand_Success: break; 4186 case MatchOperand_ParseFail: 4187 // FIXME: use real operand location rather than the current location. 4188 Error(getLexer().getLoc(), "failed parsing operand."); 4189 while (!getLexer().is(AsmToken::EndOfStatement)) { 4190 Parser.Lex(); 4191 } 4192 return true; 4193 case MatchOperand_NoMatch: 4194 // FIXME: use real operand location rather than the current location. 4195 Error(getLexer().getLoc(), "not a valid operand."); 4196 while (!getLexer().is(AsmToken::EndOfStatement)) { 4197 Parser.Lex(); 4198 } 4199 return true; 4200 } 4201 } 4202 4203 return false; 4204 } 4205 4206 //===----------------------------------------------------------------------===// 4207 // Utility functions 4208 //===----------------------------------------------------------------------===// 4209 4210 OperandMatchResultTy 4211 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4212 4213 if (!trySkipId(Prefix, AsmToken::Colon)) 4214 return MatchOperand_NoMatch; 4215 4216 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4217 } 4218 4219 OperandMatchResultTy 4220 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4221 AMDGPUOperand::ImmTy ImmTy, 4222 bool (*ConvertResult)(int64_t&)) { 4223 SMLoc S = getLoc(); 4224 int64_t Value = 0; 4225 4226 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4227 if (Res != MatchOperand_Success) 4228 return Res; 4229 4230 if (ConvertResult && !ConvertResult(Value)) { 4231 Error(S, "invalid " + StringRef(Prefix) + " value."); 4232 } 4233 4234 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4235 return MatchOperand_Success; 4236 } 4237 4238 OperandMatchResultTy 4239 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4240 OperandVector &Operands, 4241 AMDGPUOperand::ImmTy ImmTy, 4242 bool (*ConvertResult)(int64_t&)) { 4243 SMLoc S = getLoc(); 4244 if (!trySkipId(Prefix, AsmToken::Colon)) 4245 return MatchOperand_NoMatch; 4246 4247 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4248 return MatchOperand_ParseFail; 4249 4250 unsigned Val = 0; 4251 const unsigned MaxSize = 4; 4252 4253 // FIXME: How to verify the number of elements matches the number of src 4254 // operands? 4255 for (int I = 0; ; ++I) { 4256 int64_t Op; 4257 SMLoc Loc = getLoc(); 4258 if (!parseExpr(Op)) 4259 return MatchOperand_ParseFail; 4260 4261 if (Op != 0 && Op != 1) { 4262 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4263 return MatchOperand_ParseFail; 4264 } 4265 4266 Val |= (Op << I); 4267 4268 if (trySkipToken(AsmToken::RBrac)) 4269 break; 4270 4271 if (I + 1 == MaxSize) { 4272 Error(getLoc(), "expected a closing square bracket"); 4273 return MatchOperand_ParseFail; 4274 } 4275 4276 if (!skipToken(AsmToken::Comma, "expected a comma")) 4277 return MatchOperand_ParseFail; 4278 } 4279 4280 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4281 return MatchOperand_Success; 4282 } 4283 4284 OperandMatchResultTy 4285 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4286 AMDGPUOperand::ImmTy ImmTy) { 4287 int64_t Bit = 0; 4288 SMLoc S = Parser.getTok().getLoc(); 4289 4290 // We are at the end of the statement, and this is a default argument, so 4291 // use a default value. 
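  // Otherwise the bit is written explicitly: the bare name (e.g. "gds") sets
  // it, and a "no"-prefixed spelling of the same name (e.g. "nogds") clears
  // it, as handled below.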
4292 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4293 switch(getLexer().getKind()) { 4294 case AsmToken::Identifier: { 4295 StringRef Tok = Parser.getTok().getString(); 4296 if (Tok == Name) { 4297 if (Tok == "r128" && isGFX9()) 4298 Error(S, "r128 modifier is not supported on this GPU"); 4299 if (Tok == "a16" && !isGFX9()) 4300 Error(S, "a16 modifier is not supported on this GPU"); 4301 Bit = 1; 4302 Parser.Lex(); 4303 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4304 Bit = 0; 4305 Parser.Lex(); 4306 } else { 4307 return MatchOperand_NoMatch; 4308 } 4309 break; 4310 } 4311 default: 4312 return MatchOperand_NoMatch; 4313 } 4314 } 4315 4316 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4317 return MatchOperand_ParseFail; 4318 4319 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4320 return MatchOperand_Success; 4321 } 4322 4323 static void addOptionalImmOperand( 4324 MCInst& Inst, const OperandVector& Operands, 4325 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4326 AMDGPUOperand::ImmTy ImmT, 4327 int64_t Default = 0) { 4328 auto i = OptionalIdx.find(ImmT); 4329 if (i != OptionalIdx.end()) { 4330 unsigned Idx = i->second; 4331 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4332 } else { 4333 Inst.addOperand(MCOperand::createImm(Default)); 4334 } 4335 } 4336 4337 OperandMatchResultTy 4338 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4339 if (getLexer().isNot(AsmToken::Identifier)) { 4340 return MatchOperand_NoMatch; 4341 } 4342 StringRef Tok = Parser.getTok().getString(); 4343 if (Tok != Prefix) { 4344 return MatchOperand_NoMatch; 4345 } 4346 4347 Parser.Lex(); 4348 if (getLexer().isNot(AsmToken::Colon)) { 4349 return MatchOperand_ParseFail; 4350 } 4351 4352 Parser.Lex(); 4353 if (getLexer().isNot(AsmToken::Identifier)) { 4354 return MatchOperand_ParseFail; 4355 } 4356 4357 Value = Parser.getTok().getString(); 4358 return MatchOperand_Success; 4359 } 4360 4361 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4362 // values to live in a joint format operand in the MCInst encoding. 4363 OperandMatchResultTy 4364 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4365 SMLoc S = Parser.getTok().getLoc(); 4366 int64_t Dfmt = 0, Nfmt = 0; 4367 // dfmt and nfmt can appear in either order, and each is optional. 
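  // An illustrative use (values are examples only):
  //   tbuffer_load_format_x v0, off, s[0:3], dfmt:1, nfmt:2, 0
  // dfmt ends up in bits [3:0] and nfmt in bits [6:4] of the combined format
  // operand built below.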
4368 bool GotDfmt = false, GotNfmt = false; 4369 while (!GotDfmt || !GotNfmt) { 4370 if (!GotDfmt) { 4371 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4372 if (Res != MatchOperand_NoMatch) { 4373 if (Res != MatchOperand_Success) 4374 return Res; 4375 if (Dfmt >= 16) { 4376 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4377 return MatchOperand_ParseFail; 4378 } 4379 GotDfmt = true; 4380 Parser.Lex(); 4381 continue; 4382 } 4383 } 4384 if (!GotNfmt) { 4385 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4386 if (Res != MatchOperand_NoMatch) { 4387 if (Res != MatchOperand_Success) 4388 return Res; 4389 if (Nfmt >= 8) { 4390 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4391 return MatchOperand_ParseFail; 4392 } 4393 GotNfmt = true; 4394 Parser.Lex(); 4395 continue; 4396 } 4397 } 4398 break; 4399 } 4400 if (!GotDfmt && !GotNfmt) 4401 return MatchOperand_NoMatch; 4402 auto Format = Dfmt | Nfmt << 4; 4403 Operands.push_back( 4404 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4405 return MatchOperand_Success; 4406 } 4407 4408 //===----------------------------------------------------------------------===// 4409 // ds 4410 //===----------------------------------------------------------------------===// 4411 4412 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4413 const OperandVector &Operands) { 4414 OptionalImmIndexMap OptionalIdx; 4415 4416 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4417 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4418 4419 // Add the register arguments 4420 if (Op.isReg()) { 4421 Op.addRegOperands(Inst, 1); 4422 continue; 4423 } 4424 4425 // Handle optional arguments 4426 OptionalIdx[Op.getImmTy()] = i; 4427 } 4428 4429 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4430 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4431 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4432 4433 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4434 } 4435 4436 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4437 bool IsGdsHardcoded) { 4438 OptionalImmIndexMap OptionalIdx; 4439 4440 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4441 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4442 4443 // Add the register arguments 4444 if (Op.isReg()) { 4445 Op.addRegOperands(Inst, 1); 4446 continue; 4447 } 4448 4449 if (Op.isToken() && Op.getToken() == "gds") { 4450 IsGdsHardcoded = true; 4451 continue; 4452 } 4453 4454 // Handle optional arguments 4455 OptionalIdx[Op.getImmTy()] = i; 4456 } 4457 4458 AMDGPUOperand::ImmTy OffsetType = 4459 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4460 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4461 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4462 AMDGPUOperand::ImmTyOffset; 4463 4464 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4465 4466 if (!IsGdsHardcoded) { 4467 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4468 } 4469 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4470 } 4471 4472 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4473 OptionalImmIndexMap OptionalIdx; 4474 4475 unsigned OperandIdx[4]; 4476 unsigned EnMask = 0; 4477 int SrcIdx = 0; 4478 4479 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4480 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4481 4482 // Add the register arguments 4483 if (Op.isReg()) { 4484 assert(SrcIdx < 4); 4485 OperandIdx[SrcIdx] = Inst.size(); 4486 Op.addRegOperands(Inst, 1); 4487 ++SrcIdx; 4488 continue; 4489 } 4490 4491 if (Op.isOff()) { 4492 assert(SrcIdx < 4); 4493 OperandIdx[SrcIdx] = Inst.size(); 4494 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4495 ++SrcIdx; 4496 continue; 4497 } 4498 4499 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4500 Op.addImmOperands(Inst, 1); 4501 continue; 4502 } 4503 4504 if (Op.isToken() && Op.getToken() == "done") 4505 continue; 4506 4507 // Handle optional arguments 4508 OptionalIdx[Op.getImmTy()] = i; 4509 } 4510 4511 assert(SrcIdx == 4); 4512 4513 bool Compr = false; 4514 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4515 Compr = true; 4516 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4517 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4518 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4519 } 4520 4521 for (auto i = 0; i < SrcIdx; ++i) { 4522 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4523 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4524 } 4525 } 4526 4527 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4528 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4529 4530 Inst.addOperand(MCOperand::createImm(EnMask)); 4531 } 4532 4533 //===----------------------------------------------------------------------===// 4534 // s_waitcnt 4535 //===----------------------------------------------------------------------===// 4536 4537 static bool 4538 encodeCnt( 4539 const AMDGPU::IsaVersion ISA, 4540 int64_t &IntVal, 4541 int64_t CntVal, 4542 bool Saturate, 4543 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4544 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4545 { 4546 bool Failed = false; 4547 4548 IntVal = encode(ISA, IntVal, CntVal); 4549 if (CntVal != decode(ISA, IntVal)) { 4550 if (Saturate) { 4551 IntVal = encode(ISA, IntVal, -1); 4552 } else { 4553 Failed = true; 4554 } 4555 } 4556 return Failed; 4557 } 4558 4559 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4560 4561 SMLoc CntLoc = getLoc(); 4562 StringRef CntName = getTokenStr(); 4563 4564 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 4565 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 4566 return false; 4567 4568 int64_t CntVal; 4569 SMLoc ValLoc = getLoc(); 4570 if (!parseExpr(CntVal)) 4571 return false; 4572 4573 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4574 4575 bool Failed = true; 4576 bool Sat = CntName.endswith("_sat"); 4577 4578 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4579 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4580 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4581 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4582 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4583 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4584 } else { 4585 Error(CntLoc, "invalid counter name " + CntName); 4586 return false; 4587 } 4588 4589 if (Failed) { 4590 Error(ValLoc, "too large value for " + CntName); 4591 return false; 4592 } 4593 4594 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 4595 return false; 4596 4597 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 4598 if (isToken(AsmToken::EndOfStatement)) { 4599 Error(getLoc(), "expected a counter name"); 4600 return false; 4601 } 4602 } 4603 4604 return true; 4605 } 4606 4607 OperandMatchResultTy 4608 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4609 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4610 int64_t Waitcnt = getWaitcntBitMask(ISA); 4611 SMLoc S = getLoc(); 4612 4613 // If parse failed, do not return error code 4614 // to avoid excessive error messages. 
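  // Both spellings are accepted, e.g. (illustrative):
  //   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  //   s_waitcnt 0
  // The *_sat counter names (e.g. vmcnt_sat(N)) clamp an out-of-range value
  // instead of reporting an error (see encodeCnt above).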
4615 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 4616 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 4617 } else { 4618 parseExpr(Waitcnt); 4619 } 4620 4621 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4622 return MatchOperand_Success; 4623 } 4624 4625 bool 4626 AMDGPUOperand::isSWaitCnt() const { 4627 return isImm(); 4628 } 4629 4630 //===----------------------------------------------------------------------===// 4631 // hwreg 4632 //===----------------------------------------------------------------------===// 4633 4634 bool 4635 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 4636 int64_t &Offset, 4637 int64_t &Width) { 4638 using namespace llvm::AMDGPU::Hwreg; 4639 4640 // The register may be specified by name or using a numeric code 4641 if (isToken(AsmToken::Identifier) && 4642 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 4643 HwReg.IsSymbolic = true; 4644 lex(); // skip message name 4645 } else if (!parseExpr(HwReg.Id)) { 4646 return false; 4647 } 4648 4649 if (trySkipToken(AsmToken::RParen)) 4650 return true; 4651 4652 // parse optional params 4653 return 4654 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 4655 parseExpr(Offset) && 4656 skipToken(AsmToken::Comma, "expected a comma") && 4657 parseExpr(Width) && 4658 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 4659 } 4660 4661 void 4662 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 4663 const int64_t Offset, 4664 const int64_t Width, 4665 const SMLoc Loc) { 4666 4667 using namespace llvm::AMDGPU::Hwreg; 4668 4669 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 4670 Error(Loc, "specified hardware register is not supported on this GPU"); 4671 } else if (!isValidHwreg(HwReg.Id)) { 4672 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 4673 } else if (!isValidHwregOffset(Offset)) { 4674 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 4675 } else if (!isValidHwregWidth(Width)) { 4676 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 4677 } 4678 } 4679 4680 OperandMatchResultTy 4681 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4682 using namespace llvm::AMDGPU::Hwreg; 4683 4684 int64_t ImmVal = 0; 4685 SMLoc Loc = getLoc(); 4686 4687 // If parse failed, do not return error code 4688 // to avoid excessive error messages. 
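  // Accepted forms, e.g. (illustrative):
  //   s_getreg_b32 s2, hwreg(HW_REG_MODE)          // symbolic name only
  //   s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 32)   // name, bit offset, width
  //   s_getreg_b32 s2, hwreg(1, 0, 32)             // numeric register id
  // or a raw 16-bit immediate encoding the same fields.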
  if (trySkipId("hwreg", AsmToken::LParen)) {
    OperandInfoTy HwReg(ID_UNKNOWN_);
    int64_t Offset = OFFSET_DEFAULT_;
    int64_t Width = WIDTH_DEFAULT_;
    if (parseHwregBody(HwReg, Offset, Width)) {
      validateHwreg(HwReg, Offset, Width, Loc);
      ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
    }
  } else if (parseExpr(ImmVal)) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal))
      Error(Loc, "invalid immediate: only 16-bit values are legal");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}

//===----------------------------------------------------------------------===//
// sendmsg
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id)) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id)) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream,
                                 const SMLoc S) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only the encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(S, "invalid message id");
    return false;
  } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    Error(S, Op.IsDefined ?
             "message does not support operations" :
             "missing message operation");
    return false;
  } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
    Error(S, "invalid operation id");
    return false;
  } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(S, "message operation does not support streams");
    return false;
  } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
    Error(S, "invalid message stream id");
    return false;
  }
  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  // If parse failed, do not return error code
  // to avoid excessive error messages.
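  // Accepted forms, e.g. (illustrative):
  //   s_sendmsg sendmsg(MSG_INTERRUPT)
  //   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
  // or a raw 16-bit immediate encoding the message, operation and stream.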
4788 if (trySkipId("sendmsg", AsmToken::LParen)) { 4789 OperandInfoTy Msg(ID_UNKNOWN_); 4790 OperandInfoTy Op(OP_NONE_); 4791 OperandInfoTy Stream(STREAM_ID_NONE_); 4792 if (parseSendMsgBody(Msg, Op, Stream) && 4793 validateSendMsg(Msg, Op, Stream, Loc)) { 4794 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 4795 } 4796 } else if (parseExpr(ImmVal)) { 4797 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 4798 Error(Loc, "invalid immediate: only 16-bit values are legal"); 4799 } 4800 4801 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 4802 return MatchOperand_Success; 4803 } 4804 4805 bool AMDGPUOperand::isSendMsg() const { 4806 return isImmTy(ImmTySendMsg); 4807 } 4808 4809 //===----------------------------------------------------------------------===// 4810 // v_interp 4811 //===----------------------------------------------------------------------===// 4812 4813 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 4814 if (getLexer().getKind() != AsmToken::Identifier) 4815 return MatchOperand_NoMatch; 4816 4817 StringRef Str = Parser.getTok().getString(); 4818 int Slot = StringSwitch<int>(Str) 4819 .Case("p10", 0) 4820 .Case("p20", 1) 4821 .Case("p0", 2) 4822 .Default(-1); 4823 4824 SMLoc S = Parser.getTok().getLoc(); 4825 if (Slot == -1) 4826 return MatchOperand_ParseFail; 4827 4828 Parser.Lex(); 4829 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 4830 AMDGPUOperand::ImmTyInterpSlot)); 4831 return MatchOperand_Success; 4832 } 4833 4834 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 4835 if (getLexer().getKind() != AsmToken::Identifier) 4836 return MatchOperand_NoMatch; 4837 4838 StringRef Str = Parser.getTok().getString(); 4839 if (!Str.startswith("attr")) 4840 return MatchOperand_NoMatch; 4841 4842 StringRef Chan = Str.take_back(2); 4843 int AttrChan = StringSwitch<int>(Chan) 4844 .Case(".x", 0) 4845 .Case(".y", 1) 4846 .Case(".z", 2) 4847 .Case(".w", 3) 4848 .Default(-1); 4849 if (AttrChan == -1) 4850 return MatchOperand_ParseFail; 4851 4852 Str = Str.drop_back(2).drop_front(4); 4853 4854 uint8_t Attr; 4855 if (Str.getAsInteger(10, Attr)) 4856 return MatchOperand_ParseFail; 4857 4858 SMLoc S = Parser.getTok().getLoc(); 4859 Parser.Lex(); 4860 if (Attr > 63) { 4861 Error(S, "out of bounds attr"); 4862 return MatchOperand_Success; 4863 } 4864 4865 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 4866 4867 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 4868 AMDGPUOperand::ImmTyInterpAttr)); 4869 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 4870 AMDGPUOperand::ImmTyAttrChan)); 4871 return MatchOperand_Success; 4872 } 4873 4874 //===----------------------------------------------------------------------===// 4875 // exp 4876 //===----------------------------------------------------------------------===// 4877 4878 void AMDGPUAsmParser::errorExpTgt() { 4879 Error(Parser.getTok().getLoc(), "invalid exp target"); 4880 } 4881 4882 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 4883 uint8_t &Val) { 4884 if (Str == "null") { 4885 Val = 9; 4886 return MatchOperand_Success; 4887 } 4888 4889 if (Str.startswith("mrt")) { 4890 Str = Str.drop_front(3); 4891 if (Str == "z") { // == mrtz 4892 Val = 8; 4893 return MatchOperand_Success; 4894 } 4895 4896 if (Str.getAsInteger(10, Val)) 4897 return MatchOperand_ParseFail; 4898 4899 if (Val > 7) 4900 errorExpTgt(); 4901 4902 return MatchOperand_Success; 4903 } 4904 4905 if (Str.startswith("pos")) 
{ 4906 Str = Str.drop_front(3); 4907 if (Str.getAsInteger(10, Val)) 4908 return MatchOperand_ParseFail; 4909 4910 if (Val > 4 || (Val == 4 && !isGFX10())) 4911 errorExpTgt(); 4912 4913 Val += 12; 4914 return MatchOperand_Success; 4915 } 4916 4917 if (isGFX10() && Str == "prim") { 4918 Val = 20; 4919 return MatchOperand_Success; 4920 } 4921 4922 if (Str.startswith("param")) { 4923 Str = Str.drop_front(5); 4924 if (Str.getAsInteger(10, Val)) 4925 return MatchOperand_ParseFail; 4926 4927 if (Val >= 32) 4928 errorExpTgt(); 4929 4930 Val += 32; 4931 return MatchOperand_Success; 4932 } 4933 4934 if (Str.startswith("invalid_target_")) { 4935 Str = Str.drop_front(15); 4936 if (Str.getAsInteger(10, Val)) 4937 return MatchOperand_ParseFail; 4938 4939 errorExpTgt(); 4940 return MatchOperand_Success; 4941 } 4942 4943 return MatchOperand_NoMatch; 4944 } 4945 4946 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 4947 uint8_t Val; 4948 StringRef Str = Parser.getTok().getString(); 4949 4950 auto Res = parseExpTgtImpl(Str, Val); 4951 if (Res != MatchOperand_Success) 4952 return Res; 4953 4954 SMLoc S = Parser.getTok().getLoc(); 4955 Parser.Lex(); 4956 4957 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 4958 AMDGPUOperand::ImmTyExpTgt)); 4959 return MatchOperand_Success; 4960 } 4961 4962 //===----------------------------------------------------------------------===// 4963 // parser helpers 4964 //===----------------------------------------------------------------------===// 4965 4966 bool 4967 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 4968 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 4969 } 4970 4971 bool 4972 AMDGPUAsmParser::isId(const StringRef Id) const { 4973 return isId(getToken(), Id); 4974 } 4975 4976 bool 4977 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 4978 return getTokenKind() == Kind; 4979 } 4980 4981 bool 4982 AMDGPUAsmParser::trySkipId(const StringRef Id) { 4983 if (isId(Id)) { 4984 lex(); 4985 return true; 4986 } 4987 return false; 4988 } 4989 4990 bool 4991 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 4992 if (isId(Id) && peekToken().is(Kind)) { 4993 lex(); 4994 lex(); 4995 return true; 4996 } 4997 return false; 4998 } 4999 5000 bool 5001 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5002 if (isToken(Kind)) { 5003 lex(); 5004 return true; 5005 } 5006 return false; 5007 } 5008 5009 bool 5010 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5011 const StringRef ErrMsg) { 5012 if (!trySkipToken(Kind)) { 5013 Error(getLoc(), ErrMsg); 5014 return false; 5015 } 5016 return true; 5017 } 5018 5019 bool 5020 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5021 return !getParser().parseAbsoluteExpression(Imm); 5022 } 5023 5024 bool 5025 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5026 if (isToken(AsmToken::String)) { 5027 Val = getToken().getStringContents(); 5028 lex(); 5029 return true; 5030 } else { 5031 Error(getLoc(), ErrMsg); 5032 return false; 5033 } 5034 } 5035 5036 AsmToken 5037 AMDGPUAsmParser::getToken() const { 5038 return Parser.getTok(); 5039 } 5040 5041 AsmToken 5042 AMDGPUAsmParser::peekToken() { 5043 return getLexer().peekTok(); 5044 } 5045 5046 void 5047 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5048 auto TokCount = getLexer().peekTokens(Tokens); 5049 5050 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5051 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5052 } 
5053 5054 AsmToken::TokenKind 5055 AMDGPUAsmParser::getTokenKind() const { 5056 return getLexer().getKind(); 5057 } 5058 5059 SMLoc 5060 AMDGPUAsmParser::getLoc() const { 5061 return getToken().getLoc(); 5062 } 5063 5064 StringRef 5065 AMDGPUAsmParser::getTokenStr() const { 5066 return getToken().getString(); 5067 } 5068 5069 void 5070 AMDGPUAsmParser::lex() { 5071 Parser.Lex(); 5072 } 5073 5074 //===----------------------------------------------------------------------===// 5075 // swizzle 5076 //===----------------------------------------------------------------------===// 5077 5078 LLVM_READNONE 5079 static unsigned 5080 encodeBitmaskPerm(const unsigned AndMask, 5081 const unsigned OrMask, 5082 const unsigned XorMask) { 5083 using namespace llvm::AMDGPU::Swizzle; 5084 5085 return BITMASK_PERM_ENC | 5086 (AndMask << BITMASK_AND_SHIFT) | 5087 (OrMask << BITMASK_OR_SHIFT) | 5088 (XorMask << BITMASK_XOR_SHIFT); 5089 } 5090 5091 bool 5092 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5093 const unsigned MinVal, 5094 const unsigned MaxVal, 5095 const StringRef ErrMsg) { 5096 for (unsigned i = 0; i < OpNum; ++i) { 5097 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5098 return false; 5099 } 5100 SMLoc ExprLoc = Parser.getTok().getLoc(); 5101 if (!parseExpr(Op[i])) { 5102 return false; 5103 } 5104 if (Op[i] < MinVal || Op[i] > MaxVal) { 5105 Error(ExprLoc, ErrMsg); 5106 return false; 5107 } 5108 } 5109 5110 return true; 5111 } 5112 5113 bool 5114 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5115 using namespace llvm::AMDGPU::Swizzle; 5116 5117 int64_t Lane[LANE_NUM]; 5118 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5119 "expected a 2-bit lane id")) { 5120 Imm = QUAD_PERM_ENC; 5121 for (unsigned I = 0; I < LANE_NUM; ++I) { 5122 Imm |= Lane[I] << (LANE_SHIFT * I); 5123 } 5124 return true; 5125 } 5126 return false; 5127 } 5128 5129 bool 5130 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5131 using namespace llvm::AMDGPU::Swizzle; 5132 5133 SMLoc S = Parser.getTok().getLoc(); 5134 int64_t GroupSize; 5135 int64_t LaneIdx; 5136 5137 if (!parseSwizzleOperands(1, &GroupSize, 5138 2, 32, 5139 "group size must be in the interval [2,32]")) { 5140 return false; 5141 } 5142 if (!isPowerOf2_64(GroupSize)) { 5143 Error(S, "group size must be a power of two"); 5144 return false; 5145 } 5146 if (parseSwizzleOperands(1, &LaneIdx, 5147 0, GroupSize - 1, 5148 "lane id must be in the interval [0,group size - 1]")) { 5149 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5150 return true; 5151 } 5152 return false; 5153 } 5154 5155 bool 5156 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5157 using namespace llvm::AMDGPU::Swizzle; 5158 5159 SMLoc S = Parser.getTok().getLoc(); 5160 int64_t GroupSize; 5161 5162 if (!parseSwizzleOperands(1, &GroupSize, 5163 2, 32, "group size must be in the interval [2,32]")) { 5164 return false; 5165 } 5166 if (!isPowerOf2_64(GroupSize)) { 5167 Error(S, "group size must be a power of two"); 5168 return false; 5169 } 5170 5171 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 5172 return true; 5173 } 5174 5175 bool 5176 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5177 using namespace llvm::AMDGPU::Swizzle; 5178 5179 SMLoc S = Parser.getTok().getLoc(); 5180 int64_t GroupSize; 5181 5182 if (!parseSwizzleOperands(1, &GroupSize, 5183 1, 16, "group size must be in the interval [1,16]")) { 5184 return false; 5185 } 5186 if (!isPowerOf2_64(GroupSize)) { 5187 Error(S, "group size must be a power of 
two"); 5188 return false; 5189 } 5190 5191 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5192 return true; 5193 } 5194 5195 bool 5196 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5197 using namespace llvm::AMDGPU::Swizzle; 5198 5199 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5200 return false; 5201 } 5202 5203 StringRef Ctl; 5204 SMLoc StrLoc = Parser.getTok().getLoc(); 5205 if (!parseString(Ctl)) { 5206 return false; 5207 } 5208 if (Ctl.size() != BITMASK_WIDTH) { 5209 Error(StrLoc, "expected a 5-character mask"); 5210 return false; 5211 } 5212 5213 unsigned AndMask = 0; 5214 unsigned OrMask = 0; 5215 unsigned XorMask = 0; 5216 5217 for (size_t i = 0; i < Ctl.size(); ++i) { 5218 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5219 switch(Ctl[i]) { 5220 default: 5221 Error(StrLoc, "invalid mask"); 5222 return false; 5223 case '0': 5224 break; 5225 case '1': 5226 OrMask |= Mask; 5227 break; 5228 case 'p': 5229 AndMask |= Mask; 5230 break; 5231 case 'i': 5232 AndMask |= Mask; 5233 XorMask |= Mask; 5234 break; 5235 } 5236 } 5237 5238 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5239 return true; 5240 } 5241 5242 bool 5243 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5244 5245 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5246 5247 if (!parseExpr(Imm)) { 5248 return false; 5249 } 5250 if (!isUInt<16>(Imm)) { 5251 Error(OffsetLoc, "expected a 16-bit offset"); 5252 return false; 5253 } 5254 return true; 5255 } 5256 5257 bool 5258 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5259 using namespace llvm::AMDGPU::Swizzle; 5260 5261 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5262 5263 SMLoc ModeLoc = Parser.getTok().getLoc(); 5264 bool Ok = false; 5265 5266 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5267 Ok = parseSwizzleQuadPerm(Imm); 5268 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5269 Ok = parseSwizzleBitmaskPerm(Imm); 5270 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5271 Ok = parseSwizzleBroadcast(Imm); 5272 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5273 Ok = parseSwizzleSwap(Imm); 5274 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5275 Ok = parseSwizzleReverse(Imm); 5276 } else { 5277 Error(ModeLoc, "expected a swizzle mode"); 5278 } 5279 5280 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5281 } 5282 5283 return false; 5284 } 5285 5286 OperandMatchResultTy 5287 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5288 SMLoc S = Parser.getTok().getLoc(); 5289 int64_t Imm = 0; 5290 5291 if (trySkipId("offset")) { 5292 5293 bool Ok = false; 5294 if (skipToken(AsmToken::Colon, "expected a colon")) { 5295 if (trySkipId("swizzle")) { 5296 Ok = parseSwizzleMacro(Imm); 5297 } else { 5298 Ok = parseSwizzleOffset(Imm); 5299 } 5300 } 5301 5302 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5303 5304 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5305 } else { 5306 // Swizzle "offset" operand is optional. 5307 // If it is omitted, try parsing other optional operands. 
5308 return parseOptionalOpr(Operands); 5309 } 5310 } 5311 5312 bool 5313 AMDGPUOperand::isSwizzle() const { 5314 return isImmTy(ImmTySwizzle); 5315 } 5316 5317 //===----------------------------------------------------------------------===// 5318 // VGPR Index Mode 5319 //===----------------------------------------------------------------------===// 5320 5321 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5322 5323 using namespace llvm::AMDGPU::VGPRIndexMode; 5324 5325 if (trySkipToken(AsmToken::RParen)) { 5326 return OFF; 5327 } 5328 5329 int64_t Imm = 0; 5330 5331 while (true) { 5332 unsigned Mode = 0; 5333 SMLoc S = Parser.getTok().getLoc(); 5334 5335 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5336 if (trySkipId(IdSymbolic[ModeId])) { 5337 Mode = 1 << ModeId; 5338 break; 5339 } 5340 } 5341 5342 if (Mode == 0) { 5343 Error(S, (Imm == 0)? 5344 "expected a VGPR index mode or a closing parenthesis" : 5345 "expected a VGPR index mode"); 5346 break; 5347 } 5348 5349 if (Imm & Mode) { 5350 Error(S, "duplicate VGPR index mode"); 5351 break; 5352 } 5353 Imm |= Mode; 5354 5355 if (trySkipToken(AsmToken::RParen)) 5356 break; 5357 if (!skipToken(AsmToken::Comma, 5358 "expected a comma or a closing parenthesis")) 5359 break; 5360 } 5361 5362 return Imm; 5363 } 5364 5365 OperandMatchResultTy 5366 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5367 5368 int64_t Imm = 0; 5369 SMLoc S = Parser.getTok().getLoc(); 5370 5371 if (getLexer().getKind() == AsmToken::Identifier && 5372 Parser.getTok().getString() == "gpr_idx" && 5373 getLexer().peekTok().is(AsmToken::LParen)) { 5374 5375 Parser.Lex(); 5376 Parser.Lex(); 5377 5378 // If parse failed, trigger an error but do not return error code 5379 // to avoid excessive error messages. 5380 Imm = parseGPRIdxMacro(); 5381 5382 } else { 5383 if (getParser().parseAbsoluteExpression(Imm)) 5384 return MatchOperand_NoMatch; 5385 if (Imm < 0 || !isUInt<4>(Imm)) { 5386 Error(S, "invalid immediate: only 4-bit values are legal"); 5387 } 5388 } 5389 5390 Operands.push_back( 5391 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5392 return MatchOperand_Success; 5393 } 5394 5395 bool AMDGPUOperand::isGPRIdxMode() const { 5396 return isImmTy(ImmTyGprIdxMode); 5397 } 5398 5399 //===----------------------------------------------------------------------===// 5400 // sopp branch targets 5401 //===----------------------------------------------------------------------===// 5402 5403 OperandMatchResultTy 5404 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5405 SMLoc S = Parser.getTok().getLoc(); 5406 5407 switch (getLexer().getKind()) { 5408 default: return MatchOperand_ParseFail; 5409 case AsmToken::Integer: { 5410 int64_t Imm; 5411 if (getParser().parseAbsoluteExpression(Imm)) 5412 return MatchOperand_ParseFail; 5413 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 5414 return MatchOperand_Success; 5415 } 5416 5417 case AsmToken::Identifier: 5418 Operands.push_back(AMDGPUOperand::CreateExpr(this, 5419 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 5420 Parser.getTok().getString()), getContext()), S)); 5421 Parser.Lex(); 5422 return MatchOperand_Success; 5423 } 5424 } 5425 5426 //===----------------------------------------------------------------------===// 5427 // Boolean holding registers 5428 //===----------------------------------------------------------------------===// 5429 5430 OperandMatchResultTy 5431 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5432 return 
parseReg(Operands); 5433 } 5434 5435 //===----------------------------------------------------------------------===// 5436 // mubuf 5437 //===----------------------------------------------------------------------===// 5438 5439 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5440 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5441 } 5442 5443 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5444 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5445 } 5446 5447 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5448 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5449 } 5450 5451 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5452 const OperandVector &Operands, 5453 bool IsAtomic, 5454 bool IsAtomicReturn, 5455 bool IsLds) { 5456 bool IsLdsOpcode = IsLds; 5457 bool HasLdsModifier = false; 5458 OptionalImmIndexMap OptionalIdx; 5459 assert(IsAtomicReturn ? IsAtomic : true); 5460 unsigned FirstOperandIdx = 1; 5461 5462 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5463 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5464 5465 // Add the register arguments 5466 if (Op.isReg()) { 5467 Op.addRegOperands(Inst, 1); 5468 // Insert a tied src for atomic return dst. 5469 // This cannot be postponed as subsequent calls to 5470 // addImmOperands rely on correct number of MC operands. 5471 if (IsAtomicReturn && i == FirstOperandIdx) 5472 Op.addRegOperands(Inst, 1); 5473 continue; 5474 } 5475 5476 // Handle the case where soffset is an immediate 5477 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5478 Op.addImmOperands(Inst, 1); 5479 continue; 5480 } 5481 5482 HasLdsModifier |= Op.isLDS(); 5483 5484 // Handle tokens like 'offen' which are sometimes hard-coded into the 5485 // asm string. There are no MCInst operands for these. 5486 if (Op.isToken()) { 5487 continue; 5488 } 5489 assert(Op.isImm()); 5490 5491 // Handle optional arguments 5492 OptionalIdx[Op.getImmTy()] = i; 5493 } 5494 5495 // This is a workaround for an llvm quirk which may result in an 5496 // incorrect instruction selection. Lds and non-lds versions of 5497 // MUBUF instructions are identical except that lds versions 5498 // have mandatory 'lds' modifier. However this modifier follows 5499 // optional modifiers and llvm asm matcher regards this 'lds' 5500 // modifier as an optional one. As a result, an lds version 5501 // of opcode may be selected even if it has no 'lds' modifier. 5502 if (IsLdsOpcode && !HasLdsModifier) { 5503 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5504 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5505 Inst.setOpcode(NoLdsOpcode); 5506 IsLdsOpcode = false; 5507 } 5508 } 5509 5510 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5511 if (!IsAtomic) { // glc is hard-coded. 
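    // (For the atomic variants, glc is implied by the opcode itself: the
    // return and no-return forms are distinct opcodes, so no separate glc
    // operand is added here.)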
5512 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5513 } 5514 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5515 5516 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5517 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5518 } 5519 5520 if (isGFX10()) 5521 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5522 } 5523 5524 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5525 OptionalImmIndexMap OptionalIdx; 5526 5527 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5528 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5529 5530 // Add the register arguments 5531 if (Op.isReg()) { 5532 Op.addRegOperands(Inst, 1); 5533 continue; 5534 } 5535 5536 // Handle the case where soffset is an immediate 5537 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5538 Op.addImmOperands(Inst, 1); 5539 continue; 5540 } 5541 5542 // Handle tokens like 'offen' which are sometimes hard-coded into the 5543 // asm string. There are no MCInst operands for these. 5544 if (Op.isToken()) { 5545 continue; 5546 } 5547 assert(Op.isImm()); 5548 5549 // Handle optional arguments 5550 OptionalIdx[Op.getImmTy()] = i; 5551 } 5552 5553 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5554 AMDGPUOperand::ImmTyOffset); 5555 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5556 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5557 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5558 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5559 5560 if (isGFX10()) 5561 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5562 } 5563 5564 //===----------------------------------------------------------------------===// 5565 // mimg 5566 //===----------------------------------------------------------------------===// 5567 5568 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5569 bool IsAtomic) { 5570 unsigned I = 1; 5571 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5572 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5573 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5574 } 5575 5576 if (IsAtomic) { 5577 // Add src, same as dst 5578 assert(Desc.getNumDefs() == 1); 5579 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5580 } 5581 5582 OptionalImmIndexMap OptionalIdx; 5583 5584 for (unsigned E = Operands.size(); I != E; ++I) { 5585 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5586 5587 // Add the register arguments 5588 if (Op.isReg()) { 5589 Op.addRegOperands(Inst, 1); 5590 } else if (Op.isImmModifier()) { 5591 OptionalIdx[Op.getImmTy()] = I; 5592 } else if (!Op.isToken()) { 5593 llvm_unreachable("unexpected operand type"); 5594 } 5595 } 5596 5597 bool IsGFX10 = isGFX10(); 5598 5599 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5600 if (IsGFX10) 5601 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 5602 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5603 if (IsGFX10) 5604 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5605 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5606 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5607 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5608 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5609 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5610 if (!IsGFX10) 5611 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5612 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5613 } 5614 5615 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5616 cvtMIMG(Inst, Operands, true); 5617 } 5618 5619 //===----------------------------------------------------------------------===// 5620 // smrd 5621 //===----------------------------------------------------------------------===// 5622 5623 bool AMDGPUOperand::isSMRDOffset8() const { 5624 return isImm() && isUInt<8>(getImm()); 5625 } 5626 5627 bool AMDGPUOperand::isSMRDOffset20() const { 5628 return isImm() && isUInt<20>(getImm()); 5629 } 5630 5631 bool AMDGPUOperand::isSMRDLiteralOffset() const { 5632 // 32-bit literals are only supported on CI and we only want to use them 5633 // when the offset is > 8-bits. 5634 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 5635 } 5636 5637 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 5638 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5639 } 5640 5641 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 5642 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5643 } 5644 5645 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5646 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5647 } 5648 5649 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { 5650 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5651 } 5652 5653 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { 5654 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5655 } 5656 5657 //===----------------------------------------------------------------------===// 5658 // vop3 5659 //===----------------------------------------------------------------------===// 5660 5661 static bool ConvertOmodMul(int64_t &Mul) { 5662 if (Mul != 1 && Mul != 2 && Mul != 4) 5663 return false; 5664 5665 Mul >>= 1; 5666 return true; 5667 } 5668 5669 static bool ConvertOmodDiv(int64_t &Div) { 5670 if (Div == 1) { 5671 Div = 0; 5672 return true; 5673 } 5674 5675 if (Div == 2) { 5676 Div = 3; 5677 return true; 5678 } 5679 5680 return false; 5681 } 5682 5683 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 5684 if (BoundCtrl == 0) { 5685 BoundCtrl = 1; 5686 return true; 5687 } 5688 5689 if (BoundCtrl == -1) { 5690 BoundCtrl = 0; 5691 return true; 5692 } 5693 5694 return false; 5695 } 5696 5697 // Note: the order in this table matches the order of operands in AsmString. 
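// Each entry is {Name, Type, IsBit, ConvertResult}: the modifier name as
// written in assembly, the immediate type it maps to, whether it is a bare
// flag (parsed with parseNamedBit, no "name:value" form), and an optional
// callback used to convert the parsed value.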
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
  {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA, true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128A16, true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyR128A16, true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"dim",     AMDGPUOperand::ImmTyDim, false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr",   AMDGPUOperand::ImmTyExpCompr, true, nullptr},
  {"vm",      AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel",    AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo",    AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi",    AMDGPUOperand::ImmTyNegHi, false, nullptr}
};

OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
  unsigned size = Operands.size();
  assert(size > 0);

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomic which have
  // hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.

  if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {

    // We have parsed the first optional operand.
    // Parse as many operands as necessary to skip all mandatory operands.
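    // Note that the lookahead below stops at the end of the statement or as
    // soon as an operand fails to parse, so at most MAX_OPR_LOOKAHEAD extra
    // operands are consumed.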
5762 5763 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 5764 if (res != MatchOperand_Success || 5765 getLexer().is(AsmToken::EndOfStatement)) break; 5766 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 5767 res = parseOptionalOpr(Operands); 5768 } 5769 } 5770 5771 return res; 5772 } 5773 5774 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 5775 OperandMatchResultTy res; 5776 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 5777 // try to parse any optional operand here 5778 if (Op.IsBit) { 5779 res = parseNamedBit(Op.Name, Operands, Op.Type); 5780 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 5781 res = parseOModOperand(Operands); 5782 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 5783 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 5784 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 5785 res = parseSDWASel(Operands, Op.Name, Op.Type); 5786 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 5787 res = parseSDWADstUnused(Operands); 5788 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 5789 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 5790 Op.Type == AMDGPUOperand::ImmTyNegLo || 5791 Op.Type == AMDGPUOperand::ImmTyNegHi) { 5792 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 5793 Op.ConvertResult); 5794 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 5795 res = parseDim(Operands); 5796 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 5797 res = parseDfmtNfmt(Operands); 5798 } else { 5799 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 5800 } 5801 if (res != MatchOperand_NoMatch) { 5802 return res; 5803 } 5804 } 5805 return MatchOperand_NoMatch; 5806 } 5807 5808 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 5809 StringRef Name = Parser.getTok().getString(); 5810 if (Name == "mul") { 5811 return parseIntWithPrefix("mul", Operands, 5812 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 5813 } 5814 5815 if (Name == "div") { 5816 return parseIntWithPrefix("div", Operands, 5817 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 5818 } 5819 5820 return MatchOperand_NoMatch; 5821 } 5822 5823 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 5824 cvtVOP3P(Inst, Operands); 5825 5826 int Opc = Inst.getOpcode(); 5827 5828 int SrcNum; 5829 const int Ops[] = { AMDGPU::OpName::src0, 5830 AMDGPU::OpName::src1, 5831 AMDGPU::OpName::src2 }; 5832 for (SrcNum = 0; 5833 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 5834 ++SrcNum); 5835 assert(SrcNum > 0); 5836 5837 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5838 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5839 5840 if ((OpSel & (1 << SrcNum)) != 0) { 5841 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 5842 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 5843 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 5844 } 5845 } 5846 5847 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 5848 // 1. This operand is input modifiers 5849 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 5850 // 2. This is not last operand 5851 && Desc.NumOperands > (OpNum + 1) 5852 // 3. Next operand is register class 5853 && Desc.OpInfo[OpNum + 1].RegClass != -1 5854 // 4. 
// Next register is not tied to any other operand
         && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}

void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // these opcodes have a src2 register operand that is tied to the dst
  // operand. The assembler does not allow modifiers for this operand, so
  // src2_modifiers should be 0.
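  // The code below inserts an explicit immediate 0 for src2_modifiers and then
  // re-adds the dst register so that it becomes the tied src2 operand.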
5945 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 5946 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 5947 Opc == AMDGPU::V_MAC_F32_e64_vi || 5948 Opc == AMDGPU::V_MAC_F16_e64_vi || 5949 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 5950 Opc == AMDGPU::V_FMAC_F32_e64_vi || 5951 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 5952 auto it = Inst.begin(); 5953 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 5954 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 5955 ++it; 5956 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 5957 } 5958 } 5959 5960 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 5961 OptionalImmIndexMap OptionalIdx; 5962 cvtVOP3(Inst, Operands, OptionalIdx); 5963 } 5964 5965 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 5966 const OperandVector &Operands) { 5967 OptionalImmIndexMap OptIdx; 5968 const int Opc = Inst.getOpcode(); 5969 const MCInstrDesc &Desc = MII.get(Opc); 5970 5971 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 5972 5973 cvtVOP3(Inst, Operands, OptIdx); 5974 5975 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 5976 assert(!IsPacked); 5977 Inst.addOperand(Inst.getOperand(0)); 5978 } 5979 5980 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 5981 // instruction, and then figure out where to actually put the modifiers 5982 5983 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 5984 5985 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 5986 if (OpSelHiIdx != -1) { 5987 int DefaultVal = IsPacked ? -1 : 0; 5988 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 5989 DefaultVal); 5990 } 5991 5992 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 5993 if (NegLoIdx != -1) { 5994 assert(IsPacked); 5995 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 5996 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 5997 } 5998 5999 const int Ops[] = { AMDGPU::OpName::src0, 6000 AMDGPU::OpName::src1, 6001 AMDGPU::OpName::src2 }; 6002 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6003 AMDGPU::OpName::src1_modifiers, 6004 AMDGPU::OpName::src2_modifiers }; 6005 6006 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6007 6008 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6009 unsigned OpSelHi = 0; 6010 unsigned NegLo = 0; 6011 unsigned NegHi = 0; 6012 6013 if (OpSelHiIdx != -1) { 6014 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6015 } 6016 6017 if (NegLoIdx != -1) { 6018 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6019 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6020 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6021 } 6022 6023 for (int J = 0; J < 3; ++J) { 6024 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6025 if (OpIdx == -1) 6026 break; 6027 6028 uint32_t ModVal = 0; 6029 6030 if ((OpSel & (1 << J)) != 0) 6031 ModVal |= SISrcMods::OP_SEL_0; 6032 6033 if ((OpSelHi & (1 << J)) != 0) 6034 ModVal |= SISrcMods::OP_SEL_1; 6035 6036 if ((NegLo & (1 << J)) != 0) 6037 ModVal |= SISrcMods::NEG; 6038 6039 if ((NegHi & (1 << J)) != 0) 6040 ModVal |= SISrcMods::NEG_HI; 6041 6042 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6043 6044 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6045 } 6046 } 6047 6048 //===----------------------------------------------------------------------===// 6049 // dpp 6050 
//===----------------------------------------------------------------------===// 6051 6052 bool AMDGPUOperand::isDPP8() const { 6053 return isImmTy(ImmTyDPP8); 6054 } 6055 6056 bool AMDGPUOperand::isDPPCtrl() const { 6057 using namespace AMDGPU::DPP; 6058 6059 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6060 if (result) { 6061 int64_t Imm = getImm(); 6062 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6063 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6064 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6065 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6066 (Imm == DppCtrl::WAVE_SHL1) || 6067 (Imm == DppCtrl::WAVE_ROL1) || 6068 (Imm == DppCtrl::WAVE_SHR1) || 6069 (Imm == DppCtrl::WAVE_ROR1) || 6070 (Imm == DppCtrl::ROW_MIRROR) || 6071 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6072 (Imm == DppCtrl::BCAST15) || 6073 (Imm == DppCtrl::BCAST31) || 6074 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6075 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6076 } 6077 return false; 6078 } 6079 6080 bool AMDGPUOperand::isS16Imm() const { 6081 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6082 } 6083 6084 bool AMDGPUOperand::isU16Imm() const { 6085 return isImm() && isUInt<16>(getImm()); 6086 } 6087 6088 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6089 if (!isGFX10()) 6090 return MatchOperand_NoMatch; 6091 6092 SMLoc S = Parser.getTok().getLoc(); 6093 6094 if (getLexer().isNot(AsmToken::Identifier)) 6095 return MatchOperand_NoMatch; 6096 if (getLexer().getTok().getString() != "dim") 6097 return MatchOperand_NoMatch; 6098 6099 Parser.Lex(); 6100 if (getLexer().isNot(AsmToken::Colon)) 6101 return MatchOperand_ParseFail; 6102 6103 Parser.Lex(); 6104 6105 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6106 // integer. 
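  // Glue the integer token to the identifier that follows it, but only if the
  // two are adjacent (no whitespace in between), then look up the combined
  // string as a dim value below.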
6107 std::string Token; 6108 if (getLexer().is(AsmToken::Integer)) { 6109 SMLoc Loc = getLexer().getTok().getEndLoc(); 6110 Token = getLexer().getTok().getString(); 6111 Parser.Lex(); 6112 if (getLexer().getTok().getLoc() != Loc) 6113 return MatchOperand_ParseFail; 6114 } 6115 if (getLexer().isNot(AsmToken::Identifier)) 6116 return MatchOperand_ParseFail; 6117 Token += getLexer().getTok().getString(); 6118 6119 StringRef DimId = Token; 6120 if (DimId.startswith("SQ_RSRC_IMG_")) 6121 DimId = DimId.substr(12); 6122 6123 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6124 if (!DimInfo) 6125 return MatchOperand_ParseFail; 6126 6127 Parser.Lex(); 6128 6129 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6130 AMDGPUOperand::ImmTyDim)); 6131 return MatchOperand_Success; 6132 } 6133 6134 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6135 SMLoc S = Parser.getTok().getLoc(); 6136 StringRef Prefix; 6137 6138 if (getLexer().getKind() == AsmToken::Identifier) { 6139 Prefix = Parser.getTok().getString(); 6140 } else { 6141 return MatchOperand_NoMatch; 6142 } 6143 6144 if (Prefix != "dpp8") 6145 return parseDPPCtrl(Operands); 6146 if (!isGFX10()) 6147 return MatchOperand_NoMatch; 6148 6149 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6150 6151 int64_t Sels[8]; 6152 6153 Parser.Lex(); 6154 if (getLexer().isNot(AsmToken::Colon)) 6155 return MatchOperand_ParseFail; 6156 6157 Parser.Lex(); 6158 if (getLexer().isNot(AsmToken::LBrac)) 6159 return MatchOperand_ParseFail; 6160 6161 Parser.Lex(); 6162 if (getParser().parseAbsoluteExpression(Sels[0])) 6163 return MatchOperand_ParseFail; 6164 if (0 > Sels[0] || 7 < Sels[0]) 6165 return MatchOperand_ParseFail; 6166 6167 for (size_t i = 1; i < 8; ++i) { 6168 if (getLexer().isNot(AsmToken::Comma)) 6169 return MatchOperand_ParseFail; 6170 6171 Parser.Lex(); 6172 if (getParser().parseAbsoluteExpression(Sels[i])) 6173 return MatchOperand_ParseFail; 6174 if (0 > Sels[i] || 7 < Sels[i]) 6175 return MatchOperand_ParseFail; 6176 } 6177 6178 if (getLexer().isNot(AsmToken::RBrac)) 6179 return MatchOperand_ParseFail; 6180 Parser.Lex(); 6181 6182 unsigned DPP8 = 0; 6183 for (size_t i = 0; i < 8; ++i) 6184 DPP8 |= (Sels[i] << (i * 3)); 6185 6186 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6187 return MatchOperand_Success; 6188 } 6189 6190 OperandMatchResultTy 6191 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6192 using namespace AMDGPU::DPP; 6193 6194 SMLoc S = Parser.getTok().getLoc(); 6195 StringRef Prefix; 6196 int64_t Int; 6197 6198 if (getLexer().getKind() == AsmToken::Identifier) { 6199 Prefix = Parser.getTok().getString(); 6200 } else { 6201 return MatchOperand_NoMatch; 6202 } 6203 6204 if (Prefix == "row_mirror") { 6205 Int = DppCtrl::ROW_MIRROR; 6206 Parser.Lex(); 6207 } else if (Prefix == "row_half_mirror") { 6208 Int = DppCtrl::ROW_HALF_MIRROR; 6209 Parser.Lex(); 6210 } else { 6211 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6212 if (Prefix != "quad_perm" 6213 && Prefix != "row_shl" 6214 && Prefix != "row_shr" 6215 && Prefix != "row_ror" 6216 && Prefix != "wave_shl" 6217 && Prefix != "wave_rol" 6218 && Prefix != "wave_shr" 6219 && Prefix != "wave_ror" 6220 && Prefix != "row_bcast" 6221 && Prefix != "row_share" 6222 && Prefix != "row_xmask") { 6223 return MatchOperand_NoMatch; 6224 } 6225 6226 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6227 return MatchOperand_NoMatch; 6228 6229 if (!isVI() && 
!isGFX9() && 6230 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6231 Prefix == "wave_rol" || Prefix == "wave_ror" || 6232 Prefix == "row_bcast")) 6233 return MatchOperand_NoMatch; 6234 6235 Parser.Lex(); 6236 if (getLexer().isNot(AsmToken::Colon)) 6237 return MatchOperand_ParseFail; 6238 6239 if (Prefix == "quad_perm") { 6240 // quad_perm:[%d,%d,%d,%d] 6241 Parser.Lex(); 6242 if (getLexer().isNot(AsmToken::LBrac)) 6243 return MatchOperand_ParseFail; 6244 Parser.Lex(); 6245 6246 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6247 return MatchOperand_ParseFail; 6248 6249 for (int i = 0; i < 3; ++i) { 6250 if (getLexer().isNot(AsmToken::Comma)) 6251 return MatchOperand_ParseFail; 6252 Parser.Lex(); 6253 6254 int64_t Temp; 6255 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6256 return MatchOperand_ParseFail; 6257 const int shift = i*2 + 2; 6258 Int += (Temp << shift); 6259 } 6260 6261 if (getLexer().isNot(AsmToken::RBrac)) 6262 return MatchOperand_ParseFail; 6263 Parser.Lex(); 6264 } else { 6265 // sel:%d 6266 Parser.Lex(); 6267 if (getParser().parseAbsoluteExpression(Int)) 6268 return MatchOperand_ParseFail; 6269 6270 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6271 Int |= DppCtrl::ROW_SHL0; 6272 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6273 Int |= DppCtrl::ROW_SHR0; 6274 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6275 Int |= DppCtrl::ROW_ROR0; 6276 } else if (Prefix == "wave_shl" && 1 == Int) { 6277 Int = DppCtrl::WAVE_SHL1; 6278 } else if (Prefix == "wave_rol" && 1 == Int) { 6279 Int = DppCtrl::WAVE_ROL1; 6280 } else if (Prefix == "wave_shr" && 1 == Int) { 6281 Int = DppCtrl::WAVE_SHR1; 6282 } else if (Prefix == "wave_ror" && 1 == Int) { 6283 Int = DppCtrl::WAVE_ROR1; 6284 } else if (Prefix == "row_bcast") { 6285 if (Int == 15) { 6286 Int = DppCtrl::BCAST15; 6287 } else if (Int == 31) { 6288 Int = DppCtrl::BCAST31; 6289 } else { 6290 return MatchOperand_ParseFail; 6291 } 6292 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6293 Int |= DppCtrl::ROW_SHARE_FIRST; 6294 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6295 Int |= DppCtrl::ROW_XMASK_FIRST; 6296 } else { 6297 return MatchOperand_ParseFail; 6298 } 6299 } 6300 } 6301 6302 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6303 return MatchOperand_Success; 6304 } 6305 6306 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6307 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6308 } 6309 6310 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6311 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6312 } 6313 6314 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6315 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6316 } 6317 6318 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6319 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6320 } 6321 6322 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6323 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6324 } 6325 6326 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6327 OptionalImmIndexMap OptionalIdx; 6328 6329 unsigned I = 1; 6330 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6331 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6332 ((AMDGPUOperand 
&)*Operands[I++]).addRegOperands(Inst, 1); 6333 } 6334 6335 int Fi = 0; 6336 for (unsigned E = Operands.size(); I != E; ++I) { 6337 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6338 MCOI::TIED_TO); 6339 if (TiedTo != -1) { 6340 assert((unsigned)TiedTo < Inst.getNumOperands()); 6341 // handle tied old or src2 for MAC instructions 6342 Inst.addOperand(Inst.getOperand(TiedTo)); 6343 } 6344 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6345 // Add the register arguments 6346 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6347 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6348 // Skip it. 6349 continue; 6350 } 6351 6352 if (IsDPP8) { 6353 if (Op.isDPP8()) { 6354 Op.addImmOperands(Inst, 1); 6355 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6356 Op.addRegWithFPInputModsOperands(Inst, 2); 6357 } else if (Op.isFI()) { 6358 Fi = Op.getImm(); 6359 } else if (Op.isReg()) { 6360 Op.addRegOperands(Inst, 1); 6361 } else { 6362 llvm_unreachable("Invalid operand type"); 6363 } 6364 } else { 6365 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6366 Op.addRegWithFPInputModsOperands(Inst, 2); 6367 } else if (Op.isDPPCtrl()) { 6368 Op.addImmOperands(Inst, 1); 6369 } else if (Op.isImm()) { 6370 // Handle optional arguments 6371 OptionalIdx[Op.getImmTy()] = I; 6372 } else { 6373 llvm_unreachable("Invalid operand type"); 6374 } 6375 } 6376 } 6377 6378 if (IsDPP8) { 6379 using namespace llvm::AMDGPU::DPP; 6380 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6381 } else { 6382 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6383 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6384 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6385 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6386 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6387 } 6388 } 6389 } 6390 6391 //===----------------------------------------------------------------------===// 6392 // sdwa 6393 //===----------------------------------------------------------------------===// 6394 6395 OperandMatchResultTy 6396 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6397 AMDGPUOperand::ImmTy Type) { 6398 using namespace llvm::AMDGPU::SDWA; 6399 6400 SMLoc S = Parser.getTok().getLoc(); 6401 StringRef Value; 6402 OperandMatchResultTy res; 6403 6404 res = parseStringWithPrefix(Prefix, Value); 6405 if (res != MatchOperand_Success) { 6406 return res; 6407 } 6408 6409 int64_t Int; 6410 Int = StringSwitch<int64_t>(Value) 6411 .Case("BYTE_0", SdwaSel::BYTE_0) 6412 .Case("BYTE_1", SdwaSel::BYTE_1) 6413 .Case("BYTE_2", SdwaSel::BYTE_2) 6414 .Case("BYTE_3", SdwaSel::BYTE_3) 6415 .Case("WORD_0", SdwaSel::WORD_0) 6416 .Case("WORD_1", SdwaSel::WORD_1) 6417 .Case("DWORD", SdwaSel::DWORD) 6418 .Default(0xffffffff); 6419 Parser.Lex(); // eat last token 6420 6421 if (Int == 0xffffffff) { 6422 return MatchOperand_ParseFail; 6423 } 6424 6425 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6426 return MatchOperand_Success; 6427 } 6428 6429 OperandMatchResultTy 6430 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6431 using namespace llvm::AMDGPU::SDWA; 6432 6433 SMLoc S = Parser.getTok().getLoc(); 6434 StringRef Value; 6435 OperandMatchResultTy res; 6436 6437 res = parseStringWithPrefix("dst_unused", Value); 6438 if (res != MatchOperand_Success) { 6439 
return res; 6440 } 6441 6442 int64_t Int; 6443 Int = StringSwitch<int64_t>(Value) 6444 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6445 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6446 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6447 .Default(0xffffffff); 6448 Parser.Lex(); // eat last token 6449 6450 if (Int == 0xffffffff) { 6451 return MatchOperand_ParseFail; 6452 } 6453 6454 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6455 return MatchOperand_Success; 6456 } 6457 6458 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6459 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6460 } 6461 6462 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6463 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6464 } 6465 6466 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6467 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); 6468 } 6469 6470 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6471 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6472 } 6473 6474 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6475 uint64_t BasicInstType, bool skipVcc) { 6476 using namespace llvm::AMDGPU::SDWA; 6477 6478 OptionalImmIndexMap OptionalIdx; 6479 bool skippedVcc = false; 6480 6481 unsigned I = 1; 6482 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6483 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6484 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6485 } 6486 6487 for (unsigned E = Operands.size(); I != E; ++I) { 6488 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6489 if (skipVcc && !skippedVcc && Op.isReg() && 6490 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 6491 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 6492 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 6493 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6494 // Skip VCC only if we didn't skip it on previous iteration. 
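      // The operand counts checked below correspond to those two positions:
      // for VOP2, one or five MC operands have already been added when the
      // stray vcc is seen; for VOPC, none have.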
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}: these opcodes have a src2 register operand
  // that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks if we were given an immediate operand but
  // expected to get the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }