1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/Error.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 #include "llvm/Support/SMLoc.h" 52 #include "llvm/Support/TargetParser.h" 53 #include "llvm/Support/TargetRegistry.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <cassert> 57 #include <cstdint> 58 #include <cstring> 59 #include <iterator> 60 #include <map> 61 #include <memory> 62 #include <string> 63 64 using namespace llvm; 65 using namespace llvm::AMDGPU; 66 using namespace llvm::amdhsa; 67 68 namespace { 69 70 class AMDGPUAsmParser; 71 72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 73 74 //===----------------------------------------------------------------------===// 75 // Operand 76 //===----------------------------------------------------------------------===// 77 78 class AMDGPUOperand : public MCParsedAsmOperand { 79 enum KindTy { 80 Token, 81 Immediate, 82 Register, 83 Expression 84 } Kind; 85 86 SMLoc StartLoc, EndLoc; 87 const AMDGPUAsmParser *AsmParser; 88 89 public: 90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 92 93 using Ptr = std::unique_ptr<AMDGPUOperand>; 94 95 struct Modifiers { 96 bool Abs = false; 97 bool Neg = false; 98 bool Sext = false; 99 100 bool hasFPModifiers() const { return Abs || Neg; } 101 bool hasIntModifiers() const { return Sext; } 102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 103 104 int64_t getFPModifiersOperand() const { 105 int64_t Operand = 0; 106 Operand |= Abs ? SISrcMods::ABS : 0u; 107 Operand |= Neg ? 
SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
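    // For example, a trailing 'gds' modifier may first come back from the
    // generic parser as an expression referring to a symbol named 'gds';
    // getToken() below then returns that symbol's name, so the operand can
    // still be matched as the 'gds' token.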
225 return isSymbolRefExpr(); 226 } 227 228 bool isSymbolRefExpr() const { 229 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 230 } 231 232 bool isImm() const override { 233 return Kind == Immediate; 234 } 235 236 bool isInlinableImm(MVT type) const; 237 bool isLiteralImm(MVT type) const; 238 239 bool isRegKind() const { 240 return Kind == Register; 241 } 242 243 bool isReg() const override { 244 return isRegKind() && !hasModifiers(); 245 } 246 247 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 248 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 249 } 250 251 bool isRegOrImmWithInt16InputMods() const { 252 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 253 } 254 255 bool isRegOrImmWithInt32InputMods() const { 256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 257 } 258 259 bool isRegOrImmWithInt64InputMods() const { 260 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 261 } 262 263 bool isRegOrImmWithFP16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 265 } 266 267 bool isRegOrImmWithFP32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 269 } 270 271 bool isRegOrImmWithFP64InputMods() const { 272 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 273 } 274 275 bool isVReg() const { 276 return isRegClass(AMDGPU::VGPR_32RegClassID) || 277 isRegClass(AMDGPU::VReg_64RegClassID) || 278 isRegClass(AMDGPU::VReg_96RegClassID) || 279 isRegClass(AMDGPU::VReg_128RegClassID) || 280 isRegClass(AMDGPU::VReg_160RegClassID) || 281 isRegClass(AMDGPU::VReg_192RegClassID) || 282 isRegClass(AMDGPU::VReg_256RegClassID) || 283 isRegClass(AMDGPU::VReg_512RegClassID) || 284 isRegClass(AMDGPU::VReg_1024RegClassID); 285 } 286 287 bool isVReg32() const { 288 return isRegClass(AMDGPU::VGPR_32RegClassID); 289 } 290 291 bool isVReg32OrOff() const { 292 return isOff() || isVReg32(); 293 } 294 295 bool isNull() const { 296 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 297 } 298 299 bool isSDWAOperand(MVT type) const; 300 bool isSDWAFP16Operand() const; 301 bool isSDWAFP32Operand() const; 302 bool isSDWAInt16Operand() const; 303 bool isSDWAInt32Operand() const; 304 305 bool isImmTy(ImmTy ImmT) const { 306 return isImm() && Imm.Type == ImmT; 307 } 308 309 bool isImmModifier() const { 310 return isImm() && Imm.Type != ImmTyNone; 311 } 312 313 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 314 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 315 bool isDMask() const { return isImmTy(ImmTyDMask); } 316 bool isDim() const { return isImmTy(ImmTyDim); } 317 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 318 bool isDA() const { return isImmTy(ImmTyDA); } 319 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 320 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 321 bool isLWE() const { return isImmTy(ImmTyLWE); } 322 bool isOff() const { return isImmTy(ImmTyOff); } 323 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 324 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 325 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 326 bool isOffen() const { return isImmTy(ImmTyOffen); } 327 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 328 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 329 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 330 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 331 bool 
isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 332 333 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 334 bool isGDS() const { return isImmTy(ImmTyGDS); } 335 bool isLDS() const { return isImmTy(ImmTyLDS); } 336 bool isDLC() const { return isImmTy(ImmTyDLC); } 337 bool isGLC() const { return isImmTy(ImmTyGLC); } 338 bool isSLC() const { return isImmTy(ImmTySLC); } 339 bool isSWZ() const { return isImmTy(ImmTySWZ); } 340 bool isTFE() const { return isImmTy(ImmTyTFE); } 341 bool isD16() const { return isImmTy(ImmTyD16); } 342 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 343 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 344 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 345 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 346 bool isFI() const { return isImmTy(ImmTyDppFi); } 347 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 348 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 349 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 350 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 351 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 352 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 353 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 354 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 355 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 356 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 357 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 358 bool isHigh() const { return isImmTy(ImmTyHigh); } 359 360 bool isMod() const { 361 return isClampSI() || isOModSI(); 362 } 363 364 bool isRegOrImm() const { 365 return isReg() || isImm(); 366 } 367 368 bool isRegClass(unsigned RCID) const; 369 370 bool isInlineValue() const; 371 372 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 373 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 374 } 375 376 bool isSCSrcB16() const { 377 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 378 } 379 380 bool isSCSrcV2B16() const { 381 return isSCSrcB16(); 382 } 383 384 bool isSCSrcB32() const { 385 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 386 } 387 388 bool isSCSrcB64() const { 389 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 390 } 391 392 bool isBoolReg() const; 393 394 bool isSCSrcF16() const { 395 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 396 } 397 398 bool isSCSrcV2F16() const { 399 return isSCSrcF16(); 400 } 401 402 bool isSCSrcF32() const { 403 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 404 } 405 406 bool isSCSrcF64() const { 407 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 408 } 409 410 bool isSSrcB32() const { 411 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 412 } 413 414 bool isSSrcB16() const { 415 return isSCSrcB16() || isLiteralImm(MVT::i16); 416 } 417 418 bool isSSrcV2B16() const { 419 llvm_unreachable("cannot happen"); 420 return isSSrcB16(); 421 } 422 423 bool isSSrcB64() const { 424 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 425 // See isVSrc64(). 
426 return isSCSrcB64() || isLiteralImm(MVT::i64); 427 } 428 429 bool isSSrcF32() const { 430 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 431 } 432 433 bool isSSrcF64() const { 434 return isSCSrcB64() || isLiteralImm(MVT::f64); 435 } 436 437 bool isSSrcF16() const { 438 return isSCSrcB16() || isLiteralImm(MVT::f16); 439 } 440 441 bool isSSrcV2F16() const { 442 llvm_unreachable("cannot happen"); 443 return isSSrcF16(); 444 } 445 446 bool isSSrcOrLdsB32() const { 447 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 448 isLiteralImm(MVT::i32) || isExpr(); 449 } 450 451 bool isVCSrcB32() const { 452 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 453 } 454 455 bool isVCSrcB64() const { 456 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 457 } 458 459 bool isVCSrcB16() const { 460 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 461 } 462 463 bool isVCSrcV2B16() const { 464 return isVCSrcB16(); 465 } 466 467 bool isVCSrcF32() const { 468 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 469 } 470 471 bool isVCSrcF64() const { 472 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 473 } 474 475 bool isVCSrcF16() const { 476 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 477 } 478 479 bool isVCSrcV2F16() const { 480 return isVCSrcF16(); 481 } 482 483 bool isVSrcB32() const { 484 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 485 } 486 487 bool isVSrcB64() const { 488 return isVCSrcF64() || isLiteralImm(MVT::i64); 489 } 490 491 bool isVSrcB16() const { 492 return isVCSrcF16() || isLiteralImm(MVT::i16); 493 } 494 495 bool isVSrcV2B16() const { 496 return isVSrcB16() || isLiteralImm(MVT::v2i16); 497 } 498 499 bool isVSrcF32() const { 500 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 501 } 502 503 bool isVSrcF64() const { 504 return isVCSrcF64() || isLiteralImm(MVT::f64); 505 } 506 507 bool isVSrcF16() const { 508 return isVCSrcF16() || isLiteralImm(MVT::f16); 509 } 510 511 bool isVSrcV2F16() const { 512 return isVSrcF16() || isLiteralImm(MVT::v2f16); 513 } 514 515 bool isVISrcB32() const { 516 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 517 } 518 519 bool isVISrcB16() const { 520 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 521 } 522 523 bool isVISrcV2B16() const { 524 return isVISrcB16(); 525 } 526 527 bool isVISrcF32() const { 528 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 529 } 530 531 bool isVISrcF16() const { 532 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 533 } 534 535 bool isVISrcV2F16() const { 536 return isVISrcF16() || isVISrcB32(); 537 } 538 539 bool isAISrcB32() const { 540 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 541 } 542 543 bool isAISrcB16() const { 544 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 545 } 546 547 bool isAISrcV2B16() const { 548 return isAISrcB16(); 549 } 550 551 bool isAISrcF32() const { 552 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 553 } 554 555 bool isAISrcF16() const { 556 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 557 } 558 559 bool isAISrcV2F16() const { 560 return isAISrcF16() || isAISrcB32(); 561 } 562 563 bool isAISrc_128B32() const { 564 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 565 } 566 567 bool isAISrc_128B16() const { 568 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 569 } 570 571 bool 
isAISrc_128V2B16() const { 572 return isAISrc_128B16(); 573 } 574 575 bool isAISrc_128F32() const { 576 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 577 } 578 579 bool isAISrc_128F16() const { 580 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 581 } 582 583 bool isAISrc_128V2F16() const { 584 return isAISrc_128F16() || isAISrc_128B32(); 585 } 586 587 bool isAISrc_512B32() const { 588 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 589 } 590 591 bool isAISrc_512B16() const { 592 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 593 } 594 595 bool isAISrc_512V2B16() const { 596 return isAISrc_512B16(); 597 } 598 599 bool isAISrc_512F32() const { 600 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 601 } 602 603 bool isAISrc_512F16() const { 604 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 605 } 606 607 bool isAISrc_512V2F16() const { 608 return isAISrc_512F16() || isAISrc_512B32(); 609 } 610 611 bool isAISrc_1024B32() const { 612 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 613 } 614 615 bool isAISrc_1024B16() const { 616 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 617 } 618 619 bool isAISrc_1024V2B16() const { 620 return isAISrc_1024B16(); 621 } 622 623 bool isAISrc_1024F32() const { 624 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 625 } 626 627 bool isAISrc_1024F16() const { 628 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 629 } 630 631 bool isAISrc_1024V2F16() const { 632 return isAISrc_1024F16() || isAISrc_1024B32(); 633 } 634 635 bool isKImmFP32() const { 636 return isLiteralImm(MVT::f32); 637 } 638 639 bool isKImmFP16() const { 640 return isLiteralImm(MVT::f16); 641 } 642 643 bool isMem() const override { 644 return false; 645 } 646 647 bool isExpr() const { 648 return Kind == Expression; 649 } 650 651 bool isSoppBrTarget() const { 652 return isExpr() || isImm(); 653 } 654 655 bool isSWaitCnt() const; 656 bool isHwreg() const; 657 bool isSendMsg() const; 658 bool isSwizzle() const; 659 bool isSMRDOffset8() const; 660 bool isSMRDOffset20() const; 661 bool isSMRDLiteralOffset() const; 662 bool isDPP8() const; 663 bool isDPPCtrl() const; 664 bool isBLGP() const; 665 bool isCBSZ() const; 666 bool isABID() const; 667 bool isGPRIdxMode() const; 668 bool isS16Imm() const; 669 bool isU16Imm() const; 670 bool isEndpgm() const; 671 672 StringRef getExpressionAsToken() const { 673 assert(isExpr()); 674 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 675 return S->getSymbol().getName(); 676 } 677 678 StringRef getToken() const { 679 assert(isToken()); 680 681 if (Kind == Expression) 682 return getExpressionAsToken(); 683 684 return StringRef(Tok.Data, Tok.Length); 685 } 686 687 int64_t getImm() const { 688 assert(isImm()); 689 return Imm.Val; 690 } 691 692 ImmTy getImmTy() const { 693 assert(isImm()); 694 return Imm.Type; 695 } 696 697 unsigned getReg() const override { 698 assert(isRegKind()); 699 return Reg.RegNo; 700 } 701 702 SMLoc getStartLoc() const override { 703 return StartLoc; 704 } 705 706 SMLoc getEndLoc() const override { 707 return EndLoc; 708 } 709 710 SMRange getLocRange() const { 711 return SMRange(StartLoc, EndLoc); 712 } 713 714 Modifiers getModifiers() const { 715 assert(isRegKind() || isImmTy(ImmTyNone)); 716 return isRegKind() ? 
Reg.Mods : Imm.Mods; 717 } 718 719 void setModifiers(Modifiers Mods) { 720 assert(isRegKind() || isImmTy(ImmTyNone)); 721 if (isRegKind()) 722 Reg.Mods = Mods; 723 else 724 Imm.Mods = Mods; 725 } 726 727 bool hasModifiers() const { 728 return getModifiers().hasModifiers(); 729 } 730 731 bool hasFPModifiers() const { 732 return getModifiers().hasFPModifiers(); 733 } 734 735 bool hasIntModifiers() const { 736 return getModifiers().hasIntModifiers(); 737 } 738 739 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 740 741 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 742 743 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 744 745 template <unsigned Bitwidth> 746 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 747 748 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 749 addKImmFPOperands<16>(Inst, N); 750 } 751 752 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 753 addKImmFPOperands<32>(Inst, N); 754 } 755 756 void addRegOperands(MCInst &Inst, unsigned N) const; 757 758 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 759 addRegOperands(Inst, N); 760 } 761 762 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 763 if (isRegKind()) 764 addRegOperands(Inst, N); 765 else if (isExpr()) 766 Inst.addOperand(MCOperand::createExpr(Expr)); 767 else 768 addImmOperands(Inst, N); 769 } 770 771 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 772 Modifiers Mods = getModifiers(); 773 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 774 if (isRegKind()) { 775 addRegOperands(Inst, N); 776 } else { 777 addImmOperands(Inst, N, false); 778 } 779 } 780 781 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 782 assert(!hasIntModifiers()); 783 addRegOrImmWithInputModsOperands(Inst, N); 784 } 785 786 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 787 assert(!hasFPModifiers()); 788 addRegOrImmWithInputModsOperands(Inst, N); 789 } 790 791 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 792 Modifiers Mods = getModifiers(); 793 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 794 assert(isRegKind()); 795 addRegOperands(Inst, N); 796 } 797 798 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 799 assert(!hasIntModifiers()); 800 addRegWithInputModsOperands(Inst, N); 801 } 802 803 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 804 assert(!hasFPModifiers()); 805 addRegWithInputModsOperands(Inst, N); 806 } 807 808 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 809 if (isImm()) 810 addImmOperands(Inst, N); 811 else { 812 assert(isExpr()); 813 Inst.addOperand(MCOperand::createExpr(Expr)); 814 } 815 } 816 817 static void printImmTy(raw_ostream& OS, ImmTy Type) { 818 switch (Type) { 819 case ImmTyNone: OS << "None"; break; 820 case ImmTyGDS: OS << "GDS"; break; 821 case ImmTyLDS: OS << "LDS"; break; 822 case ImmTyOffen: OS << "Offen"; break; 823 case ImmTyIdxen: OS << "Idxen"; break; 824 case ImmTyAddr64: OS << "Addr64"; break; 825 case ImmTyOffset: OS << "Offset"; break; 826 case ImmTyInstOffset: OS << "InstOffset"; break; 827 case ImmTyOffset0: OS << "Offset0"; break; 828 case ImmTyOffset1: OS << "Offset1"; break; 829 case ImmTyDLC: OS << "DLC"; break; 830 case ImmTyGLC: OS << "GLC"; break; 831 case ImmTySLC: OS << "SLC"; break; 832 case ImmTySWZ: OS << "SWZ"; break; 833 case ImmTyTFE: OS << "TFE"; break; 834 case 
ImmTyD16: OS << "D16"; break; 835 case ImmTyFORMAT: OS << "FORMAT"; break; 836 case ImmTyClampSI: OS << "ClampSI"; break; 837 case ImmTyOModSI: OS << "OModSI"; break; 838 case ImmTyDPP8: OS << "DPP8"; break; 839 case ImmTyDppCtrl: OS << "DppCtrl"; break; 840 case ImmTyDppRowMask: OS << "DppRowMask"; break; 841 case ImmTyDppBankMask: OS << "DppBankMask"; break; 842 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 843 case ImmTyDppFi: OS << "FI"; break; 844 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 845 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 846 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 847 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 848 case ImmTyDMask: OS << "DMask"; break; 849 case ImmTyDim: OS << "Dim"; break; 850 case ImmTyUNorm: OS << "UNorm"; break; 851 case ImmTyDA: OS << "DA"; break; 852 case ImmTyR128A16: OS << "R128A16"; break; 853 case ImmTyA16: OS << "A16"; break; 854 case ImmTyLWE: OS << "LWE"; break; 855 case ImmTyOff: OS << "Off"; break; 856 case ImmTyExpTgt: OS << "ExpTgt"; break; 857 case ImmTyExpCompr: OS << "ExpCompr"; break; 858 case ImmTyExpVM: OS << "ExpVM"; break; 859 case ImmTyHwreg: OS << "Hwreg"; break; 860 case ImmTySendMsg: OS << "SendMsg"; break; 861 case ImmTyInterpSlot: OS << "InterpSlot"; break; 862 case ImmTyInterpAttr: OS << "InterpAttr"; break; 863 case ImmTyAttrChan: OS << "AttrChan"; break; 864 case ImmTyOpSel: OS << "OpSel"; break; 865 case ImmTyOpSelHi: OS << "OpSelHi"; break; 866 case ImmTyNegLo: OS << "NegLo"; break; 867 case ImmTyNegHi: OS << "NegHi"; break; 868 case ImmTySwizzle: OS << "Swizzle"; break; 869 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 870 case ImmTyHigh: OS << "High"; break; 871 case ImmTyBLGP: OS << "BLGP"; break; 872 case ImmTyCBSZ: OS << "CBSZ"; break; 873 case ImmTyABID: OS << "ABID"; break; 874 case ImmTyEndpgm: OS << "Endpgm"; break; 875 } 876 } 877 878 void print(raw_ostream &OS) const override { 879 switch (Kind) { 880 case Register: 881 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 882 break; 883 case Immediate: 884 OS << '<' << getImm(); 885 if (getImmTy() != ImmTyNone) { 886 OS << " type: "; printImmTy(OS, getImmTy()); 887 } 888 OS << " mods: " << Imm.Mods << '>'; 889 break; 890 case Token: 891 OS << '\'' << getToken() << '\''; 892 break; 893 case Expression: 894 OS << "<expr " << *Expr << '>'; 895 break; 896 } 897 } 898 899 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 900 int64_t Val, SMLoc Loc, 901 ImmTy Type = ImmTyNone, 902 bool IsFPImm = false) { 903 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 904 Op->Imm.Val = Val; 905 Op->Imm.IsFPImm = IsFPImm; 906 Op->Imm.Type = Type; 907 Op->Imm.Mods = Modifiers(); 908 Op->StartLoc = Loc; 909 Op->EndLoc = Loc; 910 return Op; 911 } 912 913 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 914 StringRef Str, SMLoc Loc, 915 bool HasExplicitEncodingSize = true) { 916 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 917 Res->Tok.Data = Str.data(); 918 Res->Tok.Length = Str.size(); 919 Res->StartLoc = Loc; 920 Res->EndLoc = Loc; 921 return Res; 922 } 923 924 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 925 unsigned RegNo, SMLoc S, 926 SMLoc E) { 927 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 928 Op->Reg.RegNo = RegNo; 929 Op->Reg.Mods = Modifiers(); 930 Op->StartLoc = S; 931 Op->EndLoc = E; 932 return Op; 933 } 934 935 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 936 
const class MCExpr *Expr, SMLoc S) { 937 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 938 Op->Expr = Expr; 939 Op->StartLoc = S; 940 Op->EndLoc = S; 941 return Op; 942 } 943 }; 944 945 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 946 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 947 return OS; 948 } 949 950 //===----------------------------------------------------------------------===// 951 // AsmParser 952 //===----------------------------------------------------------------------===// 953 954 // Holds info related to the current kernel, e.g. count of SGPRs used. 955 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 956 // .amdgpu_hsa_kernel or at EOF. 957 class KernelScopeInfo { 958 int SgprIndexUnusedMin = -1; 959 int VgprIndexUnusedMin = -1; 960 MCContext *Ctx = nullptr; 961 962 void usesSgprAt(int i) { 963 if (i >= SgprIndexUnusedMin) { 964 SgprIndexUnusedMin = ++i; 965 if (Ctx) { 966 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 967 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 968 } 969 } 970 } 971 972 void usesVgprAt(int i) { 973 if (i >= VgprIndexUnusedMin) { 974 VgprIndexUnusedMin = ++i; 975 if (Ctx) { 976 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 977 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 978 } 979 } 980 } 981 982 public: 983 KernelScopeInfo() = default; 984 985 void initialize(MCContext &Context) { 986 Ctx = &Context; 987 usesSgprAt(SgprIndexUnusedMin = -1); 988 usesVgprAt(VgprIndexUnusedMin = -1); 989 } 990 991 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 992 switch (RegKind) { 993 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 994 case IS_AGPR: // fall through 995 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 996 default: break; 997 } 998 } 999 }; 1000 1001 class AMDGPUAsmParser : public MCTargetAsmParser { 1002 MCAsmParser &Parser; 1003 1004 // Number of extra operands parsed after the first optional operand. 1005 // This may be necessary to skip hardcoded mandatory operands. 1006 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1007 1008 unsigned ForcedEncodingSize = 0; 1009 bool ForcedDPP = false; 1010 bool ForcedSDWA = false; 1011 KernelScopeInfo KernelScope; 1012 1013 /// @name Auto-generated Match Functions 1014 /// { 1015 1016 #define GET_ASSEMBLER_HEADER 1017 #include "AMDGPUGenAsmMatcher.inc" 1018 1019 /// } 1020 1021 private: 1022 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1023 bool OutOfRangeError(SMRange Range); 1024 /// Calculate VGPR/SGPR blocks required for given target, reserved 1025 /// registers, and user-specified NextFreeXGPR values. 1026 /// 1027 /// \param Features [in] Target features, used for bug corrections. 1028 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1029 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1030 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1031 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1032 /// descriptor field, if valid. 1033 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1034 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1035 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1036 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 
1037 /// \param VGPRBlocks [out] Result VGPR block count. 1038 /// \param SGPRBlocks [out] Result SGPR block count. 1039 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1040 bool FlatScrUsed, bool XNACKUsed, 1041 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1042 SMRange VGPRRange, unsigned NextFreeSGPR, 1043 SMRange SGPRRange, unsigned &VGPRBlocks, 1044 unsigned &SGPRBlocks); 1045 bool ParseDirectiveAMDGCNTarget(); 1046 bool ParseDirectiveAMDHSAKernel(); 1047 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1048 bool ParseDirectiveHSACodeObjectVersion(); 1049 bool ParseDirectiveHSACodeObjectISA(); 1050 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1051 bool ParseDirectiveAMDKernelCodeT(); 1052 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1053 bool ParseDirectiveAMDGPUHsaKernel(); 1054 1055 bool ParseDirectiveISAVersion(); 1056 bool ParseDirectiveHSAMetadata(); 1057 bool ParseDirectivePALMetadataBegin(); 1058 bool ParseDirectivePALMetadata(); 1059 bool ParseDirectiveAMDGPULDS(); 1060 1061 /// Common code to parse out a block of text (typically YAML) between start and 1062 /// end directives. 1063 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1064 const char *AssemblerDirectiveEnd, 1065 std::string &CollectString); 1066 1067 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1068 RegisterKind RegKind, unsigned Reg1); 1069 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1070 unsigned &RegNum, unsigned &RegWidth, 1071 bool RestoreOnFailure = false); 1072 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1073 unsigned &RegNum, unsigned &RegWidth, 1074 SmallVectorImpl<AsmToken> &Tokens); 1075 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1076 unsigned &RegWidth, 1077 SmallVectorImpl<AsmToken> &Tokens); 1078 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1079 unsigned &RegWidth, 1080 SmallVectorImpl<AsmToken> &Tokens); 1081 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1082 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1083 bool ParseRegRange(unsigned& Num, unsigned& Width); 1084 unsigned getRegularReg(RegisterKind RegKind, 1085 unsigned RegNum, 1086 unsigned RegWidth); 1087 1088 bool isRegister(); 1089 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1090 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1091 void initializeGprCountSymbol(RegisterKind RegKind); 1092 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1093 unsigned RegWidth); 1094 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1095 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 1096 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1097 bool IsGdsHardcoded); 1098 1099 public: 1100 enum AMDGPUMatchResultTy { 1101 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1102 }; 1103 enum OperandMode { 1104 OperandMode_Default, 1105 OperandMode_NSA, 1106 }; 1107 1108 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1109 1110 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1111 const MCInstrInfo &MII, 1112 const MCTargetOptions &Options) 1113 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1114 MCAsmParserExtension::Initialize(Parser); 1115 1116 if (getFeatureBits().none()) { 1117 // Set default features. 
copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not
    // const in MCAsmParser.
1219 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1220 } 1221 1222 const MCInstrInfo *getMII() const { 1223 return &MII; 1224 } 1225 1226 const FeatureBitset &getFeatureBits() const { 1227 return getSTI().getFeatureBits(); 1228 } 1229 1230 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1231 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1232 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1233 1234 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1235 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1236 bool isForcedDPP() const { return ForcedDPP; } 1237 bool isForcedSDWA() const { return ForcedSDWA; } 1238 ArrayRef<unsigned> getMatchedVariants() const; 1239 1240 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1241 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1242 bool RestoreOnFailure); 1243 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1244 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1245 SMLoc &EndLoc) override; 1246 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1247 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1248 unsigned Kind) override; 1249 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1250 OperandVector &Operands, MCStreamer &Out, 1251 uint64_t &ErrorInfo, 1252 bool MatchingInlineAsm) override; 1253 bool ParseDirective(AsmToken DirectiveID) override; 1254 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1255 OperandMode Mode = OperandMode_Default); 1256 StringRef parseMnemonicSuffix(StringRef Name); 1257 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1258 SMLoc NameLoc, OperandVector &Operands) override; 1259 //bool ProcessInstruction(MCInst &Inst); 1260 1261 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1262 1263 OperandMatchResultTy 1264 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1265 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1266 bool (*ConvertResult)(int64_t &) = nullptr); 1267 1268 OperandMatchResultTy 1269 parseOperandArrayWithPrefix(const char *Prefix, 1270 OperandVector &Operands, 1271 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1272 bool (*ConvertResult)(int64_t&) = nullptr); 1273 1274 OperandMatchResultTy 1275 parseNamedBit(const char *Name, OperandVector &Operands, 1276 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1277 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1278 StringRef &Value); 1279 1280 bool isModifier(); 1281 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1282 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1283 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1284 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1285 bool parseSP3NegModifier(); 1286 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1287 OperandMatchResultTy parseReg(OperandVector &Operands); 1288 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1289 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1290 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1291 
OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1292 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1293 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1294 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands); 1295 1296 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1297 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1298 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1299 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1300 1301 bool parseCnt(int64_t &IntVal); 1302 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1303 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1304 1305 private: 1306 struct OperandInfoTy { 1307 int64_t Id; 1308 bool IsSymbolic = false; 1309 bool IsDefined = false; 1310 1311 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1312 }; 1313 1314 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1315 bool validateSendMsg(const OperandInfoTy &Msg, 1316 const OperandInfoTy &Op, 1317 const OperandInfoTy &Stream, 1318 const SMLoc Loc); 1319 1320 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1321 bool validateHwreg(const OperandInfoTy &HwReg, 1322 const int64_t Offset, 1323 const int64_t Width, 1324 const SMLoc Loc); 1325 1326 void errorExpTgt(); 1327 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1328 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1329 1330 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1331 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1332 bool validateSOPLiteral(const MCInst &Inst) const; 1333 bool validateConstantBusLimitations(const MCInst &Inst); 1334 bool validateEarlyClobberLimitations(const MCInst &Inst); 1335 bool validateIntClampSupported(const MCInst &Inst); 1336 bool validateMIMGAtomicDMask(const MCInst &Inst); 1337 bool validateMIMGGatherDMask(const MCInst &Inst); 1338 bool validateMovrels(const MCInst &Inst); 1339 bool validateMIMGDataSize(const MCInst &Inst); 1340 bool validateMIMGAddrSize(const MCInst &Inst); 1341 bool validateMIMGD16(const MCInst &Inst); 1342 bool validateMIMGDim(const MCInst &Inst); 1343 bool validateLdsDirect(const MCInst &Inst); 1344 bool validateOpSel(const MCInst &Inst); 1345 bool validateVccOperand(unsigned Reg) const; 1346 bool validateVOP3Literal(const MCInst &Inst) const; 1347 unsigned getConstantBusLimit(unsigned Opcode) const; 1348 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1349 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1350 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1351 1352 bool isId(const StringRef Id) const; 1353 bool isId(const AsmToken &Token, const StringRef Id) const; 1354 bool isToken(const AsmToken::TokenKind Kind) const; 1355 bool trySkipId(const StringRef Id); 1356 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1357 bool trySkipToken(const AsmToken::TokenKind Kind); 1358 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1359 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1360 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1361 AsmToken::TokenKind getTokenKind() const; 1362 bool parseExpr(int64_t &Imm); 1363 bool parseExpr(OperandVector &Operands); 1364 StringRef getTokenStr() const; 
1365 AsmToken peekToken(); 1366 AsmToken getToken() const; 1367 SMLoc getLoc() const; 1368 void lex(); 1369 1370 public: 1371 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1372 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1373 1374 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1375 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1376 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1377 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1378 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1379 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1380 1381 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1382 const unsigned MinVal, 1383 const unsigned MaxVal, 1384 const StringRef ErrMsg); 1385 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1386 bool parseSwizzleOffset(int64_t &Imm); 1387 bool parseSwizzleMacro(int64_t &Imm); 1388 bool parseSwizzleQuadPerm(int64_t &Imm); 1389 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1390 bool parseSwizzleBroadcast(int64_t &Imm); 1391 bool parseSwizzleSwap(int64_t &Imm); 1392 bool parseSwizzleReverse(int64_t &Imm); 1393 1394 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1395 int64_t parseGPRIdxMacro(); 1396 1397 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1398 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1399 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1400 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1401 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1402 1403 AMDGPUOperand::Ptr defaultDLC() const; 1404 AMDGPUOperand::Ptr defaultGLC() const; 1405 AMDGPUOperand::Ptr defaultSLC() const; 1406 1407 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1408 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1409 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1410 AMDGPUOperand::Ptr defaultFlatOffset() const; 1411 1412 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1413 1414 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1415 OptionalImmIndexMap &OptionalIdx); 1416 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1417 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1418 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1419 1420 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1421 1422 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1423 bool IsAtomic = false); 1424 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1425 1426 OperandMatchResultTy parseDim(OperandVector &Operands); 1427 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1428 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1429 AMDGPUOperand::Ptr defaultRowMask() const; 1430 AMDGPUOperand::Ptr defaultBankMask() const; 1431 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1432 AMDGPUOperand::Ptr defaultFI() const; 1433 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1434 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1435 1436 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1437 AMDGPUOperand::ImmTy Type); 1438 OperandMatchResultTy 
parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert the literal to the floating-point semantics required by VT.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}
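// An integer literal can be safely truncated to Size bits when it is
// representable either as an unsigned or as a signed Size-bit value,
// i.e. when no significant bits would be lost by the truncation.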
static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
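// Unlike isInlinableImm() above, isLiteralImm() checks whether the immediate
// can be encoded as a literal constant in the instruction stream (possibly
// after a format conversion), not whether it matches a hardware inline
// constant.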
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal would be set to zeroes, but we still
    // accept such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
         (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}
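// Emit Val as a literal operand of Inst. FP literal tokens are converted to
// the format expected by the operand; integer literals that do not fit the
// inline-constant encoding are truncated to the operand's width.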
sizeof(double) : getOperandSize(InstDesc, OpNum); 1724 Val = applyInputFPModifiers(Val, Size); 1725 } 1726 1727 APInt Literal(64, Val); 1728 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1729 1730 if (Imm.IsFPImm) { // We got fp literal token 1731 switch (OpTy) { 1732 case AMDGPU::OPERAND_REG_IMM_INT64: 1733 case AMDGPU::OPERAND_REG_IMM_FP64: 1734 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1735 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1736 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1737 AsmParser->hasInv2PiInlineImm())) { 1738 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1739 return; 1740 } 1741 1742 // Non-inlineable 1743 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1744 // For fp operands we check if low 32 bits are zeros 1745 if (Literal.getLoBits(32) != 0) { 1746 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1747 "Can't encode literal as exact 64-bit floating-point operand. " 1748 "Low 32-bits will be set to zero"); 1749 } 1750 1751 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1752 return; 1753 } 1754 1755 // We don't allow fp literals in 64-bit integer instructions. It is 1756 // unclear how we should encode them. This case should be checked earlier 1757 // in predicate methods (isLiteralImm()) 1758 llvm_unreachable("fp literal in 64-bit integer instruction."); 1759 1760 case AMDGPU::OPERAND_REG_IMM_INT32: 1761 case AMDGPU::OPERAND_REG_IMM_FP32: 1762 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1763 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1764 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1765 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1766 case AMDGPU::OPERAND_REG_IMM_INT16: 1767 case AMDGPU::OPERAND_REG_IMM_FP16: 1768 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1769 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1770 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1771 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1772 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1773 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1774 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1775 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1776 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1777 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1778 bool lost; 1779 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1780 // Convert literal to single precision 1781 FPLiteral.convert(*getOpFltSemantics(OpTy), 1782 APFloat::rmNearestTiesToEven, &lost); 1783 // We allow precision lost but not overflow or underflow. This should be 1784 // checked earlier in isLiteralImm() 1785 1786 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1787 Inst.addOperand(MCOperand::createImm(ImmVal)); 1788 return; 1789 } 1790 default: 1791 llvm_unreachable("invalid operand size"); 1792 } 1793 1794 return; 1795 } 1796 1797 // We got int literal token. 1798 // Only sign extend inline immediates. 
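// For example (illustrative): with a 32-bit source operand,
//   v_add_f32 v0, 64, v1   ; 64 is within the inline constant range and is
//                          ; emitted unchanged
//   v_add_f32 v0, 99, v1   ; 99 is not inlinable, so its low 32 bits
//                          ; (0x00000063) are emitted as a 32-bit literal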
1799 switch (OpTy) { 1800 case AMDGPU::OPERAND_REG_IMM_INT32: 1801 case AMDGPU::OPERAND_REG_IMM_FP32: 1802 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1803 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1804 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1805 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1806 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1807 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1808 if (isSafeTruncation(Val, 32) && 1809 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1810 AsmParser->hasInv2PiInlineImm())) { 1811 Inst.addOperand(MCOperand::createImm(Val)); 1812 return; 1813 } 1814 1815 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1816 return; 1817 1818 case AMDGPU::OPERAND_REG_IMM_INT64: 1819 case AMDGPU::OPERAND_REG_IMM_FP64: 1820 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1821 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1822 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1823 Inst.addOperand(MCOperand::createImm(Val)); 1824 return; 1825 } 1826 1827 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1828 return; 1829 1830 case AMDGPU::OPERAND_REG_IMM_INT16: 1831 case AMDGPU::OPERAND_REG_IMM_FP16: 1832 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1833 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1834 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1835 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1836 if (isSafeTruncation(Val, 16) && 1837 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1838 AsmParser->hasInv2PiInlineImm())) { 1839 Inst.addOperand(MCOperand::createImm(Val)); 1840 return; 1841 } 1842 1843 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1844 return; 1845 1846 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1847 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1848 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1849 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1850 assert(isSafeTruncation(Val, 16)); 1851 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1852 AsmParser->hasInv2PiInlineImm())); 1853 1854 Inst.addOperand(MCOperand::createImm(Val)); 1855 return; 1856 } 1857 default: 1858 llvm_unreachable("invalid operand size"); 1859 } 1860 } 1861 1862 template <unsigned Bitwidth> 1863 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1864 APInt Literal(64, Imm.Val); 1865 1866 if (!Imm.IsFPImm) { 1867 // We got int literal token. 
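// For example (illustrative): with Bitwidth == 16, an integer token such as
// 0x12345 is emitted as its low 16 bits (0x2345); the fp path below instead
// converts the IEEE double literal to the target width.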
1868 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1869 return; 1870 } 1871 1872 bool Lost; 1873 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1874 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1875 APFloat::rmNearestTiesToEven, &Lost); 1876 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1877 } 1878 1879 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1880 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1881 } 1882 1883 static bool isInlineValue(unsigned Reg) { 1884 switch (Reg) { 1885 case AMDGPU::SRC_SHARED_BASE: 1886 case AMDGPU::SRC_SHARED_LIMIT: 1887 case AMDGPU::SRC_PRIVATE_BASE: 1888 case AMDGPU::SRC_PRIVATE_LIMIT: 1889 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1890 return true; 1891 case AMDGPU::SRC_VCCZ: 1892 case AMDGPU::SRC_EXECZ: 1893 case AMDGPU::SRC_SCC: 1894 return true; 1895 case AMDGPU::SGPR_NULL: 1896 return true; 1897 default: 1898 return false; 1899 } 1900 } 1901 1902 bool AMDGPUOperand::isInlineValue() const { 1903 return isRegKind() && ::isInlineValue(getReg()); 1904 } 1905 1906 //===----------------------------------------------------------------------===// 1907 // AsmParser 1908 //===----------------------------------------------------------------------===// 1909 1910 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1911 if (Is == IS_VGPR) { 1912 switch (RegWidth) { 1913 default: return -1; 1914 case 1: return AMDGPU::VGPR_32RegClassID; 1915 case 2: return AMDGPU::VReg_64RegClassID; 1916 case 3: return AMDGPU::VReg_96RegClassID; 1917 case 4: return AMDGPU::VReg_128RegClassID; 1918 case 5: return AMDGPU::VReg_160RegClassID; 1919 case 6: return AMDGPU::VReg_192RegClassID; 1920 case 8: return AMDGPU::VReg_256RegClassID; 1921 case 16: return AMDGPU::VReg_512RegClassID; 1922 case 32: return AMDGPU::VReg_1024RegClassID; 1923 } 1924 } else if (Is == IS_TTMP) { 1925 switch (RegWidth) { 1926 default: return -1; 1927 case 1: return AMDGPU::TTMP_32RegClassID; 1928 case 2: return AMDGPU::TTMP_64RegClassID; 1929 case 4: return AMDGPU::TTMP_128RegClassID; 1930 case 8: return AMDGPU::TTMP_256RegClassID; 1931 case 16: return AMDGPU::TTMP_512RegClassID; 1932 } 1933 } else if (Is == IS_SGPR) { 1934 switch (RegWidth) { 1935 default: return -1; 1936 case 1: return AMDGPU::SGPR_32RegClassID; 1937 case 2: return AMDGPU::SGPR_64RegClassID; 1938 case 3: return AMDGPU::SGPR_96RegClassID; 1939 case 4: return AMDGPU::SGPR_128RegClassID; 1940 case 5: return AMDGPU::SGPR_160RegClassID; 1941 case 6: return AMDGPU::SGPR_192RegClassID; 1942 case 8: return AMDGPU::SGPR_256RegClassID; 1943 case 16: return AMDGPU::SGPR_512RegClassID; 1944 } 1945 } else if (Is == IS_AGPR) { 1946 switch (RegWidth) { 1947 default: return -1; 1948 case 1: return AMDGPU::AGPR_32RegClassID; 1949 case 2: return AMDGPU::AReg_64RegClassID; 1950 case 3: return AMDGPU::AReg_96RegClassID; 1951 case 4: return AMDGPU::AReg_128RegClassID; 1952 case 5: return AMDGPU::AReg_160RegClassID; 1953 case 6: return AMDGPU::AReg_192RegClassID; 1954 case 8: return AMDGPU::AReg_256RegClassID; 1955 case 16: return AMDGPU::AReg_512RegClassID; 1956 case 32: return AMDGPU::AReg_1024RegClassID; 1957 } 1958 } 1959 return -1; 1960 } 1961 1962 static unsigned getSpecialRegForName(StringRef RegName) { 1963 return StringSwitch<unsigned>(RegName) 1964 .Case("exec", AMDGPU::EXEC) 1965 .Case("vcc", AMDGPU::VCC) 1966 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1967 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1968 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1969 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1970 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1971 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1972 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1973 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1974 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1975 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1976 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1977 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1978 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1979 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1980 .Case("m0", AMDGPU::M0) 1981 .Case("vccz", AMDGPU::SRC_VCCZ) 1982 .Case("src_vccz", AMDGPU::SRC_VCCZ) 1983 .Case("execz", AMDGPU::SRC_EXECZ) 1984 .Case("src_execz", AMDGPU::SRC_EXECZ) 1985 .Case("scc", AMDGPU::SRC_SCC) 1986 .Case("src_scc", AMDGPU::SRC_SCC) 1987 .Case("tba", AMDGPU::TBA) 1988 .Case("tma", AMDGPU::TMA) 1989 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1990 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1991 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1992 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1993 .Case("vcc_lo", AMDGPU::VCC_LO) 1994 .Case("vcc_hi", AMDGPU::VCC_HI) 1995 .Case("exec_lo", AMDGPU::EXEC_LO) 1996 .Case("exec_hi", AMDGPU::EXEC_HI) 1997 .Case("tma_lo", AMDGPU::TMA_LO) 1998 .Case("tma_hi", AMDGPU::TMA_HI) 1999 .Case("tba_lo", AMDGPU::TBA_LO) 2000 .Case("tba_hi", AMDGPU::TBA_HI) 2001 .Case("pc", AMDGPU::PC_REG) 2002 .Case("null", AMDGPU::SGPR_NULL) 2003 .Default(AMDGPU::NoRegister); 2004 } 2005 2006 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2007 SMLoc &EndLoc, bool RestoreOnFailure) { 2008 auto R = parseRegister(); 2009 if (!R) return true; 2010 assert(R->isReg()); 2011 RegNo = R->getReg(); 2012 StartLoc = R->getStartLoc(); 2013 EndLoc = R->getEndLoc(); 2014 return false; 2015 } 2016 2017 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2018 SMLoc &EndLoc) { 2019 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2020 } 2021 2022 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2023 SMLoc &StartLoc, 2024 SMLoc &EndLoc) { 2025 bool Result = 2026 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2027 bool PendingErrors = getParser().hasPendingError(); 2028 getParser().clearPendingErrors(); 2029 if (PendingErrors) 2030 return MatchOperand_ParseFail; 2031 if (Result) 2032 return MatchOperand_NoMatch; 2033 return MatchOperand_Success; 2034 } 2035 2036 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2037 RegisterKind RegKind, unsigned Reg1) { 2038 switch (RegKind) { 2039 case IS_SPECIAL: 2040 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2041 Reg = AMDGPU::EXEC; 2042 RegWidth = 2; 2043 return true; 2044 } 2045 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2046 Reg = AMDGPU::FLAT_SCR; 2047 RegWidth = 2; 2048 return true; 2049 } 2050 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2051 Reg = AMDGPU::XNACK_MASK; 2052 RegWidth = 2; 2053 return true; 2054 } 2055 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2056 Reg = AMDGPU::VCC; 2057 RegWidth = 2; 2058 return true; 2059 } 2060 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2061 Reg = AMDGPU::TBA; 2062 RegWidth = 2; 2063 return true; 2064 } 2065 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2066 Reg = AMDGPU::TMA; 2067 RegWidth = 2; 
2068 return true; 2069 } 2070 return false; 2071 case IS_VGPR: 2072 case IS_SGPR: 2073 case IS_AGPR: 2074 case IS_TTMP: 2075 if (Reg1 != Reg + RegWidth) { 2076 return false; 2077 } 2078 RegWidth++; 2079 return true; 2080 default: 2081 llvm_unreachable("unexpected register kind"); 2082 } 2083 } 2084 2085 struct RegInfo { 2086 StringLiteral Name; 2087 RegisterKind Kind; 2088 }; 2089 2090 static constexpr RegInfo RegularRegisters[] = { 2091 {{"v"}, IS_VGPR}, 2092 {{"s"}, IS_SGPR}, 2093 {{"ttmp"}, IS_TTMP}, 2094 {{"acc"}, IS_AGPR}, 2095 {{"a"}, IS_AGPR}, 2096 }; 2097 2098 static bool isRegularReg(RegisterKind Kind) { 2099 return Kind == IS_VGPR || 2100 Kind == IS_SGPR || 2101 Kind == IS_TTMP || 2102 Kind == IS_AGPR; 2103 } 2104 2105 static const RegInfo* getRegularRegInfo(StringRef Str) { 2106 for (const RegInfo &Reg : RegularRegisters) 2107 if (Str.startswith(Reg.Name)) 2108 return &Reg; 2109 return nullptr; 2110 } 2111 2112 static bool getRegNum(StringRef Str, unsigned& Num) { 2113 return !Str.getAsInteger(10, Num); 2114 } 2115 2116 bool 2117 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2118 const AsmToken &NextToken) const { 2119 2120 // A list of consecutive registers: [s0,s1,s2,s3] 2121 if (Token.is(AsmToken::LBrac)) 2122 return true; 2123 2124 if (!Token.is(AsmToken::Identifier)) 2125 return false; 2126 2127 // A single register like s0 or a range of registers like s[0:1] 2128 2129 StringRef Str = Token.getString(); 2130 const RegInfo *Reg = getRegularRegInfo(Str); 2131 if (Reg) { 2132 StringRef RegName = Reg->Name; 2133 StringRef RegSuffix = Str.substr(RegName.size()); 2134 if (!RegSuffix.empty()) { 2135 unsigned Num; 2136 // A single register with an index: rXX 2137 if (getRegNum(RegSuffix, Num)) 2138 return true; 2139 } else { 2140 // A range of registers: r[XX:YY]. 2141 if (NextToken.is(AsmToken::LBrac)) 2142 return true; 2143 } 2144 } 2145 2146 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2147 } 2148 2149 bool 2150 AMDGPUAsmParser::isRegister() 2151 { 2152 return isRegister(getToken(), peekToken()); 2153 } 2154 2155 unsigned 2156 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2157 unsigned RegNum, 2158 unsigned RegWidth) { 2159 2160 assert(isRegularReg(RegKind)); 2161 2162 unsigned AlignSize = 1; 2163 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2164 // SGPR and TTMP registers must be aligned. 2165 // Max required alignment is 4 dwords. 
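// For example (illustrative): s[4:7] has RegWidth == 4 and RegNum == 4, so it
// passes the alignment check below and maps to a 128-bit SGPR tuple, while
// s[2:5] is rejected because 2 is not a multiple of the required 4-dword
// alignment.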
2166 AlignSize = std::min(RegWidth, 4u); 2167 } 2168 2169 if (RegNum % AlignSize != 0) 2170 return AMDGPU::NoRegister; 2171 2172 unsigned RegIdx = RegNum / AlignSize; 2173 int RCID = getRegClass(RegKind, RegWidth); 2174 if (RCID == -1) 2175 return AMDGPU::NoRegister; 2176 2177 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2178 const MCRegisterClass RC = TRI->getRegClass(RCID); 2179 if (RegIdx >= RC.getNumRegs()) 2180 return AMDGPU::NoRegister; 2181 2182 return RC.getRegister(RegIdx); 2183 } 2184 2185 bool 2186 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2187 int64_t RegLo, RegHi; 2188 if (!trySkipToken(AsmToken::LBrac)) 2189 return false; 2190 2191 if (!parseExpr(RegLo)) 2192 return false; 2193 2194 if (trySkipToken(AsmToken::Colon)) { 2195 if (!parseExpr(RegHi)) 2196 return false; 2197 } else { 2198 RegHi = RegLo; 2199 } 2200 2201 if (!trySkipToken(AsmToken::RBrac)) 2202 return false; 2203 2204 if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi) 2205 return false; 2206 2207 Num = static_cast<unsigned>(RegLo); 2208 Width = (RegHi - RegLo) + 1; 2209 return true; 2210 } 2211 2212 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2213 unsigned &RegNum, unsigned &RegWidth, 2214 SmallVectorImpl<AsmToken> &Tokens) { 2215 assert(isToken(AsmToken::Identifier)); 2216 unsigned Reg = getSpecialRegForName(getTokenStr()); 2217 if (Reg) { 2218 RegNum = 0; 2219 RegWidth = 1; 2220 RegKind = IS_SPECIAL; 2221 Tokens.push_back(getToken()); 2222 lex(); // skip register name 2223 } 2224 return Reg; 2225 } 2226 2227 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2228 unsigned &RegNum, unsigned &RegWidth, 2229 SmallVectorImpl<AsmToken> &Tokens) { 2230 assert(isToken(AsmToken::Identifier)); 2231 StringRef RegName = getTokenStr(); 2232 2233 const RegInfo *RI = getRegularRegInfo(RegName); 2234 if (!RI) 2235 return AMDGPU::NoRegister; 2236 Tokens.push_back(getToken()); 2237 lex(); // skip register name 2238 2239 RegKind = RI->Kind; 2240 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2241 if (!RegSuffix.empty()) { 2242 // Single 32-bit register: vXX. 2243 if (!getRegNum(RegSuffix, RegNum)) 2244 return AMDGPU::NoRegister; 2245 RegWidth = 1; 2246 } else { 2247 // Range of registers: v[XX:YY]. ":YY" is optional. 
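// For example (illustrative): ParseRegRange maps v[8:11] to RegNum == 8,
// RegWidth == 4, and v[8] (no ":YY") to RegNum == 8, RegWidth == 1.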
2248 if (!ParseRegRange(RegNum, RegWidth)) 2249 return AMDGPU::NoRegister; 2250 } 2251 2252 return getRegularReg(RegKind, RegNum, RegWidth); 2253 } 2254 2255 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2256 unsigned &RegWidth, 2257 SmallVectorImpl<AsmToken> &Tokens) { 2258 unsigned Reg = AMDGPU::NoRegister; 2259 2260 if (!trySkipToken(AsmToken::LBrac)) 2261 return AMDGPU::NoRegister; 2262 2263 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2264 2265 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2266 return AMDGPU::NoRegister; 2267 if (RegWidth != 1) 2268 return AMDGPU::NoRegister; 2269 2270 for (; trySkipToken(AsmToken::Comma); ) { 2271 RegisterKind NextRegKind; 2272 unsigned NextReg, NextRegNum, NextRegWidth; 2273 2274 if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth, 2275 Tokens)) 2276 return AMDGPU::NoRegister; 2277 if (NextRegWidth != 1) 2278 return AMDGPU::NoRegister; 2279 if (NextRegKind != RegKind) 2280 return AMDGPU::NoRegister; 2281 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg)) 2282 return AMDGPU::NoRegister; 2283 } 2284 2285 if (!trySkipToken(AsmToken::RBrac)) 2286 return AMDGPU::NoRegister; 2287 2288 if (isRegularReg(RegKind)) 2289 Reg = getRegularReg(RegKind, RegNum, RegWidth); 2290 2291 return Reg; 2292 } 2293 2294 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2295 unsigned &RegNum, unsigned &RegWidth, 2296 SmallVectorImpl<AsmToken> &Tokens) { 2297 Reg = AMDGPU::NoRegister; 2298 2299 if (isToken(AsmToken::Identifier)) { 2300 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2301 if (Reg == AMDGPU::NoRegister) 2302 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2303 } else { 2304 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2305 } 2306 2307 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2308 return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg); 2309 } 2310 2311 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2312 unsigned &RegNum, unsigned &RegWidth, 2313 bool RestoreOnFailure) { 2314 Reg = AMDGPU::NoRegister; 2315 2316 SmallVector<AsmToken, 1> Tokens; 2317 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2318 if (RestoreOnFailure) { 2319 while (!Tokens.empty()) { 2320 getLexer().UnLex(Tokens.pop_back_val()); 2321 } 2322 } 2323 return true; 2324 } 2325 return false; 2326 } 2327 2328 Optional<StringRef> 2329 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2330 switch (RegKind) { 2331 case IS_VGPR: 2332 return StringRef(".amdgcn.next_free_vgpr"); 2333 case IS_SGPR: 2334 return StringRef(".amdgcn.next_free_sgpr"); 2335 default: 2336 return None; 2337 } 2338 } 2339 2340 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2341 auto SymbolName = getGprCountSymbolName(RegKind); 2342 assert(SymbolName && "initializing invalid register kind"); 2343 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2344 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2345 } 2346 2347 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2348 unsigned DwordRegIndex, 2349 unsigned RegWidth) { 2350 // Symbols are only defined for GCN targets 2351 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2352 return true; 2353 2354 auto SymbolName = getGprCountSymbolName(RegKind); 2355 if (!SymbolName) 2356 return true; 2357 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2358 2359 int64_t NewMax 
= DwordRegIndex + RegWidth - 1; 2360 int64_t OldCount; 2361 2362 if (!Sym->isVariable()) 2363 return !Error(getParser().getTok().getLoc(), 2364 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2365 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2366 return !Error( 2367 getParser().getTok().getLoc(), 2368 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2369 2370 if (OldCount <= NewMax) 2371 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2372 2373 return true; 2374 } 2375 2376 std::unique_ptr<AMDGPUOperand> 2377 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2378 const auto &Tok = Parser.getTok(); 2379 SMLoc StartLoc = Tok.getLoc(); 2380 SMLoc EndLoc = Tok.getEndLoc(); 2381 RegisterKind RegKind; 2382 unsigned Reg, RegNum, RegWidth; 2383 2384 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2385 //FIXME: improve error messages (bug 41303). 2386 Error(StartLoc, "not a valid operand."); 2387 return nullptr; 2388 } 2389 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2390 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2391 return nullptr; 2392 } else 2393 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2394 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2395 } 2396 2397 OperandMatchResultTy 2398 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2399 // TODO: add syntactic sugar for 1/(2*PI) 2400 2401 assert(!isRegister()); 2402 assert(!isModifier()); 2403 2404 const auto& Tok = getToken(); 2405 const auto& NextTok = peekToken(); 2406 bool IsReal = Tok.is(AsmToken::Real); 2407 SMLoc S = getLoc(); 2408 bool Negate = false; 2409 2410 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2411 lex(); 2412 IsReal = true; 2413 Negate = true; 2414 } 2415 2416 if (IsReal) { 2417 // Floating-point expressions are not supported. 2418 // Can only allow floating-point literals with an 2419 // optional sign. 2420 2421 StringRef Num = getTokenStr(); 2422 lex(); 2423 2424 APFloat RealVal(APFloat::IEEEdouble()); 2425 auto roundMode = APFloat::rmNearestTiesToEven; 2426 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2427 return MatchOperand_ParseFail; 2428 } 2429 if (Negate) 2430 RealVal.changeSign(); 2431 2432 Operands.push_back( 2433 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2434 AMDGPUOperand::ImmTyNone, true)); 2435 2436 return MatchOperand_Success; 2437 2438 } else { 2439 int64_t IntVal; 2440 const MCExpr *Expr; 2441 SMLoc S = getLoc(); 2442 2443 if (HasSP3AbsModifier) { 2444 // This is a workaround for handling expressions 2445 // as arguments of SP3 'abs' modifier, for example: 2446 // |1.0| 2447 // |-1| 2448 // |1+x| 2449 // This syntax is not compatible with syntax of standard 2450 // MC expressions (due to the trailing '|'). 
2451 SMLoc EndLoc; 2452 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2453 return MatchOperand_ParseFail; 2454 } else { 2455 if (Parser.parseExpression(Expr)) 2456 return MatchOperand_ParseFail; 2457 } 2458 2459 if (Expr->evaluateAsAbsolute(IntVal)) { 2460 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2461 } else { 2462 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2463 } 2464 2465 return MatchOperand_Success; 2466 } 2467 2468 return MatchOperand_NoMatch; 2469 } 2470 2471 OperandMatchResultTy 2472 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2473 if (!isRegister()) 2474 return MatchOperand_NoMatch; 2475 2476 if (auto R = parseRegister()) { 2477 assert(R->isReg()); 2478 Operands.push_back(std::move(R)); 2479 return MatchOperand_Success; 2480 } 2481 return MatchOperand_ParseFail; 2482 } 2483 2484 OperandMatchResultTy 2485 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2486 auto res = parseReg(Operands); 2487 if (res != MatchOperand_NoMatch) { 2488 return res; 2489 } else if (isModifier()) { 2490 return MatchOperand_NoMatch; 2491 } else { 2492 return parseImm(Operands, HasSP3AbsMod); 2493 } 2494 } 2495 2496 bool 2497 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2498 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2499 const auto &str = Token.getString(); 2500 return str == "abs" || str == "neg" || str == "sext"; 2501 } 2502 return false; 2503 } 2504 2505 bool 2506 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2507 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2508 } 2509 2510 bool 2511 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2512 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2513 } 2514 2515 bool 2516 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2517 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2518 } 2519 2520 // Check if this is an operand modifier or an opcode modifier 2521 // which may look like an expression but it is not. We should 2522 // avoid parsing these modifiers as expressions. Currently 2523 // recognized sequences are: 2524 // |...| 2525 // abs(...) 2526 // neg(...) 2527 // sext(...) 2528 // -reg 2529 // -|...| 2530 // -abs(...) 2531 // name:... 2532 // Note that simple opcode modifiers like 'gds' may be parsed as 2533 // expressions; this is a special case. See getExpressionAsToken. 2534 // 2535 bool 2536 AMDGPUAsmParser::isModifier() { 2537 2538 AsmToken Tok = getToken(); 2539 AsmToken NextToken[2]; 2540 peekTokens(NextToken); 2541 2542 return isOperandModifier(Tok, NextToken[0]) || 2543 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2544 isOpcodeModifierWithVal(Tok, NextToken[0]); 2545 } 2546 2547 // Check if the current token is an SP3 'neg' modifier. 2548 // Currently this modifier is allowed in the following context: 2549 // 2550 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2551 // 2. Before an 'abs' modifier: -abs(...) 2552 // 3. Before an SP3 'abs' modifier: -|...| 2553 // 2554 // In all other cases "-" is handled as a part 2555 // of an expression that follows the sign. 
2556 //
2557 // Note: When "-" is followed by an integer literal,
2558 // this is interpreted as integer negation rather
2559 // than a floating-point NEG modifier applied to N.
2560 // Besides being counter-intuitive, such use of a floating-point
2561 // NEG modifier would have resulted in a different meaning
2562 // of integer literals used with VOP1/2/C and VOP3,
2563 // for example:
2564 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2565 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2566 // Negative fp literals with preceding "-" are
2567 // handled likewise for uniformity.
2568 //
2569 bool
2570 AMDGPUAsmParser::parseSP3NegModifier() {
2571
2572 AsmToken NextToken[2];
2573 peekTokens(NextToken);
2574
2575 if (isToken(AsmToken::Minus) &&
2576 (isRegister(NextToken[0], NextToken[1]) ||
2577 NextToken[0].is(AsmToken::Pipe) ||
2578 isId(NextToken[0], "abs"))) {
2579 lex();
2580 return true;
2581 }
2582
2583 return false;
2584 }
2585
2586 OperandMatchResultTy
2587 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2588 bool AllowImm) {
2589 bool Neg, SP3Neg;
2590 bool Abs, SP3Abs;
2591 SMLoc Loc;
2592
2593 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2594 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2595 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2596 return MatchOperand_ParseFail;
2597 }
2598
2599 SP3Neg = parseSP3NegModifier();
2600
2601 Loc = getLoc();
2602 Neg = trySkipId("neg");
2603 if (Neg && SP3Neg) {
2604 Error(Loc, "expected register or immediate");
2605 return MatchOperand_ParseFail;
2606 }
2607 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2608 return MatchOperand_ParseFail;
2609
2610 Abs = trySkipId("abs");
2611 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2612 return MatchOperand_ParseFail;
2613
2614 Loc = getLoc();
2615 SP3Abs = trySkipToken(AsmToken::Pipe);
2616 if (Abs && SP3Abs) {
2617 Error(Loc, "expected register or immediate");
2618 return MatchOperand_ParseFail;
2619 }
2620
2621 OperandMatchResultTy Res;
2622 if (AllowImm) {
2623 Res = parseRegOrImm(Operands, SP3Abs);
2624 } else {
2625 Res = parseReg(Operands);
2626 }
2627 if (Res != MatchOperand_Success) {
2628 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2629 } 2630 2631 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2632 return MatchOperand_ParseFail; 2633 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2634 return MatchOperand_ParseFail; 2635 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2636 return MatchOperand_ParseFail; 2637 2638 AMDGPUOperand::Modifiers Mods; 2639 Mods.Abs = Abs || SP3Abs; 2640 Mods.Neg = Neg || SP3Neg; 2641 2642 if (Mods.hasFPModifiers()) { 2643 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2644 if (Op.isExpr()) { 2645 Error(Op.getStartLoc(), "expected an absolute expression"); 2646 return MatchOperand_ParseFail; 2647 } 2648 Op.setModifiers(Mods); 2649 } 2650 return MatchOperand_Success; 2651 } 2652 2653 OperandMatchResultTy 2654 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2655 bool AllowImm) { 2656 bool Sext = trySkipId("sext"); 2657 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2658 return MatchOperand_ParseFail; 2659 2660 OperandMatchResultTy Res; 2661 if (AllowImm) { 2662 Res = parseRegOrImm(Operands); 2663 } else { 2664 Res = parseReg(Operands); 2665 } 2666 if (Res != MatchOperand_Success) { 2667 return Sext? MatchOperand_ParseFail : Res; 2668 } 2669 2670 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2671 return MatchOperand_ParseFail; 2672 2673 AMDGPUOperand::Modifiers Mods; 2674 Mods.Sext = Sext; 2675 2676 if (Mods.hasIntModifiers()) { 2677 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2678 if (Op.isExpr()) { 2679 Error(Op.getStartLoc(), "expected an absolute expression"); 2680 return MatchOperand_ParseFail; 2681 } 2682 Op.setModifiers(Mods); 2683 } 2684 2685 return MatchOperand_Success; 2686 } 2687 2688 OperandMatchResultTy 2689 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2690 return parseRegOrImmWithFPInputMods(Operands, false); 2691 } 2692 2693 OperandMatchResultTy 2694 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2695 return parseRegOrImmWithIntInputMods(Operands, false); 2696 } 2697 2698 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2699 auto Loc = getLoc(); 2700 if (trySkipId("off")) { 2701 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2702 AMDGPUOperand::ImmTyOff, false)); 2703 return MatchOperand_Success; 2704 } 2705 2706 if (!isRegister()) 2707 return MatchOperand_NoMatch; 2708 2709 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2710 if (Reg) { 2711 Operands.push_back(std::move(Reg)); 2712 return MatchOperand_Success; 2713 } 2714 2715 return MatchOperand_ParseFail; 2716 2717 } 2718 2719 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2720 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2721 2722 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2723 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2724 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2725 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2726 return Match_InvalidOperand; 2727 2728 if ((TSFlags & SIInstrFlags::VOP3) && 2729 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2730 getForcedEncodingSize() != 64) 2731 return Match_PreferE32; 2732 2733 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2734 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2735 // v_mac_f32/16 allow only dst_sel == DWORD; 2736 auto OpNum = 2737 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2738 const auto &Op = Inst.getOperand(OpNum); 2739 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2740 return Match_InvalidOperand; 2741 } 2742 } 2743 2744 return Match_Success; 2745 } 2746 2747 // What asm variants we should check 2748 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2749 if (getForcedEncodingSize() == 32) { 2750 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2751 return makeArrayRef(Variants); 2752 } 2753 2754 if (isForcedVOP3()) { 2755 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2756 return makeArrayRef(Variants); 2757 } 2758 2759 if (isForcedSDWA()) { 2760 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2761 AMDGPUAsmVariants::SDWA9}; 2762 return makeArrayRef(Variants); 2763 } 2764 2765 if (isForcedDPP()) { 2766 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2767 return makeArrayRef(Variants); 2768 } 2769 2770 static const unsigned Variants[] = { 2771 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2772 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2773 }; 2774 2775 return makeArrayRef(Variants); 2776 } 2777 2778 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2779 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2780 const unsigned Num = Desc.getNumImplicitUses(); 2781 for (unsigned i = 0; i < Num; ++i) { 2782 unsigned Reg = Desc.ImplicitUses[i]; 2783 switch (Reg) { 2784 case AMDGPU::FLAT_SCR: 2785 case AMDGPU::VCC: 2786 case AMDGPU::VCC_LO: 2787 case AMDGPU::VCC_HI: 2788 case AMDGPU::M0: 2789 return Reg; 2790 default: 2791 break; 2792 } 2793 } 2794 return AMDGPU::NoRegister; 2795 } 2796 2797 // NB: This code is correct only when used to check constant 2798 // bus limitations because GFX7 support no f16 inline constants. 2799 // Note that there are no cases when a GFX7 opcode violates 2800 // constant bus limitations due to the use of an f16 constant. 
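// For example (illustrative of the size-based dispatch below): an 8-byte
// operand is checked with isInlinableLiteral64, a 4-byte operand with
// isInlinableLiteral32 (the 32-bit patterns for 0.5 or -16 pass, 0x12345
// does not), and 2-byte operands use the 16-bit or packed v2i16/v2f16
// helpers depending on the operand type.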
2801 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2802 unsigned OpIdx) const { 2803 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2804 2805 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2806 return false; 2807 } 2808 2809 const MCOperand &MO = Inst.getOperand(OpIdx); 2810 2811 int64_t Val = MO.getImm(); 2812 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2813 2814 switch (OpSize) { // expected operand size 2815 case 8: 2816 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2817 case 4: 2818 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2819 case 2: { 2820 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2821 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2822 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2823 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2824 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2825 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2826 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2827 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2828 } else { 2829 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2830 } 2831 } 2832 default: 2833 llvm_unreachable("invalid operand size"); 2834 } 2835 } 2836 2837 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2838 if (!isGFX10()) 2839 return 1; 2840 2841 switch (Opcode) { 2842 // 64-bit shift instructions can use only one scalar value input 2843 case AMDGPU::V_LSHLREV_B64: 2844 case AMDGPU::V_LSHLREV_B64_gfx10: 2845 case AMDGPU::V_LSHL_B64: 2846 case AMDGPU::V_LSHRREV_B64: 2847 case AMDGPU::V_LSHRREV_B64_gfx10: 2848 case AMDGPU::V_LSHR_B64: 2849 case AMDGPU::V_ASHRREV_I64: 2850 case AMDGPU::V_ASHRREV_I64_gfx10: 2851 case AMDGPU::V_ASHR_I64: 2852 return 1; 2853 default: 2854 return 2; 2855 } 2856 } 2857 2858 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2859 const MCOperand &MO = Inst.getOperand(OpIdx); 2860 if (MO.isImm()) { 2861 return !isInlineConstant(Inst, OpIdx); 2862 } else if (MO.isReg()) { 2863 auto Reg = MO.getReg(); 2864 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2865 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2866 } else { 2867 return true; 2868 } 2869 } 2870 2871 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2872 const unsigned Opcode = Inst.getOpcode(); 2873 const MCInstrDesc &Desc = MII.get(Opcode); 2874 unsigned ConstantBusUseCount = 0; 2875 unsigned NumLiterals = 0; 2876 unsigned LiteralSize; 2877 2878 if (Desc.TSFlags & 2879 (SIInstrFlags::VOPC | 2880 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2881 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2882 SIInstrFlags::SDWA)) { 2883 // Check special imm operands (used by madmk, etc) 2884 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2885 ++ConstantBusUseCount; 2886 } 2887 2888 SmallDenseSet<unsigned> SGPRsUsed; 2889 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2890 if (SGPRUsed != AMDGPU::NoRegister) { 2891 SGPRsUsed.insert(SGPRUsed); 2892 ++ConstantBusUseCount; 2893 } 2894 2895 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2896 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2897 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2898 2899 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2900 2901 for (int OpIdx : OpIndices) { 2902 if (OpIdx == -1) break; 2903 2904 const MCOperand &MO = 
Inst.getOperand(OpIdx); 2905 if (usesConstantBus(Inst, OpIdx)) { 2906 if (MO.isReg()) { 2907 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2908 // Pairs of registers with a partial intersections like these 2909 // s0, s[0:1] 2910 // flat_scratch_lo, flat_scratch 2911 // flat_scratch_lo, flat_scratch_hi 2912 // are theoretically valid but they are disabled anyway. 2913 // Note that this code mimics SIInstrInfo::verifyInstruction 2914 if (!SGPRsUsed.count(Reg)) { 2915 SGPRsUsed.insert(Reg); 2916 ++ConstantBusUseCount; 2917 } 2918 } else { // Expression or a literal 2919 2920 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2921 continue; // special operand like VINTERP attr_chan 2922 2923 // An instruction may use only one literal. 2924 // This has been validated on the previous step. 2925 // See validateVOP3Literal. 2926 // This literal may be used as more than one operand. 2927 // If all these operands are of the same size, 2928 // this literal counts as one scalar value. 2929 // Otherwise it counts as 2 scalar values. 2930 // See "GFX10 Shader Programming", section 3.6.2.3. 2931 2932 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2933 if (Size < 4) Size = 4; 2934 2935 if (NumLiterals == 0) { 2936 NumLiterals = 1; 2937 LiteralSize = Size; 2938 } else if (LiteralSize != Size) { 2939 NumLiterals = 2; 2940 } 2941 } 2942 } 2943 } 2944 } 2945 ConstantBusUseCount += NumLiterals; 2946 2947 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 2948 } 2949 2950 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2951 const unsigned Opcode = Inst.getOpcode(); 2952 const MCInstrDesc &Desc = MII.get(Opcode); 2953 2954 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2955 if (DstIdx == -1 || 2956 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2957 return true; 2958 } 2959 2960 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2961 2962 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2963 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2964 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2965 2966 assert(DstIdx != -1); 2967 const MCOperand &Dst = Inst.getOperand(DstIdx); 2968 assert(Dst.isReg()); 2969 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2970 2971 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2972 2973 for (int SrcIdx : SrcIndices) { 2974 if (SrcIdx == -1) break; 2975 const MCOperand &Src = Inst.getOperand(SrcIdx); 2976 if (Src.isReg()) { 2977 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2978 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2979 return false; 2980 } 2981 } 2982 } 2983 2984 return true; 2985 } 2986 2987 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2988 2989 const unsigned Opc = Inst.getOpcode(); 2990 const MCInstrDesc &Desc = MII.get(Opc); 2991 2992 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2993 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2994 assert(ClampIdx != -1); 2995 return Inst.getOperand(ClampIdx).getImm() == 0; 2996 } 2997 2998 return true; 2999 } 3000 3001 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3002 3003 const unsigned Opc = Inst.getOpcode(); 3004 const MCInstrDesc &Desc = MII.get(Opc); 3005 3006 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3007 return true; 3008 3009 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3010 int DMaskIdx 
= AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3011 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3012 3013 assert(VDataIdx != -1); 3014 assert(DMaskIdx != -1); 3015 assert(TFEIdx != -1); 3016 3017 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3018 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3019 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3020 if (DMask == 0) 3021 DMask = 1; 3022 3023 unsigned DataSize = 3024 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3025 if (hasPackedD16()) { 3026 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3027 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3028 DataSize = (DataSize + 1) / 2; 3029 } 3030 3031 return (VDataSize / 4) == DataSize + TFESize; 3032 } 3033 3034 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3035 const unsigned Opc = Inst.getOpcode(); 3036 const MCInstrDesc &Desc = MII.get(Opc); 3037 3038 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3039 return true; 3040 3041 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3042 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3043 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3044 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3045 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3046 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3047 3048 assert(VAddr0Idx != -1); 3049 assert(SrsrcIdx != -1); 3050 assert(DimIdx != -1); 3051 assert(SrsrcIdx > VAddr0Idx); 3052 3053 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3054 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3055 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3056 unsigned VAddrSize = 3057 IsNSA ? SrsrcIdx - VAddr0Idx 3058 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3059 3060 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3061 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3062 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3063 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3064 if (!IsNSA) { 3065 if (AddrSize > 8) 3066 AddrSize = 16; 3067 else if (AddrSize > 4) 3068 AddrSize = 8; 3069 } 3070 3071 return VAddrSize == AddrSize; 3072 } 3073 3074 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3075 3076 const unsigned Opc = Inst.getOpcode(); 3077 const MCInstrDesc &Desc = MII.get(Opc); 3078 3079 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3080 return true; 3081 if (!Desc.mayLoad() || !Desc.mayStore()) 3082 return true; // Not atomic 3083 3084 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3085 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3086 3087 // This is an incomplete check because image_atomic_cmpswap 3088 // may only use 0x3 and 0xf while other atomic operations 3089 // may use 0x1 and 0x3. However these limitations are 3090 // verified when we check that dmask matches dst size. 
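// For example (illustrative): image_atomic_add with dmask:0x1 and
// image_atomic_cmpswap with dmask:0x3 pass this check, while dmask:0x5 is
// rejected because it is not one of 0x1, 0x3 or 0xf.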
3091 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3092 } 3093 3094 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3095 3096 const unsigned Opc = Inst.getOpcode(); 3097 const MCInstrDesc &Desc = MII.get(Opc); 3098 3099 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3100 return true; 3101 3102 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3103 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3104 3105 // GATHER4 instructions use dmask in a different fashion compared to 3106 // other MIMG instructions. The only useful DMASK values are 3107 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3108 // (red,red,red,red) etc.) The ISA document doesn't mention 3109 // this. 3110 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3111 } 3112 3113 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3114 { 3115 switch (Opcode) { 3116 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3117 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3118 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3119 return true; 3120 default: 3121 return false; 3122 } 3123 } 3124 3125 // movrels* opcodes should only allow VGPRS as src0. 3126 // This is specified in .td description for vop1/vop3, 3127 // but sdwa is handled differently. See isSDWAOperand. 3128 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3129 3130 const unsigned Opc = Inst.getOpcode(); 3131 const MCInstrDesc &Desc = MII.get(Opc); 3132 3133 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3134 return true; 3135 3136 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3137 assert(Src0Idx != -1); 3138 3139 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3140 if (!Src0.isReg()) 3141 return false; 3142 3143 auto Reg = Src0.getReg(); 3144 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3145 return !isSGPR(mc2PseudoReg(Reg), TRI); 3146 } 3147 3148 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3149 3150 const unsigned Opc = Inst.getOpcode(); 3151 const MCInstrDesc &Desc = MII.get(Opc); 3152 3153 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3154 return true; 3155 3156 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3157 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3158 if (isCI() || isSI()) 3159 return false; 3160 } 3161 3162 return true; 3163 } 3164 3165 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3166 const unsigned Opc = Inst.getOpcode(); 3167 const MCInstrDesc &Desc = MII.get(Opc); 3168 3169 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3170 return true; 3171 3172 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3173 if (DimIdx < 0) 3174 return true; 3175 3176 long Imm = Inst.getOperand(DimIdx).getImm(); 3177 if (Imm < 0 || Imm >= 8) 3178 return false; 3179 3180 return true; 3181 } 3182 3183 static bool IsRevOpcode(const unsigned Opcode) 3184 { 3185 switch (Opcode) { 3186 case AMDGPU::V_SUBREV_F32_e32: 3187 case AMDGPU::V_SUBREV_F32_e64: 3188 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3189 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3190 case AMDGPU::V_SUBREV_F32_e32_vi: 3191 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3192 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3193 case AMDGPU::V_SUBREV_F32_e64_vi: 3194 3195 case AMDGPU::V_SUBREV_I32_e32: 3196 case AMDGPU::V_SUBREV_I32_e64: 3197 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3198 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3199 3200 case AMDGPU::V_SUBBREV_U32_e32: 3201 case AMDGPU::V_SUBBREV_U32_e64: 3202 case 
AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3203 case AMDGPU::V_SUBBREV_U32_e32_vi: 3204 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3205 case AMDGPU::V_SUBBREV_U32_e64_vi: 3206 3207 case AMDGPU::V_SUBREV_U32_e32: 3208 case AMDGPU::V_SUBREV_U32_e64: 3209 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3210 case AMDGPU::V_SUBREV_U32_e32_vi: 3211 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3212 case AMDGPU::V_SUBREV_U32_e64_vi: 3213 3214 case AMDGPU::V_SUBREV_F16_e32: 3215 case AMDGPU::V_SUBREV_F16_e64: 3216 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3217 case AMDGPU::V_SUBREV_F16_e32_vi: 3218 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3219 case AMDGPU::V_SUBREV_F16_e64_vi: 3220 3221 case AMDGPU::V_SUBREV_U16_e32: 3222 case AMDGPU::V_SUBREV_U16_e64: 3223 case AMDGPU::V_SUBREV_U16_e32_vi: 3224 case AMDGPU::V_SUBREV_U16_e64_vi: 3225 3226 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3227 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3228 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3229 3230 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3231 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3232 3233 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3234 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3235 3236 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3237 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3238 3239 case AMDGPU::V_LSHRREV_B32_e32: 3240 case AMDGPU::V_LSHRREV_B32_e64: 3241 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3242 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3243 case AMDGPU::V_LSHRREV_B32_e32_vi: 3244 case AMDGPU::V_LSHRREV_B32_e64_vi: 3245 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3246 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3247 3248 case AMDGPU::V_ASHRREV_I32_e32: 3249 case AMDGPU::V_ASHRREV_I32_e64: 3250 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3251 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3252 case AMDGPU::V_ASHRREV_I32_e32_vi: 3253 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3254 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3255 case AMDGPU::V_ASHRREV_I32_e64_vi: 3256 3257 case AMDGPU::V_LSHLREV_B32_e32: 3258 case AMDGPU::V_LSHLREV_B32_e64: 3259 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3260 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3261 case AMDGPU::V_LSHLREV_B32_e32_vi: 3262 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3263 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3264 case AMDGPU::V_LSHLREV_B32_e64_vi: 3265 3266 case AMDGPU::V_LSHLREV_B16_e32: 3267 case AMDGPU::V_LSHLREV_B16_e64: 3268 case AMDGPU::V_LSHLREV_B16_e32_vi: 3269 case AMDGPU::V_LSHLREV_B16_e64_vi: 3270 case AMDGPU::V_LSHLREV_B16_gfx10: 3271 3272 case AMDGPU::V_LSHRREV_B16_e32: 3273 case AMDGPU::V_LSHRREV_B16_e64: 3274 case AMDGPU::V_LSHRREV_B16_e32_vi: 3275 case AMDGPU::V_LSHRREV_B16_e64_vi: 3276 case AMDGPU::V_LSHRREV_B16_gfx10: 3277 3278 case AMDGPU::V_ASHRREV_I16_e32: 3279 case AMDGPU::V_ASHRREV_I16_e64: 3280 case AMDGPU::V_ASHRREV_I16_e32_vi: 3281 case AMDGPU::V_ASHRREV_I16_e64_vi: 3282 case AMDGPU::V_ASHRREV_I16_gfx10: 3283 3284 case AMDGPU::V_LSHLREV_B64: 3285 case AMDGPU::V_LSHLREV_B64_gfx10: 3286 case AMDGPU::V_LSHLREV_B64_vi: 3287 3288 case AMDGPU::V_LSHRREV_B64: 3289 case AMDGPU::V_LSHRREV_B64_gfx10: 3290 case AMDGPU::V_LSHRREV_B64_vi: 3291 3292 case AMDGPU::V_ASHRREV_I64: 3293 case AMDGPU::V_ASHRREV_I64_gfx10: 3294 case AMDGPU::V_ASHRREV_I64_vi: 3295 3296 case AMDGPU::V_PK_LSHLREV_B16: 3297 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3298 case AMDGPU::V_PK_LSHLREV_B16_vi: 3299 3300 case AMDGPU::V_PK_LSHRREV_B16: 3301 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3302 case AMDGPU::V_PK_LSHRREV_B16_vi: 3303 case AMDGPU::V_PK_ASHRREV_I16: 3304 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3305 case AMDGPU::V_PK_ASHRREV_I16_vi: 3306 return true; 
3307 default: 3308 return false; 3309 } 3310 } 3311 3312 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3313 3314 using namespace SIInstrFlags; 3315 const unsigned Opcode = Inst.getOpcode(); 3316 const MCInstrDesc &Desc = MII.get(Opcode); 3317 3318 // lds_direct register is defined so that it can be used 3319 // with 9-bit operands only. Ignore encodings which do not accept these. 3320 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3321 return true; 3322 3323 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3324 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3325 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3326 3327 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3328 3329 // lds_direct cannot be specified as either src1 or src2. 3330 for (int SrcIdx : SrcIndices) { 3331 if (SrcIdx == -1) break; 3332 const MCOperand &Src = Inst.getOperand(SrcIdx); 3333 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3334 return false; 3335 } 3336 } 3337 3338 if (Src0Idx == -1) 3339 return true; 3340 3341 const MCOperand &Src = Inst.getOperand(Src0Idx); 3342 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3343 return true; 3344 3345 // lds_direct is specified as src0. Check additional limitations. 3346 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3347 } 3348 3349 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3350 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3351 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3352 if (Op.isFlatOffset()) 3353 return Op.getStartLoc(); 3354 } 3355 return getLoc(); 3356 } 3357 3358 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3359 const OperandVector &Operands) { 3360 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3361 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3362 return true; 3363 3364 auto Opcode = Inst.getOpcode(); 3365 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3366 assert(OpNum != -1); 3367 3368 const auto &Op = Inst.getOperand(OpNum); 3369 if (!hasFlatOffsets() && Op.getImm() != 0) { 3370 Error(getFlatOffsetLoc(Operands), 3371 "flat offset modifier is not supported on this GPU"); 3372 return false; 3373 } 3374 3375 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3376 // For FLAT segment the offset must be positive; 3377 // MSB is ignored and forced to zero. 3378 unsigned OffsetSize = isGFX9() ? 13 : 12; 3379 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3380 if (!isIntN(OffsetSize, Op.getImm())) { 3381 Error(getFlatOffsetLoc(Operands), 3382 isGFX9() ? "expected a 13-bit signed offset" : 3383 "expected a 12-bit signed offset"); 3384 return false; 3385 } 3386 } else { 3387 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3388 Error(getFlatOffsetLoc(Operands), 3389 isGFX9() ? 
"expected a 12-bit unsigned offset" : 3390 "expected an 11-bit unsigned offset"); 3391 return false; 3392 } 3393 } 3394 3395 return true; 3396 } 3397 3398 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3399 unsigned Opcode = Inst.getOpcode(); 3400 const MCInstrDesc &Desc = MII.get(Opcode); 3401 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3402 return true; 3403 3404 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3405 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3406 3407 const int OpIndices[] = { Src0Idx, Src1Idx }; 3408 3409 unsigned NumExprs = 0; 3410 unsigned NumLiterals = 0; 3411 uint32_t LiteralValue; 3412 3413 for (int OpIdx : OpIndices) { 3414 if (OpIdx == -1) break; 3415 3416 const MCOperand &MO = Inst.getOperand(OpIdx); 3417 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3418 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3419 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3420 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3421 if (NumLiterals == 0 || LiteralValue != Value) { 3422 LiteralValue = Value; 3423 ++NumLiterals; 3424 } 3425 } else if (MO.isExpr()) { 3426 ++NumExprs; 3427 } 3428 } 3429 } 3430 3431 return NumLiterals + NumExprs <= 1; 3432 } 3433 3434 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3435 const unsigned Opc = Inst.getOpcode(); 3436 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3437 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3438 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3439 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3440 3441 if (OpSel & ~3) 3442 return false; 3443 } 3444 return true; 3445 } 3446 3447 // Check if VCC register matches wavefront size 3448 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3449 auto FB = getFeatureBits(); 3450 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3451 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3452 } 3453 3454 // VOP3 literal is only allowed in GFX10+ and only one can be used 3455 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3456 unsigned Opcode = Inst.getOpcode(); 3457 const MCInstrDesc &Desc = MII.get(Opcode); 3458 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3459 return true; 3460 3461 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3462 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3463 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3464 3465 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3466 3467 unsigned NumExprs = 0; 3468 unsigned NumLiterals = 0; 3469 uint32_t LiteralValue; 3470 3471 for (int OpIdx : OpIndices) { 3472 if (OpIdx == -1) break; 3473 3474 const MCOperand &MO = Inst.getOperand(OpIdx); 3475 if (!MO.isImm() && !MO.isExpr()) 3476 continue; 3477 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3478 continue; 3479 3480 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3481 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3482 return false; 3483 3484 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3485 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3486 if (NumLiterals == 0 || LiteralValue != Value) { 3487 LiteralValue = Value; 3488 ++NumLiterals; 3489 } 3490 } else if (MO.isExpr()) { 3491 ++NumExprs; 3492 } 3493 } 3494 NumLiterals += NumExprs; 3495 3496 return !NumLiterals || 3497 (NumLiterals == 1 && 
getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3498 } 3499 3500 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3501 const SMLoc &IDLoc, 3502 const OperandVector &Operands) { 3503 if (!validateLdsDirect(Inst)) { 3504 Error(IDLoc, 3505 "invalid use of lds_direct"); 3506 return false; 3507 } 3508 if (!validateSOPLiteral(Inst)) { 3509 Error(IDLoc, 3510 "only one literal operand is allowed"); 3511 return false; 3512 } 3513 if (!validateVOP3Literal(Inst)) { 3514 Error(IDLoc, 3515 "invalid literal operand"); 3516 return false; 3517 } 3518 if (!validateConstantBusLimitations(Inst)) { 3519 Error(IDLoc, 3520 "invalid operand (violates constant bus restrictions)"); 3521 return false; 3522 } 3523 if (!validateEarlyClobberLimitations(Inst)) { 3524 Error(IDLoc, 3525 "destination must be different than all sources"); 3526 return false; 3527 } 3528 if (!validateIntClampSupported(Inst)) { 3529 Error(IDLoc, 3530 "integer clamping is not supported on this GPU"); 3531 return false; 3532 } 3533 if (!validateOpSel(Inst)) { 3534 Error(IDLoc, 3535 "invalid op_sel operand"); 3536 return false; 3537 } 3538 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3539 if (!validateMIMGD16(Inst)) { 3540 Error(IDLoc, 3541 "d16 modifier is not supported on this GPU"); 3542 return false; 3543 } 3544 if (!validateMIMGDim(Inst)) { 3545 Error(IDLoc, "dim modifier is required on this GPU"); 3546 return false; 3547 } 3548 if (!validateMIMGDataSize(Inst)) { 3549 Error(IDLoc, 3550 "image data size does not match dmask and tfe"); 3551 return false; 3552 } 3553 if (!validateMIMGAddrSize(Inst)) { 3554 Error(IDLoc, 3555 "image address size does not match dim and a16"); 3556 return false; 3557 } 3558 if (!validateMIMGAtomicDMask(Inst)) { 3559 Error(IDLoc, 3560 "invalid atomic image dmask"); 3561 return false; 3562 } 3563 if (!validateMIMGGatherDMask(Inst)) { 3564 Error(IDLoc, 3565 "invalid image_gather dmask: only one bit must be set"); 3566 return false; 3567 } 3568 if (!validateMovrels(Inst)) { 3569 Error(IDLoc, "source operand must be a VGPR"); 3570 return false; 3571 } 3572 if (!validateFlatOffset(Inst, Operands)) { 3573 return false; 3574 } 3575 3576 return true; 3577 } 3578 3579 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3580 const FeatureBitset &FBS, 3581 unsigned VariantID = 0); 3582 3583 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3584 OperandVector &Operands, 3585 MCStreamer &Out, 3586 uint64_t &ErrorInfo, 3587 bool MatchingInlineAsm) { 3588 MCInst Inst; 3589 unsigned Result = Match_Success; 3590 for (auto Variant : getMatchedVariants()) { 3591 uint64_t EI; 3592 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3593 Variant); 3594 // We order match statuses from least to most specific. 
We use most specific 3595 // status as resulting 3596 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3597 if ((R == Match_Success) || 3598 (R == Match_PreferE32) || 3599 (R == Match_MissingFeature && Result != Match_PreferE32) || 3600 (R == Match_InvalidOperand && Result != Match_MissingFeature 3601 && Result != Match_PreferE32) || 3602 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3603 && Result != Match_MissingFeature 3604 && Result != Match_PreferE32)) { 3605 Result = R; 3606 ErrorInfo = EI; 3607 } 3608 if (R == Match_Success) 3609 break; 3610 } 3611 3612 switch (Result) { 3613 default: break; 3614 case Match_Success: 3615 if (!validateInstruction(Inst, IDLoc, Operands)) { 3616 return true; 3617 } 3618 Inst.setLoc(IDLoc); 3619 Out.emitInstruction(Inst, getSTI()); 3620 return false; 3621 3622 case Match_MissingFeature: 3623 return Error(IDLoc, "instruction not supported on this GPU"); 3624 3625 case Match_MnemonicFail: { 3626 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3627 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3628 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3629 return Error(IDLoc, "invalid instruction" + Suggestion, 3630 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3631 } 3632 3633 case Match_InvalidOperand: { 3634 SMLoc ErrorLoc = IDLoc; 3635 if (ErrorInfo != ~0ULL) { 3636 if (ErrorInfo >= Operands.size()) { 3637 return Error(IDLoc, "too few operands for instruction"); 3638 } 3639 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3640 if (ErrorLoc == SMLoc()) 3641 ErrorLoc = IDLoc; 3642 } 3643 return Error(ErrorLoc, "invalid operand for instruction"); 3644 } 3645 3646 case Match_PreferE32: 3647 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3648 "should be encoded as e32"); 3649 } 3650 llvm_unreachable("Implement any new match types added!"); 3651 } 3652 3653 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3654 int64_t Tmp = -1; 3655 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3656 return true; 3657 } 3658 if (getParser().parseAbsoluteExpression(Tmp)) { 3659 return true; 3660 } 3661 Ret = static_cast<uint32_t>(Tmp); 3662 return false; 3663 } 3664 3665 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3666 uint32_t &Minor) { 3667 if (ParseAsAbsoluteExpression(Major)) 3668 return TokError("invalid major version"); 3669 3670 if (getLexer().isNot(AsmToken::Comma)) 3671 return TokError("minor version number required, comma expected"); 3672 Lex(); 3673 3674 if (ParseAsAbsoluteExpression(Minor)) 3675 return TokError("invalid minor version"); 3676 3677 return false; 3678 } 3679 3680 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3681 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3682 return TokError("directive only supported for amdgcn architecture"); 3683 3684 std::string Target; 3685 3686 SMLoc TargetStart = getTok().getLoc(); 3687 if (getParser().parseEscapedString(Target)) 3688 return true; 3689 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3690 3691 std::string ExpectedTarget; 3692 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3693 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3694 3695 if (Target != ExpectedTargetOS.str()) 3696 return getParser().Error(TargetRange.Start, "target must match options", 3697 TargetRange); 3698 3699 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3700 return false; 3701 } 3702 3703 bool 
AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3704 return getParser().Error(Range.Start, "value out of range", Range); 3705 } 3706 3707 bool AMDGPUAsmParser::calculateGPRBlocks( 3708 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3709 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3710 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3711 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3712 // TODO(scott.linder): These calculations are duplicated from 3713 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 3714 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3715 3716 unsigned NumVGPRs = NextFreeVGPR; 3717 unsigned NumSGPRs = NextFreeSGPR; 3718 3719 if (Version.Major >= 10) 3720 NumSGPRs = 0; 3721 else { 3722 unsigned MaxAddressableNumSGPRs = 3723 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3724 3725 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3726 NumSGPRs > MaxAddressableNumSGPRs) 3727 return OutOfRangeError(SGPRRange); 3728 3729 NumSGPRs += 3730 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3731 3732 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3733 NumSGPRs > MaxAddressableNumSGPRs) 3734 return OutOfRangeError(SGPRRange); 3735 3736 if (Features.test(FeatureSGPRInitBug)) 3737 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3738 } 3739 3740 VGPRBlocks = 3741 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3742 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3743 3744 return false; 3745 } 3746 3747 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3748 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3749 return TokError("directive only supported for amdgcn architecture"); 3750 3751 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3752 return TokError("directive only supported for amdhsa OS"); 3753 3754 StringRef KernelName; 3755 if (getParser().parseIdentifier(KernelName)) 3756 return true; 3757 3758 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3759 3760 StringSet<> Seen; 3761 3762 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3763 3764 SMRange VGPRRange; 3765 uint64_t NextFreeVGPR = 0; 3766 SMRange SGPRRange; 3767 uint64_t NextFreeSGPR = 0; 3768 unsigned UserSGPRCount = 0; 3769 bool ReserveVCC = true; 3770 bool ReserveFlatScr = true; 3771 bool ReserveXNACK = hasXNACK(); 3772 Optional<bool> EnableWavefrontSize32; 3773 3774 while (true) { 3775 while (getLexer().is(AsmToken::EndOfStatement)) 3776 Lex(); 3777 3778 if (getLexer().isNot(AsmToken::Identifier)) 3779 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3780 3781 StringRef ID = getTok().getIdentifier(); 3782 SMRange IDRange = getTok().getLocRange(); 3783 Lex(); 3784 3785 if (ID == ".end_amdhsa_kernel") 3786 break; 3787 3788 if (Seen.find(ID) != Seen.end()) 3789 return TokError(".amdhsa_ directives cannot be repeated"); 3790 Seen.insert(ID); 3791 3792 SMLoc ValStart = getTok().getLoc(); 3793 int64_t IVal; 3794 if (getParser().parseAbsoluteExpression(IVal)) 3795 return true; 3796 SMLoc ValEnd = getTok().getLoc(); 3797 SMRange ValRange = SMRange(ValStart, ValEnd); 3798 3799 if (IVal < 0) 3800 return OutOfRangeError(ValRange); 3801 3802 uint64_t Val = IVal; 3803 3804 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3805 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3806 return OutOfRangeError(RANGE); \ 3807 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3808 3809 if (ID == 
".amdhsa_group_segment_fixed_size") { 3810 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3811 return OutOfRangeError(ValRange); 3812 KD.group_segment_fixed_size = Val; 3813 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3814 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3815 return OutOfRangeError(ValRange); 3816 KD.private_segment_fixed_size = Val; 3817 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3818 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3819 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3820 Val, ValRange); 3821 if (Val) 3822 UserSGPRCount += 4; 3823 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3824 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3825 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3826 ValRange); 3827 if (Val) 3828 UserSGPRCount += 2; 3829 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3830 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3831 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3832 ValRange); 3833 if (Val) 3834 UserSGPRCount += 2; 3835 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3836 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3837 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3838 Val, ValRange); 3839 if (Val) 3840 UserSGPRCount += 2; 3841 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3842 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3843 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3844 ValRange); 3845 if (Val) 3846 UserSGPRCount += 2; 3847 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3848 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3849 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3850 ValRange); 3851 if (Val) 3852 UserSGPRCount += 2; 3853 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3854 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3855 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3856 Val, ValRange); 3857 if (Val) 3858 UserSGPRCount += 1; 3859 } else if (ID == ".amdhsa_wavefront_size32") { 3860 if (IVersion.Major < 10) 3861 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3862 IDRange); 3863 EnableWavefrontSize32 = Val; 3864 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3865 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3866 Val, ValRange); 3867 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3868 PARSE_BITS_ENTRY( 3869 KD.compute_pgm_rsrc2, 3870 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3871 ValRange); 3872 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3873 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3874 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3875 ValRange); 3876 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3877 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3878 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3879 ValRange); 3880 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3881 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3882 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3883 ValRange); 3884 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3885 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3886 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3887 ValRange); 3888 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3889 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3890 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3891 ValRange); 3892 } else if (ID == ".amdhsa_next_free_vgpr") { 3893 VGPRRange = ValRange; 3894 NextFreeVGPR = Val; 3895 } else if (ID == 
".amdhsa_next_free_sgpr") { 3896 SGPRRange = ValRange; 3897 NextFreeSGPR = Val; 3898 } else if (ID == ".amdhsa_reserve_vcc") { 3899 if (!isUInt<1>(Val)) 3900 return OutOfRangeError(ValRange); 3901 ReserveVCC = Val; 3902 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3903 if (IVersion.Major < 7) 3904 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3905 IDRange); 3906 if (!isUInt<1>(Val)) 3907 return OutOfRangeError(ValRange); 3908 ReserveFlatScr = Val; 3909 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3910 if (IVersion.Major < 8) 3911 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3912 IDRange); 3913 if (!isUInt<1>(Val)) 3914 return OutOfRangeError(ValRange); 3915 ReserveXNACK = Val; 3916 } else if (ID == ".amdhsa_float_round_mode_32") { 3917 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3918 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3919 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3920 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3921 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3922 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3923 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3924 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3925 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3926 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3927 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3928 ValRange); 3929 } else if (ID == ".amdhsa_dx10_clamp") { 3930 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3931 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3932 } else if (ID == ".amdhsa_ieee_mode") { 3933 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3934 Val, ValRange); 3935 } else if (ID == ".amdhsa_fp16_overflow") { 3936 if (IVersion.Major < 9) 3937 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3938 IDRange); 3939 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3940 ValRange); 3941 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3942 if (IVersion.Major < 10) 3943 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3944 IDRange); 3945 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3946 ValRange); 3947 } else if (ID == ".amdhsa_memory_ordered") { 3948 if (IVersion.Major < 10) 3949 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3950 IDRange); 3951 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3952 ValRange); 3953 } else if (ID == ".amdhsa_forward_progress") { 3954 if (IVersion.Major < 10) 3955 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3956 IDRange); 3957 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3958 ValRange); 3959 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3960 PARSE_BITS_ENTRY( 3961 KD.compute_pgm_rsrc2, 3962 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3963 ValRange); 3964 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3965 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3966 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3967 Val, ValRange); 3968 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3969 PARSE_BITS_ENTRY( 3970 KD.compute_pgm_rsrc2, 3971 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3972 ValRange); 3973 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3974 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3975 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3976 Val, ValRange); 3977 } else if 
(ID == ".amdhsa_exception_fp_ieee_underflow") { 3978 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3979 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3980 Val, ValRange); 3981 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3982 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3983 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3984 Val, ValRange); 3985 } else if (ID == ".amdhsa_exception_int_div_zero") { 3986 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3987 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3988 Val, ValRange); 3989 } else { 3990 return getParser().Error(IDRange.Start, 3991 "unknown .amdhsa_kernel directive", IDRange); 3992 } 3993 3994 #undef PARSE_BITS_ENTRY 3995 } 3996 3997 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3998 return TokError(".amdhsa_next_free_vgpr directive is required"); 3999 4000 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4001 return TokError(".amdhsa_next_free_sgpr directive is required"); 4002 4003 unsigned VGPRBlocks; 4004 unsigned SGPRBlocks; 4005 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4006 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4007 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4008 SGPRBlocks)) 4009 return true; 4010 4011 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4012 VGPRBlocks)) 4013 return OutOfRangeError(VGPRRange); 4014 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4015 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4016 4017 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4018 SGPRBlocks)) 4019 return OutOfRangeError(SGPRRange); 4020 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4021 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4022 SGPRBlocks); 4023 4024 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4025 return TokError("too many user SGPRs enabled"); 4026 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4027 UserSGPRCount); 4028 4029 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4030 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4031 ReserveFlatScr, ReserveXNACK); 4032 return false; 4033 } 4034 4035 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4036 uint32_t Major; 4037 uint32_t Minor; 4038 4039 if (ParseDirectiveMajorMinor(Major, Minor)) 4040 return true; 4041 4042 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4043 return false; 4044 } 4045 4046 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4047 uint32_t Major; 4048 uint32_t Minor; 4049 uint32_t Stepping; 4050 StringRef VendorName; 4051 StringRef ArchName; 4052 4053 // If this directive has no arguments, then use the ISA version for the 4054 // targeted GPU. 
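  // For reference, the full form of this directive looks like the following
  // (the version numbers here are illustrative only):
  //   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"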
4055 if (getLexer().is(AsmToken::EndOfStatement)) { 4056 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4057 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4058 ISA.Stepping, 4059 "AMD", "AMDGPU"); 4060 return false; 4061 } 4062 4063 if (ParseDirectiveMajorMinor(Major, Minor)) 4064 return true; 4065 4066 if (getLexer().isNot(AsmToken::Comma)) 4067 return TokError("stepping version number required, comma expected"); 4068 Lex(); 4069 4070 if (ParseAsAbsoluteExpression(Stepping)) 4071 return TokError("invalid stepping version"); 4072 4073 if (getLexer().isNot(AsmToken::Comma)) 4074 return TokError("vendor name required, comma expected"); 4075 Lex(); 4076 4077 if (getLexer().isNot(AsmToken::String)) 4078 return TokError("invalid vendor name"); 4079 4080 VendorName = getLexer().getTok().getStringContents(); 4081 Lex(); 4082 4083 if (getLexer().isNot(AsmToken::Comma)) 4084 return TokError("arch name required, comma expected"); 4085 Lex(); 4086 4087 if (getLexer().isNot(AsmToken::String)) 4088 return TokError("invalid arch name"); 4089 4090 ArchName = getLexer().getTok().getStringContents(); 4091 Lex(); 4092 4093 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4094 VendorName, ArchName); 4095 return false; 4096 } 4097 4098 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4099 amd_kernel_code_t &Header) { 4100 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4101 // assembly for backwards compatibility. 4102 if (ID == "max_scratch_backing_memory_byte_size") { 4103 Parser.eatToEndOfStatement(); 4104 return false; 4105 } 4106 4107 SmallString<40> ErrStr; 4108 raw_svector_ostream Err(ErrStr); 4109 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4110 return TokError(Err.str()); 4111 } 4112 Lex(); 4113 4114 if (ID == "enable_wavefront_size32") { 4115 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4116 if (!isGFX10()) 4117 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4118 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4119 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4120 } else { 4121 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4122 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4123 } 4124 } 4125 4126 if (ID == "wavefront_size") { 4127 if (Header.wavefront_size == 5) { 4128 if (!isGFX10()) 4129 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4130 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4131 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4132 } else if (Header.wavefront_size == 6) { 4133 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4134 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4135 } 4136 } 4137 4138 if (ID == "enable_wgp_mode") { 4139 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4140 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4141 } 4142 4143 if (ID == "enable_mem_ordered") { 4144 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4145 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4146 } 4147 4148 if (ID == "enable_fwd_progress") { 4149 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4150 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4151 } 4152 4153 return false; 4154 } 4155 4156 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4157 amd_kernel_code_t Header; 4158 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4159 4160 while (true) { 4161 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4162 // will set the current token to EndOfStatement. 4163 while(getLexer().is(AsmToken::EndOfStatement)) 4164 Lex(); 4165 4166 if (getLexer().isNot(AsmToken::Identifier)) 4167 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4168 4169 StringRef ID = getLexer().getTok().getIdentifier(); 4170 Lex(); 4171 4172 if (ID == ".end_amd_kernel_code_t") 4173 break; 4174 4175 if (ParseAMDKernelCodeTValue(ID, Header)) 4176 return true; 4177 } 4178 4179 getTargetStreamer().EmitAMDKernelCodeT(Header); 4180 4181 return false; 4182 } 4183 4184 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4185 if (getLexer().isNot(AsmToken::Identifier)) 4186 return TokError("expected symbol name"); 4187 4188 StringRef KernelName = Parser.getTok().getString(); 4189 4190 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4191 ELF::STT_AMDGPU_HSA_KERNEL); 4192 Lex(); 4193 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4194 KernelScope.initialize(getContext()); 4195 return false; 4196 } 4197 4198 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4199 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4200 return Error(getParser().getTok().getLoc(), 4201 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4202 "architectures"); 4203 } 4204 4205 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4206 4207 std::string ISAVersionStringFromSTI; 4208 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4209 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4210 4211 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4212 return Error(getParser().getTok().getLoc(), 4213 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4214 "arguments specified through the command line"); 4215 } 4216 4217 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4218 Lex(); 4219 4220 return false; 4221 } 4222 4223 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4224 const char *AssemblerDirectiveBegin; 4225 const char *AssemblerDirectiveEnd; 4226 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4227 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4228 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4229 HSAMD::V3::AssemblerDirectiveEnd) 4230 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4231 HSAMD::AssemblerDirectiveEnd); 4232 4233 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4234 return Error(getParser().getTok().getLoc(), 4235 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4236 "not available on non-amdhsa OSes")).str()); 4237 } 4238 4239 std::string HSAMetadataString; 4240 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4241 HSAMetadataString)) 4242 return true; 4243 4244 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4245 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4246 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4247 } else { 4248 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4249 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4250 } 4251 4252 return false; 4253 } 4254 4255 /// Common code to parse out a block of text (typically YAML) between start and 4256 /// end directives. 
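/// For example, the HSA metadata directives use this to gather everything up
/// to the matching end directive into one string (statement separators are
/// preserved) before handing it to the metadata parser.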
4257 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4258 const char *AssemblerDirectiveEnd, 4259 std::string &CollectString) { 4260 4261 raw_string_ostream CollectStream(CollectString); 4262 4263 getLexer().setSkipSpace(false); 4264 4265 bool FoundEnd = false; 4266 while (!getLexer().is(AsmToken::Eof)) { 4267 while (getLexer().is(AsmToken::Space)) { 4268 CollectStream << getLexer().getTok().getString(); 4269 Lex(); 4270 } 4271 4272 if (getLexer().is(AsmToken::Identifier)) { 4273 StringRef ID = getLexer().getTok().getIdentifier(); 4274 if (ID == AssemblerDirectiveEnd) { 4275 Lex(); 4276 FoundEnd = true; 4277 break; 4278 } 4279 } 4280 4281 CollectStream << Parser.parseStringToEndOfStatement() 4282 << getContext().getAsmInfo()->getSeparatorString(); 4283 4284 Parser.eatToEndOfStatement(); 4285 } 4286 4287 getLexer().setSkipSpace(true); 4288 4289 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4290 return TokError(Twine("expected directive ") + 4291 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4292 } 4293 4294 CollectStream.flush(); 4295 return false; 4296 } 4297 4298 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4299 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4300 std::string String; 4301 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4302 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4303 return true; 4304 4305 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4306 if (!PALMetadata->setFromString(String)) 4307 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4308 return false; 4309 } 4310 4311 /// Parse the assembler directive for old linear-format PAL metadata. 4312 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4313 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4314 return Error(getParser().getTok().getLoc(), 4315 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4316 "not available on non-amdpal OSes")).str()); 4317 } 4318 4319 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4320 PALMetadata->setLegacy(); 4321 for (;;) { 4322 uint32_t Key, Value; 4323 if (ParseAsAbsoluteExpression(Key)) { 4324 return TokError(Twine("invalid value in ") + 4325 Twine(PALMD::AssemblerDirective)); 4326 } 4327 if (getLexer().isNot(AsmToken::Comma)) { 4328 return TokError(Twine("expected an even number of values in ") + 4329 Twine(PALMD::AssemblerDirective)); 4330 } 4331 Lex(); 4332 if (ParseAsAbsoluteExpression(Value)) { 4333 return TokError(Twine("invalid value in ") + 4334 Twine(PALMD::AssemblerDirective)); 4335 } 4336 PALMetadata->setRegister(Key, Value); 4337 if (getLexer().isNot(AsmToken::Comma)) 4338 break; 4339 Lex(); 4340 } 4341 return false; 4342 } 4343 4344 /// ParseDirectiveAMDGPULDS 4345 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4346 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4347 if (getParser().checkForValidSection()) 4348 return true; 4349 4350 StringRef Name; 4351 SMLoc NameLoc = getLexer().getLoc(); 4352 if (getParser().parseIdentifier(Name)) 4353 return TokError("expected identifier in directive"); 4354 4355 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4356 if (parseToken(AsmToken::Comma, "expected ','")) 4357 return true; 4358 4359 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4360 4361 int64_t Size; 4362 SMLoc SizeLoc = getLexer().getLoc(); 4363 if (getParser().parseAbsoluteExpression(Size)) 4364 return true; 4365 if (Size < 0) 4366 return 
Error(SizeLoc, "size must be non-negative"); 4367 if (Size > LocalMemorySize) 4368 return Error(SizeLoc, "size is too large"); 4369 4370 int64_t Align = 4; 4371 if (getLexer().is(AsmToken::Comma)) { 4372 Lex(); 4373 SMLoc AlignLoc = getLexer().getLoc(); 4374 if (getParser().parseAbsoluteExpression(Align)) 4375 return true; 4376 if (Align < 0 || !isPowerOf2_64(Align)) 4377 return Error(AlignLoc, "alignment must be a power of two"); 4378 4379 // Alignment larger than the size of LDS is possible in theory, as long 4380 // as the linker manages to place the symbol at address 0, but we do want 4381 // to make sure the alignment fits nicely into a 32-bit integer. 4382 if (Align >= 1u << 31) 4383 return Error(AlignLoc, "alignment is too large"); 4384 } 4385 4386 if (parseToken(AsmToken::EndOfStatement, 4387 "unexpected token in '.amdgpu_lds' directive")) 4388 return true; 4389 4390 Symbol->redefineIfPossible(); 4391 if (!Symbol->isUndefined()) 4392 return Error(NameLoc, "invalid symbol redefinition"); 4393 4394 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align); 4395 return false; 4396 } 4397 4398 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4399 StringRef IDVal = DirectiveID.getString(); 4400 4401 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4402 if (IDVal == ".amdgcn_target") 4403 return ParseDirectiveAMDGCNTarget(); 4404 4405 if (IDVal == ".amdhsa_kernel") 4406 return ParseDirectiveAMDHSAKernel(); 4407 4408 // TODO: Restructure/combine with PAL metadata directive. 4409 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4410 return ParseDirectiveHSAMetadata(); 4411 } else { 4412 if (IDVal == ".hsa_code_object_version") 4413 return ParseDirectiveHSACodeObjectVersion(); 4414 4415 if (IDVal == ".hsa_code_object_isa") 4416 return ParseDirectiveHSACodeObjectISA(); 4417 4418 if (IDVal == ".amd_kernel_code_t") 4419 return ParseDirectiveAMDKernelCodeT(); 4420 4421 if (IDVal == ".amdgpu_hsa_kernel") 4422 return ParseDirectiveAMDGPUHsaKernel(); 4423 4424 if (IDVal == ".amd_amdgpu_isa") 4425 return ParseDirectiveISAVersion(); 4426 4427 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4428 return ParseDirectiveHSAMetadata(); 4429 } 4430 4431 if (IDVal == ".amdgpu_lds") 4432 return ParseDirectiveAMDGPULDS(); 4433 4434 if (IDVal == PALMD::AssemblerDirectiveBegin) 4435 return ParseDirectivePALMetadataBegin(); 4436 4437 if (IDVal == PALMD::AssemblerDirective) 4438 return ParseDirectivePALMetadata(); 4439 4440 return true; 4441 } 4442 4443 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4444 unsigned RegNo) const { 4445 4446 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4447 R.isValid(); ++R) { 4448 if (*R == RegNo) 4449 return isGFX9() || isGFX10(); 4450 } 4451 4452 // GFX10 has 2 more SGPRs 104 and 105.
4453 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4454 R.isValid(); ++R) { 4455 if (*R == RegNo) 4456 return hasSGPR104_SGPR105(); 4457 } 4458 4459 switch (RegNo) { 4460 case AMDGPU::SRC_SHARED_BASE: 4461 case AMDGPU::SRC_SHARED_LIMIT: 4462 case AMDGPU::SRC_PRIVATE_BASE: 4463 case AMDGPU::SRC_PRIVATE_LIMIT: 4464 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4465 return !isCI() && !isSI() && !isVI(); 4466 case AMDGPU::TBA: 4467 case AMDGPU::TBA_LO: 4468 case AMDGPU::TBA_HI: 4469 case AMDGPU::TMA: 4470 case AMDGPU::TMA_LO: 4471 case AMDGPU::TMA_HI: 4472 return !isGFX9() && !isGFX10(); 4473 case AMDGPU::XNACK_MASK: 4474 case AMDGPU::XNACK_MASK_LO: 4475 case AMDGPU::XNACK_MASK_HI: 4476 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4477 case AMDGPU::SGPR_NULL: 4478 return isGFX10(); 4479 default: 4480 break; 4481 } 4482 4483 if (isCI()) 4484 return true; 4485 4486 if (isSI() || isGFX10()) { 4487 // No flat_scr on SI. 4488 // On GFX10 flat scratch is not a valid register operand and can only be 4489 // accessed with s_setreg/s_getreg. 4490 switch (RegNo) { 4491 case AMDGPU::FLAT_SCR: 4492 case AMDGPU::FLAT_SCR_LO: 4493 case AMDGPU::FLAT_SCR_HI: 4494 return false; 4495 default: 4496 return true; 4497 } 4498 } 4499 4500 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4501 // SI/CI have. 4502 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4503 R.isValid(); ++R) { 4504 if (*R == RegNo) 4505 return hasSGPR102_SGPR103(); 4506 } 4507 4508 return true; 4509 } 4510 4511 OperandMatchResultTy 4512 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4513 OperandMode Mode) { 4514 // Try to parse with a custom parser 4515 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4516 4517 // If we successfully parsed the operand or if there was an error parsing, 4518 // we are done. 4519 // 4520 // If we are parsing after we reach EndOfStatement then this means we 4521 // are appending default values to the Operands list. This is only done 4522 // by custom parser, so we shouldn't continue on to the generic parsing. 4523 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4524 getLexer().is(AsmToken::EndOfStatement)) 4525 return ResTy; 4526 4527 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4528 unsigned Prefix = Operands.size(); 4529 SMLoc LBraceLoc = getTok().getLoc(); 4530 Parser.Lex(); // eat the '[' 4531 4532 for (;;) { 4533 ResTy = parseReg(Operands); 4534 if (ResTy != MatchOperand_Success) 4535 return ResTy; 4536 4537 if (getLexer().is(AsmToken::RBrac)) 4538 break; 4539 4540 if (getLexer().isNot(AsmToken::Comma)) 4541 return MatchOperand_ParseFail; 4542 Parser.Lex(); 4543 } 4544 4545 if (Operands.size() - Prefix > 1) { 4546 Operands.insert(Operands.begin() + Prefix, 4547 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4548 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4549 getTok().getLoc())); 4550 } 4551 4552 Parser.Lex(); // eat the ']' 4553 return MatchOperand_Success; 4554 } 4555 4556 return parseRegOrImm(Operands); 4557 } 4558 4559 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4560 // Clear any forced encodings from the previous instruction.
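  // For example, "v_add_f32_e64" is reduced to "v_add_f32" with a forced
  // 64-bit (VOP3) encoding; the "_e32", "_dpp" and "_sdwa" suffixes are
  // handled the same way below.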
4561 setForcedEncodingSize(0); 4562 setForcedDPP(false); 4563 setForcedSDWA(false); 4564 4565 if (Name.endswith("_e64")) { 4566 setForcedEncodingSize(64); 4567 return Name.substr(0, Name.size() - 4); 4568 } else if (Name.endswith("_e32")) { 4569 setForcedEncodingSize(32); 4570 return Name.substr(0, Name.size() - 4); 4571 } else if (Name.endswith("_dpp")) { 4572 setForcedDPP(true); 4573 return Name.substr(0, Name.size() - 4); 4574 } else if (Name.endswith("_sdwa")) { 4575 setForcedSDWA(true); 4576 return Name.substr(0, Name.size() - 5); 4577 } 4578 return Name; 4579 } 4580 4581 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4582 StringRef Name, 4583 SMLoc NameLoc, OperandVector &Operands) { 4584 // Add the instruction mnemonic 4585 Name = parseMnemonicSuffix(Name); 4586 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4587 4588 bool IsMIMG = Name.startswith("image_"); 4589 4590 while (!getLexer().is(AsmToken::EndOfStatement)) { 4591 OperandMode Mode = OperandMode_Default; 4592 if (IsMIMG && isGFX10() && Operands.size() == 2) 4593 Mode = OperandMode_NSA; 4594 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4595 4596 // Eat the comma or space if there is one. 4597 if (getLexer().is(AsmToken::Comma)) 4598 Parser.Lex(); 4599 4600 switch (Res) { 4601 case MatchOperand_Success: break; 4602 case MatchOperand_ParseFail: 4603 // FIXME: use real operand location rather than the current location. 4604 Error(getLexer().getLoc(), "failed parsing operand."); 4605 while (!getLexer().is(AsmToken::EndOfStatement)) { 4606 Parser.Lex(); 4607 } 4608 return true; 4609 case MatchOperand_NoMatch: 4610 // FIXME: use real operand location rather than the current location. 4611 Error(getLexer().getLoc(), "not a valid operand."); 4612 while (!getLexer().is(AsmToken::EndOfStatement)) { 4613 Parser.Lex(); 4614 } 4615 return true; 4616 } 4617 } 4618 4619 return false; 4620 } 4621 4622 //===----------------------------------------------------------------------===// 4623 // Utility functions 4624 //===----------------------------------------------------------------------===// 4625 4626 OperandMatchResultTy 4627 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4628 4629 if (!trySkipId(Prefix, AsmToken::Colon)) 4630 return MatchOperand_NoMatch; 4631 4632 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4633 } 4634 4635 OperandMatchResultTy 4636 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4637 AMDGPUOperand::ImmTy ImmTy, 4638 bool (*ConvertResult)(int64_t&)) { 4639 SMLoc S = getLoc(); 4640 int64_t Value = 0; 4641 4642 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4643 if (Res != MatchOperand_Success) 4644 return Res; 4645 4646 if (ConvertResult && !ConvertResult(Value)) { 4647 Error(S, "invalid " + StringRef(Prefix) + " value."); 4648 } 4649 4650 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4651 return MatchOperand_Success; 4652 } 4653 4654 OperandMatchResultTy 4655 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4656 OperandVector &Operands, 4657 AMDGPUOperand::ImmTy ImmTy, 4658 bool (*ConvertResult)(int64_t&)) { 4659 SMLoc S = getLoc(); 4660 if (!trySkipId(Prefix, AsmToken::Colon)) 4661 return MatchOperand_NoMatch; 4662 4663 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4664 return MatchOperand_ParseFail; 4665 4666 unsigned Val = 0; 4667 const unsigned MaxSize = 4; 4668 4669 // FIXME: How to verify the number of elements matches the number of src 4670 // operands? 4671 for (int I = 0; ; ++I) { 4672 int64_t Op; 4673 SMLoc Loc = getLoc(); 4674 if (!parseExpr(Op)) 4675 return MatchOperand_ParseFail; 4676 4677 if (Op != 0 && Op != 1) { 4678 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4679 return MatchOperand_ParseFail; 4680 } 4681 4682 Val |= (Op << I); 4683 4684 if (trySkipToken(AsmToken::RBrac)) 4685 break; 4686 4687 if (I + 1 == MaxSize) { 4688 Error(getLoc(), "expected a closing square bracket"); 4689 return MatchOperand_ParseFail; 4690 } 4691 4692 if (!skipToken(AsmToken::Comma, "expected a comma")) 4693 return MatchOperand_ParseFail; 4694 } 4695 4696 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4697 return MatchOperand_Success; 4698 } 4699 4700 OperandMatchResultTy 4701 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4702 AMDGPUOperand::ImmTy ImmTy) { 4703 int64_t Bit = 0; 4704 SMLoc S = Parser.getTok().getLoc(); 4705 4706 // We are at the end of the statement, and this is a default argument, so 4707 // use a default value. 
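  // Illustrative example for Name == "glc": the bare token "glc" sets the bit,
  // "noglc" clears it, and any other token is treated as no match.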
4708 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4709 switch(getLexer().getKind()) { 4710 case AsmToken::Identifier: { 4711 StringRef Tok = Parser.getTok().getString(); 4712 if (Tok == Name) { 4713 if (Tok == "r128" && !hasMIMG_R128()) 4714 Error(S, "r128 modifier is not supported on this GPU"); 4715 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 4716 Error(S, "a16 modifier is not supported on this GPU"); 4717 Bit = 1; 4718 Parser.Lex(); 4719 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4720 Bit = 0; 4721 Parser.Lex(); 4722 } else { 4723 return MatchOperand_NoMatch; 4724 } 4725 break; 4726 } 4727 default: 4728 return MatchOperand_NoMatch; 4729 } 4730 } 4731 4732 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4733 return MatchOperand_ParseFail; 4734 4735 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 4736 ImmTy = AMDGPUOperand::ImmTyR128A16; 4737 4738 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4739 return MatchOperand_Success; 4740 } 4741 4742 static void addOptionalImmOperand( 4743 MCInst& Inst, const OperandVector& Operands, 4744 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4745 AMDGPUOperand::ImmTy ImmT, 4746 int64_t Default = 0) { 4747 auto i = OptionalIdx.find(ImmT); 4748 if (i != OptionalIdx.end()) { 4749 unsigned Idx = i->second; 4750 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4751 } else { 4752 Inst.addOperand(MCOperand::createImm(Default)); 4753 } 4754 } 4755 4756 OperandMatchResultTy 4757 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4758 if (getLexer().isNot(AsmToken::Identifier)) { 4759 return MatchOperand_NoMatch; 4760 } 4761 StringRef Tok = Parser.getTok().getString(); 4762 if (Tok != Prefix) { 4763 return MatchOperand_NoMatch; 4764 } 4765 4766 Parser.Lex(); 4767 if (getLexer().isNot(AsmToken::Colon)) { 4768 return MatchOperand_ParseFail; 4769 } 4770 4771 Parser.Lex(); 4772 if (getLexer().isNot(AsmToken::Identifier)) { 4773 return MatchOperand_ParseFail; 4774 } 4775 4776 Value = Parser.getTok().getString(); 4777 return MatchOperand_Success; 4778 } 4779 4780 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4781 // values to live in a joint format operand in the MCInst encoding. 4782 OperandMatchResultTy 4783 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4784 SMLoc S = Parser.getTok().getLoc(); 4785 int64_t Dfmt = 0, Nfmt = 0; 4786 // dfmt and nfmt can appear in either order, and each is optional. 
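  // Illustrative syntax: "dfmt:14, nfmt:7" or "nfmt:7, dfmt:14"; the two
  // values are packed below as (Dfmt | Nfmt << 4) into a single format operand.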
4787 bool GotDfmt = false, GotNfmt = false; 4788 while (!GotDfmt || !GotNfmt) { 4789 if (!GotDfmt) { 4790 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4791 if (Res != MatchOperand_NoMatch) { 4792 if (Res != MatchOperand_Success) 4793 return Res; 4794 if (Dfmt >= 16) { 4795 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4796 return MatchOperand_ParseFail; 4797 } 4798 GotDfmt = true; 4799 Parser.Lex(); 4800 continue; 4801 } 4802 } 4803 if (!GotNfmt) { 4804 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4805 if (Res != MatchOperand_NoMatch) { 4806 if (Res != MatchOperand_Success) 4807 return Res; 4808 if (Nfmt >= 8) { 4809 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4810 return MatchOperand_ParseFail; 4811 } 4812 GotNfmt = true; 4813 Parser.Lex(); 4814 continue; 4815 } 4816 } 4817 break; 4818 } 4819 if (!GotDfmt && !GotNfmt) 4820 return MatchOperand_NoMatch; 4821 auto Format = Dfmt | Nfmt << 4; 4822 Operands.push_back( 4823 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4824 return MatchOperand_Success; 4825 } 4826 4827 //===----------------------------------------------------------------------===// 4828 // ds 4829 //===----------------------------------------------------------------------===// 4830 4831 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4832 const OperandVector &Operands) { 4833 OptionalImmIndexMap OptionalIdx; 4834 4835 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4836 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4837 4838 // Add the register arguments 4839 if (Op.isReg()) { 4840 Op.addRegOperands(Inst, 1); 4841 continue; 4842 } 4843 4844 // Handle optional arguments 4845 OptionalIdx[Op.getImmTy()] = i; 4846 } 4847 4848 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4849 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4850 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4851 4852 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4853 } 4854 4855 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4856 bool IsGdsHardcoded) { 4857 OptionalImmIndexMap OptionalIdx; 4858 4859 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4860 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4861 4862 // Add the register arguments 4863 if (Op.isReg()) { 4864 Op.addRegOperands(Inst, 1); 4865 continue; 4866 } 4867 4868 if (Op.isToken() && Op.getToken() == "gds") { 4869 IsGdsHardcoded = true; 4870 continue; 4871 } 4872 4873 // Handle optional arguments 4874 OptionalIdx[Op.getImmTy()] = i; 4875 } 4876 4877 AMDGPUOperand::ImmTy OffsetType = 4878 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4879 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4880 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4881 AMDGPUOperand::ImmTyOffset; 4882 4883 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4884 4885 if (!IsGdsHardcoded) { 4886 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4887 } 4888 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4889 } 4890 4891 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4892 OptionalImmIndexMap OptionalIdx; 4893 4894 unsigned OperandIdx[4]; 4895 unsigned EnMask = 0; 4896 int SrcIdx = 0; 4897 4898 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4899 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4900 4901 // Add the register arguments 4902 if (Op.isReg()) { 4903 assert(SrcIdx < 4); 4904 OperandIdx[SrcIdx] = Inst.size(); 4905 Op.addRegOperands(Inst, 1); 4906 ++SrcIdx; 4907 continue; 4908 } 4909 4910 if (Op.isOff()) { 4911 assert(SrcIdx < 4); 4912 OperandIdx[SrcIdx] = Inst.size(); 4913 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4914 ++SrcIdx; 4915 continue; 4916 } 4917 4918 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4919 Op.addImmOperands(Inst, 1); 4920 continue; 4921 } 4922 4923 if (Op.isToken() && Op.getToken() == "done") 4924 continue; 4925 4926 // Handle optional arguments 4927 OptionalIdx[Op.getImmTy()] = i; 4928 } 4929 4930 assert(SrcIdx == 4); 4931 4932 bool Compr = false; 4933 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4934 Compr = true; 4935 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4936 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4937 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4938 } 4939 4940 for (auto i = 0; i < SrcIdx; ++i) { 4941 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4942 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4943 } 4944 } 4945 4946 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4947 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4948 4949 Inst.addOperand(MCOperand::createImm(EnMask)); 4950 } 4951 4952 //===----------------------------------------------------------------------===// 4953 // s_waitcnt 4954 //===----------------------------------------------------------------------===// 4955 4956 static bool 4957 encodeCnt( 4958 const AMDGPU::IsaVersion ISA, 4959 int64_t &IntVal, 4960 int64_t CntVal, 4961 bool Saturate, 4962 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4963 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4964 { 4965 bool Failed = false; 4966 4967 IntVal = encode(ISA, IntVal, CntVal); 4968 if (CntVal != decode(ISA, IntVal)) { 4969 if (Saturate) { 4970 IntVal = encode(ISA, IntVal, -1); 4971 } else { 4972 Failed = true; 4973 } 4974 } 4975 return Failed; 4976 } 4977 4978 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4979 4980 SMLoc CntLoc = getLoc(); 4981 StringRef CntName = getTokenStr(); 4982 4983 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 4984 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 4985 return false; 4986 4987 int64_t CntVal; 4988 SMLoc ValLoc = getLoc(); 4989 if (!parseExpr(CntVal)) 4990 return false; 4991 4992 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4993 4994 bool Failed = true; 4995 bool Sat = CntName.endswith("_sat"); 4996 4997 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4998 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4999 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5000 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5001 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5002 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5003 } else { 5004 Error(CntLoc, "invalid counter name " + CntName); 5005 return false; 5006 } 5007 5008 if (Failed) { 5009 Error(ValLoc, "too large value for " + CntName); 5010 return false; 5011 } 5012 5013 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5014 return false; 5015 5016 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5017 if (isToken(AsmToken::EndOfStatement)) { 5018 Error(getLoc(), "expected a counter name"); 5019 return false; 5020 } 5021 } 5022 5023 return true; 5024 } 5025 5026 OperandMatchResultTy 5027 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5028 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5029 int64_t Waitcnt = getWaitcntBitMask(ISA); 5030 SMLoc S = getLoc(); 5031 5032 // If parse failed, do not return error code 5033 // to avoid excessive error messages. 
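  // Illustrative accepted forms: "vmcnt(0) expcnt(0) lgkmcnt(0)" (with optional
  // '&' or ',' separators between counters) or a plain integer expression.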
5034 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5035 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 5036 } else { 5037 parseExpr(Waitcnt); 5038 } 5039 5040 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5041 return MatchOperand_Success; 5042 } 5043 5044 bool 5045 AMDGPUOperand::isSWaitCnt() const { 5046 return isImm(); 5047 } 5048 5049 //===----------------------------------------------------------------------===// 5050 // hwreg 5051 //===----------------------------------------------------------------------===// 5052 5053 bool 5054 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5055 int64_t &Offset, 5056 int64_t &Width) { 5057 using namespace llvm::AMDGPU::Hwreg; 5058 5059 // The register may be specified by name or using a numeric code 5060 if (isToken(AsmToken::Identifier) && 5061 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5062 HwReg.IsSymbolic = true; 5063 lex(); // skip message name 5064 } else if (!parseExpr(HwReg.Id)) { 5065 return false; 5066 } 5067 5068 if (trySkipToken(AsmToken::RParen)) 5069 return true; 5070 5071 // parse optional params 5072 return 5073 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5074 parseExpr(Offset) && 5075 skipToken(AsmToken::Comma, "expected a comma") && 5076 parseExpr(Width) && 5077 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5078 } 5079 5080 bool 5081 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5082 const int64_t Offset, 5083 const int64_t Width, 5084 const SMLoc Loc) { 5085 5086 using namespace llvm::AMDGPU::Hwreg; 5087 5088 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5089 Error(Loc, "specified hardware register is not supported on this GPU"); 5090 return false; 5091 } else if (!isValidHwreg(HwReg.Id)) { 5092 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5093 return false; 5094 } else if (!isValidHwregOffset(Offset)) { 5095 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5096 return false; 5097 } else if (!isValidHwregWidth(Width)) { 5098 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5099 return false; 5100 } 5101 return true; 5102 } 5103 5104 OperandMatchResultTy 5105 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5106 using namespace llvm::AMDGPU::Hwreg; 5107 5108 int64_t ImmVal = 0; 5109 SMLoc Loc = getLoc(); 5110 5111 // If parse failed, do not return error code 5112 // to avoid excessive error messages. 
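  // Illustrative accepted forms: "hwreg(<name or id>[, <offset>, <width>])",
  // e.g. hwreg(HW_REG_MODE, 0, 32), or a plain 16-bit immediate.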
5113 if (trySkipId("hwreg", AsmToken::LParen)) { 5114 OperandInfoTy HwReg(ID_UNKNOWN_); 5115 int64_t Offset = OFFSET_DEFAULT_; 5116 int64_t Width = WIDTH_DEFAULT_; 5117 if (parseHwregBody(HwReg, Offset, Width) && 5118 validateHwreg(HwReg, Offset, Width, Loc)) { 5119 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5120 } 5121 } else if (parseExpr(ImmVal)) { 5122 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5123 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5124 } 5125 5126 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5127 return MatchOperand_Success; 5128 } 5129 5130 bool AMDGPUOperand::isHwreg() const { 5131 return isImmTy(ImmTyHwreg); 5132 } 5133 5134 //===----------------------------------------------------------------------===// 5135 // sendmsg 5136 //===----------------------------------------------------------------------===// 5137 5138 bool 5139 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5140 OperandInfoTy &Op, 5141 OperandInfoTy &Stream) { 5142 using namespace llvm::AMDGPU::SendMsg; 5143 5144 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5145 Msg.IsSymbolic = true; 5146 lex(); // skip message name 5147 } else if (!parseExpr(Msg.Id)) { 5148 return false; 5149 } 5150 5151 if (trySkipToken(AsmToken::Comma)) { 5152 Op.IsDefined = true; 5153 if (isToken(AsmToken::Identifier) && 5154 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5155 lex(); // skip operation name 5156 } else if (!parseExpr(Op.Id)) { 5157 return false; 5158 } 5159 5160 if (trySkipToken(AsmToken::Comma)) { 5161 Stream.IsDefined = true; 5162 if (!parseExpr(Stream.Id)) 5163 return false; 5164 } 5165 } 5166 5167 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5168 } 5169 5170 bool 5171 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5172 const OperandInfoTy &Op, 5173 const OperandInfoTy &Stream, 5174 const SMLoc S) { 5175 using namespace llvm::AMDGPU::SendMsg; 5176 5177 // Validation strictness depends on whether the message is specified 5178 // in a symbolic or in a numeric form. In the latter case 5179 // only the encoding possibility is checked. 5180 bool Strict = Msg.IsSymbolic; 5181 5182 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5183 Error(S, "invalid message id"); 5184 return false; 5185 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5186 Error(S, Op.IsDefined ? 5187 "message does not support operations" : 5188 "missing message operation"); 5189 return false; 5190 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5191 Error(S, "invalid operation id"); 5192 return false; 5193 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5194 Error(S, "message operation does not support streams"); 5195 return false; 5196 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5197 Error(S, "invalid message stream id"); 5198 return false; 5199 } 5200 return true; 5201 } 5202 5203 OperandMatchResultTy 5204 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5205 using namespace llvm::AMDGPU::SendMsg; 5206 5207 int64_t ImmVal = 0; 5208 SMLoc Loc = getLoc(); 5209 5210 // If parse failed, do not return error code 5211 // to avoid excessive error messages.
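  // Illustrative accepted forms: "sendmsg(<msg>[, <op>[, <stream>]])",
  // e.g. sendmsg(MSG_GS, GS_OP_EMIT, 0), or a plain 16-bit immediate.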
5212 if (trySkipId("sendmsg", AsmToken::LParen)) { 5213 OperandInfoTy Msg(ID_UNKNOWN_); 5214 OperandInfoTy Op(OP_NONE_); 5215 OperandInfoTy Stream(STREAM_ID_NONE_); 5216 if (parseSendMsgBody(Msg, Op, Stream) && 5217 validateSendMsg(Msg, Op, Stream, Loc)) { 5218 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5219 } 5220 } else if (parseExpr(ImmVal)) { 5221 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5222 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5223 } 5224 5225 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5226 return MatchOperand_Success; 5227 } 5228 5229 bool AMDGPUOperand::isSendMsg() const { 5230 return isImmTy(ImmTySendMsg); 5231 } 5232 5233 //===----------------------------------------------------------------------===// 5234 // v_interp 5235 //===----------------------------------------------------------------------===// 5236 5237 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5238 if (getLexer().getKind() != AsmToken::Identifier) 5239 return MatchOperand_NoMatch; 5240 5241 StringRef Str = Parser.getTok().getString(); 5242 int Slot = StringSwitch<int>(Str) 5243 .Case("p10", 0) 5244 .Case("p20", 1) 5245 .Case("p0", 2) 5246 .Default(-1); 5247 5248 SMLoc S = Parser.getTok().getLoc(); 5249 if (Slot == -1) 5250 return MatchOperand_ParseFail; 5251 5252 Parser.Lex(); 5253 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5254 AMDGPUOperand::ImmTyInterpSlot)); 5255 return MatchOperand_Success; 5256 } 5257 5258 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5259 if (getLexer().getKind() != AsmToken::Identifier) 5260 return MatchOperand_NoMatch; 5261 5262 StringRef Str = Parser.getTok().getString(); 5263 if (!Str.startswith("attr")) 5264 return MatchOperand_NoMatch; 5265 5266 StringRef Chan = Str.take_back(2); 5267 int AttrChan = StringSwitch<int>(Chan) 5268 .Case(".x", 0) 5269 .Case(".y", 1) 5270 .Case(".z", 2) 5271 .Case(".w", 3) 5272 .Default(-1); 5273 if (AttrChan == -1) 5274 return MatchOperand_ParseFail; 5275 5276 Str = Str.drop_back(2).drop_front(4); 5277 5278 uint8_t Attr; 5279 if (Str.getAsInteger(10, Attr)) 5280 return MatchOperand_ParseFail; 5281 5282 SMLoc S = Parser.getTok().getLoc(); 5283 Parser.Lex(); 5284 if (Attr > 63) { 5285 Error(S, "out of bounds attr"); 5286 return MatchOperand_Success; 5287 } 5288 5289 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5290 5291 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5292 AMDGPUOperand::ImmTyInterpAttr)); 5293 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5294 AMDGPUOperand::ImmTyAttrChan)); 5295 return MatchOperand_Success; 5296 } 5297 5298 //===----------------------------------------------------------------------===// 5299 // exp 5300 //===----------------------------------------------------------------------===// 5301 5302 void AMDGPUAsmParser::errorExpTgt() { 5303 Error(Parser.getTok().getLoc(), "invalid exp target"); 5304 } 5305 5306 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5307 uint8_t &Val) { 5308 if (Str == "null") { 5309 Val = 9; 5310 return MatchOperand_Success; 5311 } 5312 5313 if (Str.startswith("mrt")) { 5314 Str = Str.drop_front(3); 5315 if (Str == "z") { // == mrtz 5316 Val = 8; 5317 return MatchOperand_Success; 5318 } 5319 5320 if (Str.getAsInteger(10, Val)) 5321 return MatchOperand_ParseFail; 5322 5323 if (Val > 7) 5324 errorExpTgt(); 5325 5326 return MatchOperand_Success; 5327 } 5328 5329 if (Str.startswith("pos")) 
{ 5330 Str = Str.drop_front(3); 5331 if (Str.getAsInteger(10, Val)) 5332 return MatchOperand_ParseFail; 5333 5334 if (Val > 4 || (Val == 4 && !isGFX10())) 5335 errorExpTgt(); 5336 5337 Val += 12; 5338 return MatchOperand_Success; 5339 } 5340 5341 if (isGFX10() && Str == "prim") { 5342 Val = 20; 5343 return MatchOperand_Success; 5344 } 5345 5346 if (Str.startswith("param")) { 5347 Str = Str.drop_front(5); 5348 if (Str.getAsInteger(10, Val)) 5349 return MatchOperand_ParseFail; 5350 5351 if (Val >= 32) 5352 errorExpTgt(); 5353 5354 Val += 32; 5355 return MatchOperand_Success; 5356 } 5357 5358 if (Str.startswith("invalid_target_")) { 5359 Str = Str.drop_front(15); 5360 if (Str.getAsInteger(10, Val)) 5361 return MatchOperand_ParseFail; 5362 5363 errorExpTgt(); 5364 return MatchOperand_Success; 5365 } 5366 5367 return MatchOperand_NoMatch; 5368 } 5369 5370 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5371 uint8_t Val; 5372 StringRef Str = Parser.getTok().getString(); 5373 5374 auto Res = parseExpTgtImpl(Str, Val); 5375 if (Res != MatchOperand_Success) 5376 return Res; 5377 5378 SMLoc S = Parser.getTok().getLoc(); 5379 Parser.Lex(); 5380 5381 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5382 AMDGPUOperand::ImmTyExpTgt)); 5383 return MatchOperand_Success; 5384 } 5385 5386 //===----------------------------------------------------------------------===// 5387 // parser helpers 5388 //===----------------------------------------------------------------------===// 5389 5390 bool 5391 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5392 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5393 } 5394 5395 bool 5396 AMDGPUAsmParser::isId(const StringRef Id) const { 5397 return isId(getToken(), Id); 5398 } 5399 5400 bool 5401 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5402 return getTokenKind() == Kind; 5403 } 5404 5405 bool 5406 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5407 if (isId(Id)) { 5408 lex(); 5409 return true; 5410 } 5411 return false; 5412 } 5413 5414 bool 5415 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5416 if (isId(Id) && peekToken().is(Kind)) { 5417 lex(); 5418 lex(); 5419 return true; 5420 } 5421 return false; 5422 } 5423 5424 bool 5425 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5426 if (isToken(Kind)) { 5427 lex(); 5428 return true; 5429 } 5430 return false; 5431 } 5432 5433 bool 5434 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5435 const StringRef ErrMsg) { 5436 if (!trySkipToken(Kind)) { 5437 Error(getLoc(), ErrMsg); 5438 return false; 5439 } 5440 return true; 5441 } 5442 5443 bool 5444 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5445 return !getParser().parseAbsoluteExpression(Imm); 5446 } 5447 5448 bool 5449 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5450 SMLoc S = getLoc(); 5451 5452 const MCExpr *Expr; 5453 if (Parser.parseExpression(Expr)) 5454 return false; 5455 5456 int64_t IntVal; 5457 if (Expr->evaluateAsAbsolute(IntVal)) { 5458 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5459 } else { 5460 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5461 } 5462 return true; 5463 } 5464 5465 bool 5466 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5467 if (isToken(AsmToken::String)) { 5468 Val = getToken().getStringContents(); 5469 lex(); 5470 return true; 5471 } else { 5472 Error(getLoc(), ErrMsg); 5473 return false; 5474 } 5475 } 5476 5477 
AsmToken 5478 AMDGPUAsmParser::getToken() const { 5479 return Parser.getTok(); 5480 } 5481 5482 AsmToken 5483 AMDGPUAsmParser::peekToken() { 5484 return getLexer().peekTok(); 5485 } 5486 5487 void 5488 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5489 auto TokCount = getLexer().peekTokens(Tokens); 5490 5491 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5492 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5493 } 5494 5495 AsmToken::TokenKind 5496 AMDGPUAsmParser::getTokenKind() const { 5497 return getLexer().getKind(); 5498 } 5499 5500 SMLoc 5501 AMDGPUAsmParser::getLoc() const { 5502 return getToken().getLoc(); 5503 } 5504 5505 StringRef 5506 AMDGPUAsmParser::getTokenStr() const { 5507 return getToken().getString(); 5508 } 5509 5510 void 5511 AMDGPUAsmParser::lex() { 5512 Parser.Lex(); 5513 } 5514 5515 //===----------------------------------------------------------------------===// 5516 // swizzle 5517 //===----------------------------------------------------------------------===// 5518 5519 LLVM_READNONE 5520 static unsigned 5521 encodeBitmaskPerm(const unsigned AndMask, 5522 const unsigned OrMask, 5523 const unsigned XorMask) { 5524 using namespace llvm::AMDGPU::Swizzle; 5525 5526 return BITMASK_PERM_ENC | 5527 (AndMask << BITMASK_AND_SHIFT) | 5528 (OrMask << BITMASK_OR_SHIFT) | 5529 (XorMask << BITMASK_XOR_SHIFT); 5530 } 5531 5532 bool 5533 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5534 const unsigned MinVal, 5535 const unsigned MaxVal, 5536 const StringRef ErrMsg) { 5537 for (unsigned i = 0; i < OpNum; ++i) { 5538 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5539 return false; 5540 } 5541 SMLoc ExprLoc = Parser.getTok().getLoc(); 5542 if (!parseExpr(Op[i])) { 5543 return false; 5544 } 5545 if (Op[i] < MinVal || Op[i] > MaxVal) { 5546 Error(ExprLoc, ErrMsg); 5547 return false; 5548 } 5549 } 5550 5551 return true; 5552 } 5553 5554 bool 5555 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5556 using namespace llvm::AMDGPU::Swizzle; 5557 5558 int64_t Lane[LANE_NUM]; 5559 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5560 "expected a 2-bit lane id")) { 5561 Imm = QUAD_PERM_ENC; 5562 for (unsigned I = 0; I < LANE_NUM; ++I) { 5563 Imm |= Lane[I] << (LANE_SHIFT * I); 5564 } 5565 return true; 5566 } 5567 return false; 5568 } 5569 5570 bool 5571 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5572 using namespace llvm::AMDGPU::Swizzle; 5573 5574 SMLoc S = Parser.getTok().getLoc(); 5575 int64_t GroupSize; 5576 int64_t LaneIdx; 5577 5578 if (!parseSwizzleOperands(1, &GroupSize, 5579 2, 32, 5580 "group size must be in the interval [2,32]")) { 5581 return false; 5582 } 5583 if (!isPowerOf2_64(GroupSize)) { 5584 Error(S, "group size must be a power of two"); 5585 return false; 5586 } 5587 if (parseSwizzleOperands(1, &LaneIdx, 5588 0, GroupSize - 1, 5589 "lane id must be in the interval [0,group size - 1]")) { 5590 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5591 return true; 5592 } 5593 return false; 5594 } 5595 5596 bool 5597 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5598 using namespace llvm::AMDGPU::Swizzle; 5599 5600 SMLoc S = Parser.getTok().getLoc(); 5601 int64_t GroupSize; 5602 5603 if (!parseSwizzleOperands(1, &GroupSize, 5604 2, 32, "group size must be in the interval [2,32]")) { 5605 return false; 5606 } 5607 if (!isPowerOf2_64(GroupSize)) { 5608 Error(S, "group size must be a power of two"); 5609 return false; 5610 } 5611 5612 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, 
GroupSize - 1); 5613 return true; 5614 } 5615 5616 bool 5617 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5618 using namespace llvm::AMDGPU::Swizzle; 5619 5620 SMLoc S = Parser.getTok().getLoc(); 5621 int64_t GroupSize; 5622 5623 if (!parseSwizzleOperands(1, &GroupSize, 5624 1, 16, "group size must be in the interval [1,16]")) { 5625 return false; 5626 } 5627 if (!isPowerOf2_64(GroupSize)) { 5628 Error(S, "group size must be a power of two"); 5629 return false; 5630 } 5631 5632 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5633 return true; 5634 } 5635 5636 bool 5637 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5638 using namespace llvm::AMDGPU::Swizzle; 5639 5640 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5641 return false; 5642 } 5643 5644 StringRef Ctl; 5645 SMLoc StrLoc = Parser.getTok().getLoc(); 5646 if (!parseString(Ctl)) { 5647 return false; 5648 } 5649 if (Ctl.size() != BITMASK_WIDTH) { 5650 Error(StrLoc, "expected a 5-character mask"); 5651 return false; 5652 } 5653 5654 unsigned AndMask = 0; 5655 unsigned OrMask = 0; 5656 unsigned XorMask = 0; 5657 5658 for (size_t i = 0; i < Ctl.size(); ++i) { 5659 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5660 switch(Ctl[i]) { 5661 default: 5662 Error(StrLoc, "invalid mask"); 5663 return false; 5664 case '0': 5665 break; 5666 case '1': 5667 OrMask |= Mask; 5668 break; 5669 case 'p': 5670 AndMask |= Mask; 5671 break; 5672 case 'i': 5673 AndMask |= Mask; 5674 XorMask |= Mask; 5675 break; 5676 } 5677 } 5678 5679 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5680 return true; 5681 } 5682 5683 bool 5684 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5685 5686 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5687 5688 if (!parseExpr(Imm)) { 5689 return false; 5690 } 5691 if (!isUInt<16>(Imm)) { 5692 Error(OffsetLoc, "expected a 16-bit offset"); 5693 return false; 5694 } 5695 return true; 5696 } 5697 5698 bool 5699 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5700 using namespace llvm::AMDGPU::Swizzle; 5701 5702 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5703 5704 SMLoc ModeLoc = Parser.getTok().getLoc(); 5705 bool Ok = false; 5706 5707 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5708 Ok = parseSwizzleQuadPerm(Imm); 5709 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5710 Ok = parseSwizzleBitmaskPerm(Imm); 5711 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5712 Ok = parseSwizzleBroadcast(Imm); 5713 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5714 Ok = parseSwizzleSwap(Imm); 5715 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5716 Ok = parseSwizzleReverse(Imm); 5717 } else { 5718 Error(ModeLoc, "expected a swizzle mode"); 5719 } 5720 5721 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5722 } 5723 5724 return false; 5725 } 5726 5727 OperandMatchResultTy 5728 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5729 SMLoc S = Parser.getTok().getLoc(); 5730 int64_t Imm = 0; 5731 5732 if (trySkipId("offset")) { 5733 5734 bool Ok = false; 5735 if (skipToken(AsmToken::Colon, "expected a colon")) { 5736 if (trySkipId("swizzle")) { 5737 Ok = parseSwizzleMacro(Imm); 5738 } else { 5739 Ok = parseSwizzleOffset(Imm); 5740 } 5741 } 5742 5743 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5744 5745 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5746 } else { 5747 // Swizzle "offset" operand is optional. 5748 // If it is omitted, try parsing other optional operands. 
5749 return parseOptionalOpr(Operands); 5750 } 5751 } 5752 5753 bool 5754 AMDGPUOperand::isSwizzle() const { 5755 return isImmTy(ImmTySwizzle); 5756 } 5757 5758 //===----------------------------------------------------------------------===// 5759 // VGPR Index Mode 5760 //===----------------------------------------------------------------------===// 5761 5762 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5763 5764 using namespace llvm::AMDGPU::VGPRIndexMode; 5765 5766 if (trySkipToken(AsmToken::RParen)) { 5767 return OFF; 5768 } 5769 5770 int64_t Imm = 0; 5771 5772 while (true) { 5773 unsigned Mode = 0; 5774 SMLoc S = Parser.getTok().getLoc(); 5775 5776 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5777 if (trySkipId(IdSymbolic[ModeId])) { 5778 Mode = 1 << ModeId; 5779 break; 5780 } 5781 } 5782 5783 if (Mode == 0) { 5784 Error(S, (Imm == 0)? 5785 "expected a VGPR index mode or a closing parenthesis" : 5786 "expected a VGPR index mode"); 5787 break; 5788 } 5789 5790 if (Imm & Mode) { 5791 Error(S, "duplicate VGPR index mode"); 5792 break; 5793 } 5794 Imm |= Mode; 5795 5796 if (trySkipToken(AsmToken::RParen)) 5797 break; 5798 if (!skipToken(AsmToken::Comma, 5799 "expected a comma or a closing parenthesis")) 5800 break; 5801 } 5802 5803 return Imm; 5804 } 5805 5806 OperandMatchResultTy 5807 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5808 5809 int64_t Imm = 0; 5810 SMLoc S = Parser.getTok().getLoc(); 5811 5812 if (getLexer().getKind() == AsmToken::Identifier && 5813 Parser.getTok().getString() == "gpr_idx" && 5814 getLexer().peekTok().is(AsmToken::LParen)) { 5815 5816 Parser.Lex(); 5817 Parser.Lex(); 5818 5819 // If parse failed, trigger an error but do not return error code 5820 // to avoid excessive error messages. 5821 Imm = parseGPRIdxMacro(); 5822 5823 } else { 5824 if (getParser().parseAbsoluteExpression(Imm)) 5825 return MatchOperand_NoMatch; 5826 if (Imm < 0 || !isUInt<4>(Imm)) { 5827 Error(S, "invalid immediate: only 4-bit values are legal"); 5828 } 5829 } 5830 5831 Operands.push_back( 5832 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5833 return MatchOperand_Success; 5834 } 5835 5836 bool AMDGPUOperand::isGPRIdxMode() const { 5837 return isImmTy(ImmTyGprIdxMode); 5838 } 5839 5840 //===----------------------------------------------------------------------===// 5841 // sopp branch targets 5842 //===----------------------------------------------------------------------===// 5843 5844 OperandMatchResultTy 5845 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5846 5847 // Make sure we are not parsing something 5848 // that looks like a label or an expression but is not. 5849 // This will improve error messages. 5850 if (isRegister() || isModifier()) 5851 return MatchOperand_NoMatch; 5852 5853 if (parseExpr(Operands)) { 5854 5855 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 5856 assert(Opr.isImm() || Opr.isExpr()); 5857 SMLoc Loc = Opr.getStartLoc(); 5858 5859 // Currently we do not support arbitrary expressions as branch targets. 5860 // Only labels and absolute expressions are accepted. 
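// For example (illustrative): "s_branch loop_end" and "s_branch 4" are
// accepted, whereas a relocatable expression such as "loop_end + 4" is
// rejected below because it is neither a bare symbol reference nor
// evaluable to an absolute 16-bit value.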
5861 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 5862 Error(Loc, "expected an absolute expression or a label"); 5863 } else if (Opr.isImm() && !Opr.isS16Imm()) { 5864 Error(Loc, "expected a 16-bit signed jump offset"); 5865 } 5866 } 5867 5868 return MatchOperand_Success; // avoid excessive error messages 5869 } 5870 5871 //===----------------------------------------------------------------------===// 5872 // Boolean holding registers 5873 //===----------------------------------------------------------------------===// 5874 5875 OperandMatchResultTy 5876 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5877 return parseReg(Operands); 5878 } 5879 5880 //===----------------------------------------------------------------------===// 5881 // mubuf 5882 //===----------------------------------------------------------------------===// 5883 5884 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5885 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5886 } 5887 5888 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5889 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5890 } 5891 5892 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5893 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5894 } 5895 5896 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5897 const OperandVector &Operands, 5898 bool IsAtomic, 5899 bool IsAtomicReturn, 5900 bool IsLds) { 5901 bool IsLdsOpcode = IsLds; 5902 bool HasLdsModifier = false; 5903 OptionalImmIndexMap OptionalIdx; 5904 assert(IsAtomicReturn ? IsAtomic : true); 5905 unsigned FirstOperandIdx = 1; 5906 5907 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5908 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5909 5910 // Add the register arguments 5911 if (Op.isReg()) { 5912 Op.addRegOperands(Inst, 1); 5913 // Insert a tied src for atomic return dst. 5914 // This cannot be postponed as subsequent calls to 5915 // addImmOperands rely on correct number of MC operands. 5916 if (IsAtomicReturn && i == FirstOperandIdx) 5917 Op.addRegOperands(Inst, 1); 5918 continue; 5919 } 5920 5921 // Handle the case where soffset is an immediate 5922 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5923 Op.addImmOperands(Inst, 1); 5924 continue; 5925 } 5926 5927 HasLdsModifier |= Op.isLDS(); 5928 5929 // Handle tokens like 'offen' which are sometimes hard-coded into the 5930 // asm string. There are no MCInst operands for these. 5931 if (Op.isToken()) { 5932 continue; 5933 } 5934 assert(Op.isImm()); 5935 5936 // Handle optional arguments 5937 OptionalIdx[Op.getImmTy()] = i; 5938 } 5939 5940 // This is a workaround for an llvm quirk which may result in an 5941 // incorrect instruction selection. Lds and non-lds versions of 5942 // MUBUF instructions are identical except that lds versions 5943 // have mandatory 'lds' modifier. However this modifier follows 5944 // optional modifiers and llvm asm matcher regards this 'lds' 5945 // modifier as an optional one. As a result, an lds version 5946 // of opcode may be selected even if it has no 'lds' modifier. 5947 if (IsLdsOpcode && !HasLdsModifier) { 5948 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5949 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5950 Inst.setOpcode(NoLdsOpcode); 5951 IsLdsOpcode = false; 5952 } 5953 } 5954 5955 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5956 if (!IsAtomic) { // glc is hard-coded. 
5957 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5958 } 5959 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5960 5961 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5962 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5963 } 5964 5965 if (isGFX10()) 5966 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5967 } 5968 5969 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5970 OptionalImmIndexMap OptionalIdx; 5971 5972 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5973 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5974 5975 // Add the register arguments 5976 if (Op.isReg()) { 5977 Op.addRegOperands(Inst, 1); 5978 continue; 5979 } 5980 5981 // Handle the case where soffset is an immediate 5982 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5983 Op.addImmOperands(Inst, 1); 5984 continue; 5985 } 5986 5987 // Handle tokens like 'offen' which are sometimes hard-coded into the 5988 // asm string. There are no MCInst operands for these. 5989 if (Op.isToken()) { 5990 continue; 5991 } 5992 assert(Op.isImm()); 5993 5994 // Handle optional arguments 5995 OptionalIdx[Op.getImmTy()] = i; 5996 } 5997 5998 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5999 AMDGPUOperand::ImmTyOffset); 6000 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6001 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6002 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6003 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6004 6005 if (isGFX10()) 6006 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6007 } 6008 6009 //===----------------------------------------------------------------------===// 6010 // mimg 6011 //===----------------------------------------------------------------------===// 6012 6013 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6014 bool IsAtomic) { 6015 unsigned I = 1; 6016 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6017 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6018 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6019 } 6020 6021 if (IsAtomic) { 6022 // Add src, same as dst 6023 assert(Desc.getNumDefs() == 1); 6024 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6025 } 6026 6027 OptionalImmIndexMap OptionalIdx; 6028 6029 for (unsigned E = Operands.size(); I != E; ++I) { 6030 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6031 6032 // Add the register arguments 6033 if (Op.isReg()) { 6034 Op.addRegOperands(Inst, 1); 6035 } else if (Op.isImmModifier()) { 6036 OptionalIdx[Op.getImmTy()] = I; 6037 } else if (!Op.isToken()) { 6038 llvm_unreachable("unexpected operand type"); 6039 } 6040 } 6041 6042 bool IsGFX10 = isGFX10(); 6043 6044 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6045 if (IsGFX10) 6046 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6047 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6048 if (IsGFX10) 6049 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6050 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6051 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6052 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6053 if (IsGFX10) 6054 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6055 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6056 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6057 if (!IsGFX10) 6058 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6059 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6060 } 6061 6062 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6063 cvtMIMG(Inst, Operands, true); 6064 } 6065 6066 //===----------------------------------------------------------------------===// 6067 // smrd 6068 //===----------------------------------------------------------------------===// 6069 6070 bool AMDGPUOperand::isSMRDOffset8() const { 6071 return isImm() && isUInt<8>(getImm()); 6072 } 6073 6074 bool AMDGPUOperand::isSMRDOffset20() const { 6075 return isImm() && isUInt<20>(getImm()); 6076 } 6077 6078 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6079 // 32-bit literals are only supported on CI and we only want to use them 6080 // when the offset is > 8-bits. 6081 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6082 } 6083 6084 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6085 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6086 } 6087 6088 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 6089 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6090 } 6091 6092 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6093 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6094 } 6095 6096 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6097 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6098 } 6099 6100 //===----------------------------------------------------------------------===// 6101 // vop3 6102 //===----------------------------------------------------------------------===// 6103 6104 static bool ConvertOmodMul(int64_t &Mul) { 6105 if (Mul != 1 && Mul != 2 && Mul != 4) 6106 return false; 6107 6108 Mul >>= 1; 6109 return true; 6110 } 6111 6112 static bool ConvertOmodDiv(int64_t &Div) { 6113 if (Div == 1) { 6114 Div = 0; 6115 return true; 6116 } 6117 6118 if (Div == 2) { 6119 Div = 3; 6120 return true; 6121 } 6122 6123 return false; 6124 } 6125 6126 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6127 if (BoundCtrl == 0) { 6128 BoundCtrl = 1; 6129 return true; 6130 } 6131 6132 if (BoundCtrl == -1) { 6133 BoundCtrl = 0; 6134 return true; 6135 } 6136 6137 return false; 6138 } 6139 6140 // Note: the order in this table matches the order of operands in AsmString. 
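// Each entry is {asm name, immediate type, IsBit, ConvertResult}. Entries
// with IsBit set are bare flags such as "glc" parsed via parseNamedBit();
// the remaining entries expect a "name:value" form, and ConvertResult, when
// non-null, post-processes the parsed value (e.g. the omod mul/div encoding).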
6141 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6142 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6143 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6144 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6145 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6146 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6147 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6148 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6149 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6150 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6151 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6152 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 6153 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6154 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6155 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6156 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6157 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6158 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6159 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6160 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6161 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6162 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6163 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6164 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6165 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6166 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6167 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6168 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6169 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6170 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6171 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6172 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6173 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6174 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6175 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6176 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6177 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6178 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6179 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6180 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6181 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6182 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6183 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6184 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6185 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6186 }; 6187 6188 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6189 6190 OperandMatchResultTy res = parseOptionalOpr(Operands); 6191 6192 // This is a hack to enable hardcoded mandatory operands which follow 6193 // optional operands. 6194 // 6195 // Current design assumes that all operands after the first optional operand 6196 // are also optional. However implementation of some instructions violates 6197 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6198 // 6199 // To alleviate this problem, we have to (implicitly) parse extra operands 6200 // to make sure autogenerated parser of custom operands never hit hardcoded 6201 // mandatory operands. 
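// Illustration (the operand layout shown is hypothetical): for an atomic
// like "global_atomic_add v0, v[1:2], v0, off glc" the trailing "glc" is a
// hardcoded mandatory token, so after each successfully parsed optional
// operand we keep scanning ahead (up to MAX_OPR_LOOKAHEAD operands) instead
// of returning to the matcher right away.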
6202 6203 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6204 if (res != MatchOperand_Success || 6205 isToken(AsmToken::EndOfStatement)) 6206 break; 6207 6208 trySkipToken(AsmToken::Comma); 6209 res = parseOptionalOpr(Operands); 6210 } 6211 6212 return res; 6213 } 6214 6215 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6216 OperandMatchResultTy res; 6217 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6218 // try to parse any optional operand here 6219 if (Op.IsBit) { 6220 res = parseNamedBit(Op.Name, Operands, Op.Type); 6221 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6222 res = parseOModOperand(Operands); 6223 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6224 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6225 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6226 res = parseSDWASel(Operands, Op.Name, Op.Type); 6227 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6228 res = parseSDWADstUnused(Operands); 6229 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6230 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6231 Op.Type == AMDGPUOperand::ImmTyNegLo || 6232 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6233 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6234 Op.ConvertResult); 6235 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6236 res = parseDim(Operands); 6237 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 6238 res = parseDfmtNfmt(Operands); 6239 } else { 6240 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6241 } 6242 if (res != MatchOperand_NoMatch) { 6243 return res; 6244 } 6245 } 6246 return MatchOperand_NoMatch; 6247 } 6248 6249 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6250 StringRef Name = Parser.getTok().getString(); 6251 if (Name == "mul") { 6252 return parseIntWithPrefix("mul", Operands, 6253 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6254 } 6255 6256 if (Name == "div") { 6257 return parseIntWithPrefix("div", Operands, 6258 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6259 } 6260 6261 return MatchOperand_NoMatch; 6262 } 6263 6264 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6265 cvtVOP3P(Inst, Operands); 6266 6267 int Opc = Inst.getOpcode(); 6268 6269 int SrcNum; 6270 const int Ops[] = { AMDGPU::OpName::src0, 6271 AMDGPU::OpName::src1, 6272 AMDGPU::OpName::src2 }; 6273 for (SrcNum = 0; 6274 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6275 ++SrcNum); 6276 assert(SrcNum > 0); 6277 6278 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6279 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6280 6281 if ((OpSel & (1 << SrcNum)) != 0) { 6282 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6283 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6284 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6285 } 6286 } 6287 6288 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6289 // 1. This operand is input modifiers 6290 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6291 // 2. This is not last operand 6292 && Desc.NumOperands > (OpNum + 1) 6293 // 3. Next operand is register class 6294 && Desc.OpInfo[OpNum + 1].RegClass != -1 6295 // 4. 
Next register is not tied to any other operand 6296 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6297 } 6298 6299 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6300 { 6301 OptionalImmIndexMap OptionalIdx; 6302 unsigned Opc = Inst.getOpcode(); 6303 6304 unsigned I = 1; 6305 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6306 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6307 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6308 } 6309 6310 for (unsigned E = Operands.size(); I != E; ++I) { 6311 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6312 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6313 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6314 } else if (Op.isInterpSlot() || 6315 Op.isInterpAttr() || 6316 Op.isAttrChan()) { 6317 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6318 } else if (Op.isImmModifier()) { 6319 OptionalIdx[Op.getImmTy()] = I; 6320 } else { 6321 llvm_unreachable("unhandled operand type"); 6322 } 6323 } 6324 6325 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6326 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6327 } 6328 6329 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6330 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6331 } 6332 6333 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6334 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6335 } 6336 } 6337 6338 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6339 OptionalImmIndexMap &OptionalIdx) { 6340 unsigned Opc = Inst.getOpcode(); 6341 6342 unsigned I = 1; 6343 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6344 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6345 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6346 } 6347 6348 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6349 // This instruction has src modifiers 6350 for (unsigned E = Operands.size(); I != E; ++I) { 6351 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6352 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6353 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6354 } else if (Op.isImmModifier()) { 6355 OptionalIdx[Op.getImmTy()] = I; 6356 } else if (Op.isRegOrImm()) { 6357 Op.addRegOrImmOperands(Inst, 1); 6358 } else { 6359 llvm_unreachable("unhandled operand type"); 6360 } 6361 } 6362 } else { 6363 // No src modifiers 6364 for (unsigned E = Operands.size(); I != E; ++I) { 6365 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6366 if (Op.isMod()) { 6367 OptionalIdx[Op.getImmTy()] = I; 6368 } else { 6369 Op.addRegOrImmOperands(Inst, 1); 6370 } 6371 } 6372 } 6373 6374 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6375 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6376 } 6377 6378 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6379 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6380 } 6381 6382 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6383 // it has src2 register operand that is tied to dst operand 6384 // we don't allow modifiers for this operand in assembler so src2_modifiers 6385 // should be 0. 
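// Illustrative example: for "v_mac_f32_e64 v0, v1, v2" the tied src2 operand
// (equal to the dst, v0) and its zeroed src2_modifiers are synthesized below
// rather than being parsed from the assembly string.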
6386 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6387 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6388 Opc == AMDGPU::V_MAC_F32_e64_vi || 6389 Opc == AMDGPU::V_MAC_F16_e64_vi || 6390 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6391 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6392 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6393 auto it = Inst.begin(); 6394 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6395 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6396 ++it; 6397 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6398 } 6399 } 6400 6401 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6402 OptionalImmIndexMap OptionalIdx; 6403 cvtVOP3(Inst, Operands, OptionalIdx); 6404 } 6405 6406 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6407 const OperandVector &Operands) { 6408 OptionalImmIndexMap OptIdx; 6409 const int Opc = Inst.getOpcode(); 6410 const MCInstrDesc &Desc = MII.get(Opc); 6411 6412 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6413 6414 cvtVOP3(Inst, Operands, OptIdx); 6415 6416 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6417 assert(!IsPacked); 6418 Inst.addOperand(Inst.getOperand(0)); 6419 } 6420 6421 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6422 // instruction, and then figure out where to actually put the modifiers 6423 6424 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6425 6426 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6427 if (OpSelHiIdx != -1) { 6428 int DefaultVal = IsPacked ? -1 : 0; 6429 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6430 DefaultVal); 6431 } 6432 6433 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6434 if (NegLoIdx != -1) { 6435 assert(IsPacked); 6436 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6437 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6438 } 6439 6440 const int Ops[] = { AMDGPU::OpName::src0, 6441 AMDGPU::OpName::src1, 6442 AMDGPU::OpName::src2 }; 6443 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6444 AMDGPU::OpName::src1_modifiers, 6445 AMDGPU::OpName::src2_modifiers }; 6446 6447 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6448 6449 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6450 unsigned OpSelHi = 0; 6451 unsigned NegLo = 0; 6452 unsigned NegHi = 0; 6453 6454 if (OpSelHiIdx != -1) { 6455 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6456 } 6457 6458 if (NegLoIdx != -1) { 6459 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6460 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6461 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6462 } 6463 6464 for (int J = 0; J < 3; ++J) { 6465 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6466 if (OpIdx == -1) 6467 break; 6468 6469 uint32_t ModVal = 0; 6470 6471 if ((OpSel & (1 << J)) != 0) 6472 ModVal |= SISrcMods::OP_SEL_0; 6473 6474 if ((OpSelHi & (1 << J)) != 0) 6475 ModVal |= SISrcMods::OP_SEL_1; 6476 6477 if ((NegLo & (1 << J)) != 0) 6478 ModVal |= SISrcMods::NEG; 6479 6480 if ((NegHi & (1 << J)) != 0) 6481 ModVal |= SISrcMods::NEG_HI; 6482 6483 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6484 6485 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6486 } 6487 } 6488 6489 //===----------------------------------------------------------------------===// 6490 // dpp 6491 
//===----------------------------------------------------------------------===// 6492 6493 bool AMDGPUOperand::isDPP8() const { 6494 return isImmTy(ImmTyDPP8); 6495 } 6496 6497 bool AMDGPUOperand::isDPPCtrl() const { 6498 using namespace AMDGPU::DPP; 6499 6500 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6501 if (result) { 6502 int64_t Imm = getImm(); 6503 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6504 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6505 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6506 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6507 (Imm == DppCtrl::WAVE_SHL1) || 6508 (Imm == DppCtrl::WAVE_ROL1) || 6509 (Imm == DppCtrl::WAVE_SHR1) || 6510 (Imm == DppCtrl::WAVE_ROR1) || 6511 (Imm == DppCtrl::ROW_MIRROR) || 6512 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6513 (Imm == DppCtrl::BCAST15) || 6514 (Imm == DppCtrl::BCAST31) || 6515 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6516 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6517 } 6518 return false; 6519 } 6520 6521 //===----------------------------------------------------------------------===// 6522 // mAI 6523 //===----------------------------------------------------------------------===// 6524 6525 bool AMDGPUOperand::isBLGP() const { 6526 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6527 } 6528 6529 bool AMDGPUOperand::isCBSZ() const { 6530 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6531 } 6532 6533 bool AMDGPUOperand::isABID() const { 6534 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6535 } 6536 6537 bool AMDGPUOperand::isS16Imm() const { 6538 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6539 } 6540 6541 bool AMDGPUOperand::isU16Imm() const { 6542 return isImm() && isUInt<16>(getImm()); 6543 } 6544 6545 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6546 if (!isGFX10()) 6547 return MatchOperand_NoMatch; 6548 6549 SMLoc S = Parser.getTok().getLoc(); 6550 6551 if (getLexer().isNot(AsmToken::Identifier)) 6552 return MatchOperand_NoMatch; 6553 if (getLexer().getTok().getString() != "dim") 6554 return MatchOperand_NoMatch; 6555 6556 Parser.Lex(); 6557 if (getLexer().isNot(AsmToken::Colon)) 6558 return MatchOperand_ParseFail; 6559 6560 Parser.Lex(); 6561 6562 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6563 // integer. 
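// To recover the full suffix, glue a leading integer token back onto the
// identifier that immediately follows it (adjacency is checked via source
// locations), so that e.g. "1D" or "2D_MSAA" is rebuilt before the optional
// SQ_RSRC_IMG_ prefix is stripped and the dim info is looked up.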
6564 std::string Token; 6565 if (getLexer().is(AsmToken::Integer)) { 6566 SMLoc Loc = getLexer().getTok().getEndLoc(); 6567 Token = std::string(getLexer().getTok().getString()); 6568 Parser.Lex(); 6569 if (getLexer().getTok().getLoc() != Loc) 6570 return MatchOperand_ParseFail; 6571 } 6572 if (getLexer().isNot(AsmToken::Identifier)) 6573 return MatchOperand_ParseFail; 6574 Token += getLexer().getTok().getString(); 6575 6576 StringRef DimId = Token; 6577 if (DimId.startswith("SQ_RSRC_IMG_")) 6578 DimId = DimId.substr(12); 6579 6580 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6581 if (!DimInfo) 6582 return MatchOperand_ParseFail; 6583 6584 Parser.Lex(); 6585 6586 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6587 AMDGPUOperand::ImmTyDim)); 6588 return MatchOperand_Success; 6589 } 6590 6591 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6592 SMLoc S = Parser.getTok().getLoc(); 6593 StringRef Prefix; 6594 6595 if (getLexer().getKind() == AsmToken::Identifier) { 6596 Prefix = Parser.getTok().getString(); 6597 } else { 6598 return MatchOperand_NoMatch; 6599 } 6600 6601 if (Prefix != "dpp8") 6602 return parseDPPCtrl(Operands); 6603 if (!isGFX10()) 6604 return MatchOperand_NoMatch; 6605 6606 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6607 6608 int64_t Sels[8]; 6609 6610 Parser.Lex(); 6611 if (getLexer().isNot(AsmToken::Colon)) 6612 return MatchOperand_ParseFail; 6613 6614 Parser.Lex(); 6615 if (getLexer().isNot(AsmToken::LBrac)) 6616 return MatchOperand_ParseFail; 6617 6618 Parser.Lex(); 6619 if (getParser().parseAbsoluteExpression(Sels[0])) 6620 return MatchOperand_ParseFail; 6621 if (0 > Sels[0] || 7 < Sels[0]) 6622 return MatchOperand_ParseFail; 6623 6624 for (size_t i = 1; i < 8; ++i) { 6625 if (getLexer().isNot(AsmToken::Comma)) 6626 return MatchOperand_ParseFail; 6627 6628 Parser.Lex(); 6629 if (getParser().parseAbsoluteExpression(Sels[i])) 6630 return MatchOperand_ParseFail; 6631 if (0 > Sels[i] || 7 < Sels[i]) 6632 return MatchOperand_ParseFail; 6633 } 6634 6635 if (getLexer().isNot(AsmToken::RBrac)) 6636 return MatchOperand_ParseFail; 6637 Parser.Lex(); 6638 6639 unsigned DPP8 = 0; 6640 for (size_t i = 0; i < 8; ++i) 6641 DPP8 |= (Sels[i] << (i * 3)); 6642 6643 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6644 return MatchOperand_Success; 6645 } 6646 6647 OperandMatchResultTy 6648 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6649 using namespace AMDGPU::DPP; 6650 6651 SMLoc S = Parser.getTok().getLoc(); 6652 StringRef Prefix; 6653 int64_t Int; 6654 6655 if (getLexer().getKind() == AsmToken::Identifier) { 6656 Prefix = Parser.getTok().getString(); 6657 } else { 6658 return MatchOperand_NoMatch; 6659 } 6660 6661 if (Prefix == "row_mirror") { 6662 Int = DppCtrl::ROW_MIRROR; 6663 Parser.Lex(); 6664 } else if (Prefix == "row_half_mirror") { 6665 Int = DppCtrl::ROW_HALF_MIRROR; 6666 Parser.Lex(); 6667 } else { 6668 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6669 if (Prefix != "quad_perm" 6670 && Prefix != "row_shl" 6671 && Prefix != "row_shr" 6672 && Prefix != "row_ror" 6673 && Prefix != "wave_shl" 6674 && Prefix != "wave_rol" 6675 && Prefix != "wave_shr" 6676 && Prefix != "wave_ror" 6677 && Prefix != "row_bcast" 6678 && Prefix != "row_share" 6679 && Prefix != "row_xmask") { 6680 return MatchOperand_NoMatch; 6681 } 6682 6683 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6684 return MatchOperand_NoMatch; 6685 6686 if 
(!isVI() && !isGFX9() && 6687 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6688 Prefix == "wave_rol" || Prefix == "wave_ror" || 6689 Prefix == "row_bcast")) 6690 return MatchOperand_NoMatch; 6691 6692 Parser.Lex(); 6693 if (getLexer().isNot(AsmToken::Colon)) 6694 return MatchOperand_ParseFail; 6695 6696 if (Prefix == "quad_perm") { 6697 // quad_perm:[%d,%d,%d,%d] 6698 Parser.Lex(); 6699 if (getLexer().isNot(AsmToken::LBrac)) 6700 return MatchOperand_ParseFail; 6701 Parser.Lex(); 6702 6703 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6704 return MatchOperand_ParseFail; 6705 6706 for (int i = 0; i < 3; ++i) { 6707 if (getLexer().isNot(AsmToken::Comma)) 6708 return MatchOperand_ParseFail; 6709 Parser.Lex(); 6710 6711 int64_t Temp; 6712 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6713 return MatchOperand_ParseFail; 6714 const int shift = i*2 + 2; 6715 Int += (Temp << shift); 6716 } 6717 6718 if (getLexer().isNot(AsmToken::RBrac)) 6719 return MatchOperand_ParseFail; 6720 Parser.Lex(); 6721 } else { 6722 // sel:%d 6723 Parser.Lex(); 6724 if (getParser().parseAbsoluteExpression(Int)) 6725 return MatchOperand_ParseFail; 6726 6727 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6728 Int |= DppCtrl::ROW_SHL0; 6729 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6730 Int |= DppCtrl::ROW_SHR0; 6731 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6732 Int |= DppCtrl::ROW_ROR0; 6733 } else if (Prefix == "wave_shl" && 1 == Int) { 6734 Int = DppCtrl::WAVE_SHL1; 6735 } else if (Prefix == "wave_rol" && 1 == Int) { 6736 Int = DppCtrl::WAVE_ROL1; 6737 } else if (Prefix == "wave_shr" && 1 == Int) { 6738 Int = DppCtrl::WAVE_SHR1; 6739 } else if (Prefix == "wave_ror" && 1 == Int) { 6740 Int = DppCtrl::WAVE_ROR1; 6741 } else if (Prefix == "row_bcast") { 6742 if (Int == 15) { 6743 Int = DppCtrl::BCAST15; 6744 } else if (Int == 31) { 6745 Int = DppCtrl::BCAST31; 6746 } else { 6747 return MatchOperand_ParseFail; 6748 } 6749 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6750 Int |= DppCtrl::ROW_SHARE_FIRST; 6751 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6752 Int |= DppCtrl::ROW_XMASK_FIRST; 6753 } else { 6754 return MatchOperand_ParseFail; 6755 } 6756 } 6757 } 6758 6759 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6760 return MatchOperand_Success; 6761 } 6762 6763 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6764 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6765 } 6766 6767 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6768 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6769 } 6770 6771 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6772 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6773 } 6774 6775 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6776 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6777 } 6778 6779 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6780 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6781 } 6782 6783 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6784 OptionalImmIndexMap OptionalIdx; 6785 6786 unsigned I = 1; 6787 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6788 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6789 
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6790 } 6791 6792 int Fi = 0; 6793 for (unsigned E = Operands.size(); I != E; ++I) { 6794 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6795 MCOI::TIED_TO); 6796 if (TiedTo != -1) { 6797 assert((unsigned)TiedTo < Inst.getNumOperands()); 6798 // handle tied old or src2 for MAC instructions 6799 Inst.addOperand(Inst.getOperand(TiedTo)); 6800 } 6801 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6802 // Add the register arguments 6803 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6804 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6805 // Skip it. 6806 continue; 6807 } 6808 6809 if (IsDPP8) { 6810 if (Op.isDPP8()) { 6811 Op.addImmOperands(Inst, 1); 6812 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6813 Op.addRegWithFPInputModsOperands(Inst, 2); 6814 } else if (Op.isFI()) { 6815 Fi = Op.getImm(); 6816 } else if (Op.isReg()) { 6817 Op.addRegOperands(Inst, 1); 6818 } else { 6819 llvm_unreachable("Invalid operand type"); 6820 } 6821 } else { 6822 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6823 Op.addRegWithFPInputModsOperands(Inst, 2); 6824 } else if (Op.isDPPCtrl()) { 6825 Op.addImmOperands(Inst, 1); 6826 } else if (Op.isImm()) { 6827 // Handle optional arguments 6828 OptionalIdx[Op.getImmTy()] = I; 6829 } else { 6830 llvm_unreachable("Invalid operand type"); 6831 } 6832 } 6833 } 6834 6835 if (IsDPP8) { 6836 using namespace llvm::AMDGPU::DPP; 6837 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6838 } else { 6839 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6840 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6841 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6842 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6843 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6844 } 6845 } 6846 } 6847 6848 //===----------------------------------------------------------------------===// 6849 // sdwa 6850 //===----------------------------------------------------------------------===// 6851 6852 OperandMatchResultTy 6853 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6854 AMDGPUOperand::ImmTy Type) { 6855 using namespace llvm::AMDGPU::SDWA; 6856 6857 SMLoc S = Parser.getTok().getLoc(); 6858 StringRef Value; 6859 OperandMatchResultTy res; 6860 6861 res = parseStringWithPrefix(Prefix, Value); 6862 if (res != MatchOperand_Success) { 6863 return res; 6864 } 6865 6866 int64_t Int; 6867 Int = StringSwitch<int64_t>(Value) 6868 .Case("BYTE_0", SdwaSel::BYTE_0) 6869 .Case("BYTE_1", SdwaSel::BYTE_1) 6870 .Case("BYTE_2", SdwaSel::BYTE_2) 6871 .Case("BYTE_3", SdwaSel::BYTE_3) 6872 .Case("WORD_0", SdwaSel::WORD_0) 6873 .Case("WORD_1", SdwaSel::WORD_1) 6874 .Case("DWORD", SdwaSel::DWORD) 6875 .Default(0xffffffff); 6876 Parser.Lex(); // eat last token 6877 6878 if (Int == 0xffffffff) { 6879 return MatchOperand_ParseFail; 6880 } 6881 6882 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6883 return MatchOperand_Success; 6884 } 6885 6886 OperandMatchResultTy 6887 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6888 using namespace llvm::AMDGPU::SDWA; 6889 6890 SMLoc S = Parser.getTok().getLoc(); 6891 StringRef Value; 6892 OperandMatchResultTy res; 6893 6894 res = parseStringWithPrefix("dst_unused", Value); 6895 if (res != 
MatchOperand_Success) { 6896 return res; 6897 } 6898 6899 int64_t Int; 6900 Int = StringSwitch<int64_t>(Value) 6901 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6902 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6903 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6904 .Default(0xffffffff); 6905 Parser.Lex(); // eat last token 6906 6907 if (Int == 0xffffffff) { 6908 return MatchOperand_ParseFail; 6909 } 6910 6911 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6912 return MatchOperand_Success; 6913 } 6914 6915 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6916 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6917 } 6918 6919 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6920 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6921 } 6922 6923 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6924 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 6925 } 6926 6927 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 6928 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 6929 } 6930 6931 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6932 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6933 } 6934 6935 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6936 uint64_t BasicInstType, 6937 bool SkipDstVcc, 6938 bool SkipSrcVcc) { 6939 using namespace llvm::AMDGPU::SDWA; 6940 6941 OptionalImmIndexMap OptionalIdx; 6942 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 6943 bool SkippedVcc = false; 6944 6945 unsigned I = 1; 6946 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6947 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6948 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6949 } 6950 6951 for (unsigned E = Operands.size(); I != E; ++I) { 6952 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6953 if (SkipVcc && !SkippedVcc && Op.isReg() && 6954 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 6955 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 6956 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 6957 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6958 // Skip VCC only if we didn't skip it on previous iteration. 6959 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
6960 if (BasicInstType == SIInstrFlags::VOP2 && 6961 ((SkipDstVcc && Inst.getNumOperands() == 1) || 6962 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 6963 SkippedVcc = true; 6964 continue; 6965 } else if (BasicInstType == SIInstrFlags::VOPC && 6966 Inst.getNumOperands() == 0) { 6967 SkippedVcc = true; 6968 continue; 6969 } 6970 } 6971 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6972 Op.addRegOrImmWithInputModsOperands(Inst, 2); 6973 } else if (Op.isImm()) { 6974 // Handle optional arguments 6975 OptionalIdx[Op.getImmTy()] = I; 6976 } else { 6977 llvm_unreachable("Invalid operand type"); 6978 } 6979 SkippedVcc = false; 6980 } 6981 6982 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 6983 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 6984 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 6985 // v_nop_sdwa_gfx10/gfx9/vi has no optional sdwa arguments 6986 switch (BasicInstType) { 6987 case SIInstrFlags::VOP1: 6988 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6989 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6990 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6991 } 6992 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6993 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6994 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6995 break; 6996 6997 case SIInstrFlags::VOP2: 6998 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6999 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 7000 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 7001 } 7002 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 7003 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 7004 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7005 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 7006 break; 7007 7008 case SIInstrFlags::VOPC: 7009 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 7010 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7011 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7012 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 7013 break; 7014 7015 default: 7016 llvm_unreachable("Invalid instruction type.
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to see a token and fails to validate
  // the operand. This method checks whether we were given an immediate operand
  // but are expected to match the corresponding token.
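  // Illustrative example (not part of the original comments): in an
  // instruction such as "buffer_load_dword v0, off, s[0:3], 0 glc", the
  // trailing "glc" reaches the matcher as an immediate operand (ImmTyGLC)
  // rather than as the literal "glc" token the generated matcher table
  // expects, so the MCK_glc case below accepts it via isGLC().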
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional, if not present default to 0
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
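// Usage sketch (illustrative; not taken from the original source): the parser
// above accepts an optional 16-bit immediate, so both of the following forms
// are expected to assemble, with the omitted operand defaulting to 0:
//   s_endpgm
//   s_endpgm 3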