//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }
  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
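      // No explicit CPU/feature set was requested, so fall back to the oldest
      // supported generation (Southern Islands) so parsing can still proceed.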
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
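
  // SDWA (sub-dword addressing) operand parsing and MCInst conversion helpers.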
  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to the semantics of the requested type.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept such literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
         (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ?
sizeof(double) : getOperandSize(InstDesc, OpNum); 1726 Val = applyInputFPModifiers(Val, Size); 1727 } 1728 1729 APInt Literal(64, Val); 1730 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1731 1732 if (Imm.IsFPImm) { // We got fp literal token 1733 switch (OpTy) { 1734 case AMDGPU::OPERAND_REG_IMM_INT64: 1735 case AMDGPU::OPERAND_REG_IMM_FP64: 1736 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1737 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1738 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1739 AsmParser->hasInv2PiInlineImm())) { 1740 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1741 return; 1742 } 1743 1744 // Non-inlineable 1745 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1746 // For fp operands we check if low 32 bits are zeros 1747 if (Literal.getLoBits(32) != 0) { 1748 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1749 "Can't encode literal as exact 64-bit floating-point operand. " 1750 "Low 32-bits will be set to zero"); 1751 } 1752 1753 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1754 return; 1755 } 1756 1757 // We don't allow fp literals in 64-bit integer instructions. It is 1758 // unclear how we should encode them. This case should be checked earlier 1759 // in predicate methods (isLiteralImm()) 1760 llvm_unreachable("fp literal in 64-bit integer instruction."); 1761 1762 case AMDGPU::OPERAND_REG_IMM_INT32: 1763 case AMDGPU::OPERAND_REG_IMM_FP32: 1764 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1765 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1766 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1767 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1768 case AMDGPU::OPERAND_REG_IMM_INT16: 1769 case AMDGPU::OPERAND_REG_IMM_FP16: 1770 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1771 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1772 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1773 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1774 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1775 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1776 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1777 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1778 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1779 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1780 bool lost; 1781 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1782 // Convert literal to single precision 1783 FPLiteral.convert(*getOpFltSemantics(OpTy), 1784 APFloat::rmNearestTiesToEven, &lost); 1785 // We allow precision lost but not overflow or underflow. This should be 1786 // checked earlier in isLiteralImm() 1787 1788 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1789 Inst.addOperand(MCOperand::createImm(ImmVal)); 1790 return; 1791 } 1792 default: 1793 llvm_unreachable("invalid operand size"); 1794 } 1795 1796 return; 1797 } 1798 1799 // We got int literal token. 1800 // Only sign extend inline immediates. 
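  // E.g. for a 16-bit operand below, -1 is an inline immediate and is emitted
  // unchanged (sign extended), while 0x1234f00d is not inlinable, so only its
  // low 16 bits (0xf00d) are encoded.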
1801 switch (OpTy) { 1802 case AMDGPU::OPERAND_REG_IMM_INT32: 1803 case AMDGPU::OPERAND_REG_IMM_FP32: 1804 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1805 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1806 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1807 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1808 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1809 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1810 if (isSafeTruncation(Val, 32) && 1811 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1812 AsmParser->hasInv2PiInlineImm())) { 1813 Inst.addOperand(MCOperand::createImm(Val)); 1814 return; 1815 } 1816 1817 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1818 return; 1819 1820 case AMDGPU::OPERAND_REG_IMM_INT64: 1821 case AMDGPU::OPERAND_REG_IMM_FP64: 1822 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1823 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1824 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1825 Inst.addOperand(MCOperand::createImm(Val)); 1826 return; 1827 } 1828 1829 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1830 return; 1831 1832 case AMDGPU::OPERAND_REG_IMM_INT16: 1833 case AMDGPU::OPERAND_REG_IMM_FP16: 1834 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1835 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1836 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1837 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1838 if (isSafeTruncation(Val, 16) && 1839 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1840 AsmParser->hasInv2PiInlineImm())) { 1841 Inst.addOperand(MCOperand::createImm(Val)); 1842 return; 1843 } 1844 1845 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1846 return; 1847 1848 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1849 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1850 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1851 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1852 assert(isSafeTruncation(Val, 16)); 1853 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1854 AsmParser->hasInv2PiInlineImm())); 1855 1856 Inst.addOperand(MCOperand::createImm(Val)); 1857 return; 1858 } 1859 default: 1860 llvm_unreachable("invalid operand size"); 1861 } 1862 } 1863 1864 template <unsigned Bitwidth> 1865 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1866 APInt Literal(64, Imm.Val); 1867 1868 if (!Imm.IsFPImm) { 1869 // We got int literal token. 
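    // Only the low Bitwidth bits of the value are encoded; e.g. with
    // Bitwidth == 16, an integer literal 0x12345 is emitted as 0x2345.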
1870 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1871 return; 1872 } 1873 1874 bool Lost; 1875 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1876 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1877 APFloat::rmNearestTiesToEven, &Lost); 1878 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1879 } 1880 1881 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1882 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1883 } 1884 1885 static bool isInlineValue(unsigned Reg) { 1886 switch (Reg) { 1887 case AMDGPU::SRC_SHARED_BASE: 1888 case AMDGPU::SRC_SHARED_LIMIT: 1889 case AMDGPU::SRC_PRIVATE_BASE: 1890 case AMDGPU::SRC_PRIVATE_LIMIT: 1891 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1892 return true; 1893 case AMDGPU::SRC_VCCZ: 1894 case AMDGPU::SRC_EXECZ: 1895 case AMDGPU::SRC_SCC: 1896 return true; 1897 case AMDGPU::SGPR_NULL: 1898 return true; 1899 default: 1900 return false; 1901 } 1902 } 1903 1904 bool AMDGPUOperand::isInlineValue() const { 1905 return isRegKind() && ::isInlineValue(getReg()); 1906 } 1907 1908 //===----------------------------------------------------------------------===// 1909 // AsmParser 1910 //===----------------------------------------------------------------------===// 1911 1912 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1913 if (Is == IS_VGPR) { 1914 switch (RegWidth) { 1915 default: return -1; 1916 case 1: return AMDGPU::VGPR_32RegClassID; 1917 case 2: return AMDGPU::VReg_64RegClassID; 1918 case 3: return AMDGPU::VReg_96RegClassID; 1919 case 4: return AMDGPU::VReg_128RegClassID; 1920 case 5: return AMDGPU::VReg_160RegClassID; 1921 case 6: return AMDGPU::VReg_192RegClassID; 1922 case 8: return AMDGPU::VReg_256RegClassID; 1923 case 16: return AMDGPU::VReg_512RegClassID; 1924 case 32: return AMDGPU::VReg_1024RegClassID; 1925 } 1926 } else if (Is == IS_TTMP) { 1927 switch (RegWidth) { 1928 default: return -1; 1929 case 1: return AMDGPU::TTMP_32RegClassID; 1930 case 2: return AMDGPU::TTMP_64RegClassID; 1931 case 4: return AMDGPU::TTMP_128RegClassID; 1932 case 8: return AMDGPU::TTMP_256RegClassID; 1933 case 16: return AMDGPU::TTMP_512RegClassID; 1934 } 1935 } else if (Is == IS_SGPR) { 1936 switch (RegWidth) { 1937 default: return -1; 1938 case 1: return AMDGPU::SGPR_32RegClassID; 1939 case 2: return AMDGPU::SGPR_64RegClassID; 1940 case 3: return AMDGPU::SGPR_96RegClassID; 1941 case 4: return AMDGPU::SGPR_128RegClassID; 1942 case 5: return AMDGPU::SGPR_160RegClassID; 1943 case 6: return AMDGPU::SGPR_192RegClassID; 1944 case 8: return AMDGPU::SGPR_256RegClassID; 1945 case 16: return AMDGPU::SGPR_512RegClassID; 1946 } 1947 } else if (Is == IS_AGPR) { 1948 switch (RegWidth) { 1949 default: return -1; 1950 case 1: return AMDGPU::AGPR_32RegClassID; 1951 case 2: return AMDGPU::AReg_64RegClassID; 1952 case 3: return AMDGPU::AReg_96RegClassID; 1953 case 4: return AMDGPU::AReg_128RegClassID; 1954 case 5: return AMDGPU::AReg_160RegClassID; 1955 case 6: return AMDGPU::AReg_192RegClassID; 1956 case 8: return AMDGPU::AReg_256RegClassID; 1957 case 16: return AMDGPU::AReg_512RegClassID; 1958 case 32: return AMDGPU::AReg_1024RegClassID; 1959 } 1960 } 1961 return -1; 1962 } 1963 1964 static unsigned getSpecialRegForName(StringRef RegName) { 1965 return StringSwitch<unsigned>(RegName) 1966 .Case("exec", AMDGPU::EXEC) 1967 .Case("vcc", AMDGPU::VCC) 1968 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1969 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1970 
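      // Several of the named sources below also accept a "src_"-prefixed
      // spelling, e.g. both "shared_base" and "src_shared_base" map to
      // SRC_SHARED_BASE.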
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1971 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1972 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1973 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1974 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1975 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1976 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1977 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1978 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1979 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1980 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1981 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1982 .Case("m0", AMDGPU::M0) 1983 .Case("vccz", AMDGPU::SRC_VCCZ) 1984 .Case("src_vccz", AMDGPU::SRC_VCCZ) 1985 .Case("execz", AMDGPU::SRC_EXECZ) 1986 .Case("src_execz", AMDGPU::SRC_EXECZ) 1987 .Case("scc", AMDGPU::SRC_SCC) 1988 .Case("src_scc", AMDGPU::SRC_SCC) 1989 .Case("tba", AMDGPU::TBA) 1990 .Case("tma", AMDGPU::TMA) 1991 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1992 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1993 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1994 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1995 .Case("vcc_lo", AMDGPU::VCC_LO) 1996 .Case("vcc_hi", AMDGPU::VCC_HI) 1997 .Case("exec_lo", AMDGPU::EXEC_LO) 1998 .Case("exec_hi", AMDGPU::EXEC_HI) 1999 .Case("tma_lo", AMDGPU::TMA_LO) 2000 .Case("tma_hi", AMDGPU::TMA_HI) 2001 .Case("tba_lo", AMDGPU::TBA_LO) 2002 .Case("tba_hi", AMDGPU::TBA_HI) 2003 .Case("pc", AMDGPU::PC_REG) 2004 .Case("null", AMDGPU::SGPR_NULL) 2005 .Default(AMDGPU::NoRegister); 2006 } 2007 2008 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2009 SMLoc &EndLoc, bool RestoreOnFailure) { 2010 auto R = parseRegister(); 2011 if (!R) return true; 2012 assert(R->isReg()); 2013 RegNo = R->getReg(); 2014 StartLoc = R->getStartLoc(); 2015 EndLoc = R->getEndLoc(); 2016 return false; 2017 } 2018 2019 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2020 SMLoc &EndLoc) { 2021 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2022 } 2023 2024 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2025 SMLoc &StartLoc, 2026 SMLoc &EndLoc) { 2027 bool Result = 2028 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2029 bool PendingErrors = getParser().hasPendingError(); 2030 getParser().clearPendingErrors(); 2031 if (PendingErrors) 2032 return MatchOperand_ParseFail; 2033 if (Result) 2034 return MatchOperand_NoMatch; 2035 return MatchOperand_Success; 2036 } 2037 2038 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2039 RegisterKind RegKind, unsigned Reg1) { 2040 switch (RegKind) { 2041 case IS_SPECIAL: 2042 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2043 Reg = AMDGPU::EXEC; 2044 RegWidth = 2; 2045 return true; 2046 } 2047 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2048 Reg = AMDGPU::FLAT_SCR; 2049 RegWidth = 2; 2050 return true; 2051 } 2052 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2053 Reg = AMDGPU::XNACK_MASK; 2054 RegWidth = 2; 2055 return true; 2056 } 2057 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2058 Reg = AMDGPU::VCC; 2059 RegWidth = 2; 2060 return true; 2061 } 2062 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2063 Reg = AMDGPU::TBA; 2064 RegWidth = 2; 2065 return true; 2066 } 2067 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2068 Reg = AMDGPU::TMA; 2069 RegWidth = 2; 
2070 return true; 2071 } 2072 return false; 2073 case IS_VGPR: 2074 case IS_SGPR: 2075 case IS_AGPR: 2076 case IS_TTMP: 2077 if (Reg1 != Reg + RegWidth) { 2078 return false; 2079 } 2080 RegWidth++; 2081 return true; 2082 default: 2083 llvm_unreachable("unexpected register kind"); 2084 } 2085 } 2086 2087 struct RegInfo { 2088 StringLiteral Name; 2089 RegisterKind Kind; 2090 }; 2091 2092 static constexpr RegInfo RegularRegisters[] = { 2093 {{"v"}, IS_VGPR}, 2094 {{"s"}, IS_SGPR}, 2095 {{"ttmp"}, IS_TTMP}, 2096 {{"acc"}, IS_AGPR}, 2097 {{"a"}, IS_AGPR}, 2098 }; 2099 2100 static bool isRegularReg(RegisterKind Kind) { 2101 return Kind == IS_VGPR || 2102 Kind == IS_SGPR || 2103 Kind == IS_TTMP || 2104 Kind == IS_AGPR; 2105 } 2106 2107 static const RegInfo* getRegularRegInfo(StringRef Str) { 2108 for (const RegInfo &Reg : RegularRegisters) 2109 if (Str.startswith(Reg.Name)) 2110 return &Reg; 2111 return nullptr; 2112 } 2113 2114 static bool getRegNum(StringRef Str, unsigned& Num) { 2115 return !Str.getAsInteger(10, Num); 2116 } 2117 2118 bool 2119 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2120 const AsmToken &NextToken) const { 2121 2122 // A list of consecutive registers: [s0,s1,s2,s3] 2123 if (Token.is(AsmToken::LBrac)) 2124 return true; 2125 2126 if (!Token.is(AsmToken::Identifier)) 2127 return false; 2128 2129 // A single register like s0 or a range of registers like s[0:1] 2130 2131 StringRef Str = Token.getString(); 2132 const RegInfo *Reg = getRegularRegInfo(Str); 2133 if (Reg) { 2134 StringRef RegName = Reg->Name; 2135 StringRef RegSuffix = Str.substr(RegName.size()); 2136 if (!RegSuffix.empty()) { 2137 unsigned Num; 2138 // A single register with an index: rXX 2139 if (getRegNum(RegSuffix, Num)) 2140 return true; 2141 } else { 2142 // A range of registers: r[XX:YY]. 2143 if (NextToken.is(AsmToken::LBrac)) 2144 return true; 2145 } 2146 } 2147 2148 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2149 } 2150 2151 bool 2152 AMDGPUAsmParser::isRegister() 2153 { 2154 return isRegister(getToken(), peekToken()); 2155 } 2156 2157 unsigned 2158 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2159 unsigned RegNum, 2160 unsigned RegWidth) { 2161 2162 assert(isRegularReg(RegKind)); 2163 2164 unsigned AlignSize = 1; 2165 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2166 // SGPR and TTMP registers must be aligned. 2167 // Max required alignment is 4 dwords. 
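    // E.g. s[2:5] (RegWidth == 4) requires 4-dword alignment, so it is
    // rejected below because its starting index 2 is not a multiple of 4,
    // whereas s[4:7] is accepted.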
2168 AlignSize = std::min(RegWidth, 4u); 2169 } 2170 2171 if (RegNum % AlignSize != 0) 2172 return AMDGPU::NoRegister; 2173 2174 unsigned RegIdx = RegNum / AlignSize; 2175 int RCID = getRegClass(RegKind, RegWidth); 2176 if (RCID == -1) 2177 return AMDGPU::NoRegister; 2178 2179 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2180 const MCRegisterClass RC = TRI->getRegClass(RCID); 2181 if (RegIdx >= RC.getNumRegs()) 2182 return AMDGPU::NoRegister; 2183 2184 return RC.getRegister(RegIdx); 2185 } 2186 2187 bool 2188 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2189 int64_t RegLo, RegHi; 2190 if (!trySkipToken(AsmToken::LBrac)) 2191 return false; 2192 2193 if (!parseExpr(RegLo)) 2194 return false; 2195 2196 if (trySkipToken(AsmToken::Colon)) { 2197 if (!parseExpr(RegHi)) 2198 return false; 2199 } else { 2200 RegHi = RegLo; 2201 } 2202 2203 if (!trySkipToken(AsmToken::RBrac)) 2204 return false; 2205 2206 if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi) 2207 return false; 2208 2209 Num = static_cast<unsigned>(RegLo); 2210 Width = (RegHi - RegLo) + 1; 2211 return true; 2212 } 2213 2214 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2215 unsigned &RegNum, unsigned &RegWidth, 2216 SmallVectorImpl<AsmToken> &Tokens) { 2217 assert(isToken(AsmToken::Identifier)); 2218 unsigned Reg = getSpecialRegForName(getTokenStr()); 2219 if (Reg) { 2220 RegNum = 0; 2221 RegWidth = 1; 2222 RegKind = IS_SPECIAL; 2223 Tokens.push_back(getToken()); 2224 lex(); // skip register name 2225 } 2226 return Reg; 2227 } 2228 2229 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2230 unsigned &RegNum, unsigned &RegWidth, 2231 SmallVectorImpl<AsmToken> &Tokens) { 2232 assert(isToken(AsmToken::Identifier)); 2233 StringRef RegName = getTokenStr(); 2234 2235 const RegInfo *RI = getRegularRegInfo(RegName); 2236 if (!RI) 2237 return AMDGPU::NoRegister; 2238 Tokens.push_back(getToken()); 2239 lex(); // skip register name 2240 2241 RegKind = RI->Kind; 2242 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2243 if (!RegSuffix.empty()) { 2244 // Single 32-bit register: vXX. 2245 if (!getRegNum(RegSuffix, RegNum)) 2246 return AMDGPU::NoRegister; 2247 RegWidth = 1; 2248 } else { 2249 // Range of registers: v[XX:YY]. ":YY" is optional. 
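    // E.g. v[4:7] yields RegNum = 4 and RegWidth = 4, while v[4]
    // (no ":YY") yields RegWidth = 1.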
2250 if (!ParseRegRange(RegNum, RegWidth)) 2251 return AMDGPU::NoRegister; 2252 } 2253 2254 return getRegularReg(RegKind, RegNum, RegWidth); 2255 } 2256 2257 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2258 unsigned &RegWidth, 2259 SmallVectorImpl<AsmToken> &Tokens) { 2260 unsigned Reg = AMDGPU::NoRegister; 2261 2262 if (!trySkipToken(AsmToken::LBrac)) 2263 return AMDGPU::NoRegister; 2264 2265 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2266 2267 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2268 return AMDGPU::NoRegister; 2269 if (RegWidth != 1) 2270 return AMDGPU::NoRegister; 2271 2272 for (; trySkipToken(AsmToken::Comma); ) { 2273 RegisterKind NextRegKind; 2274 unsigned NextReg, NextRegNum, NextRegWidth; 2275 2276 if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth, 2277 Tokens)) 2278 return AMDGPU::NoRegister; 2279 if (NextRegWidth != 1) 2280 return AMDGPU::NoRegister; 2281 if (NextRegKind != RegKind) 2282 return AMDGPU::NoRegister; 2283 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg)) 2284 return AMDGPU::NoRegister; 2285 } 2286 2287 if (!trySkipToken(AsmToken::RBrac)) 2288 return AMDGPU::NoRegister; 2289 2290 if (isRegularReg(RegKind)) 2291 Reg = getRegularReg(RegKind, RegNum, RegWidth); 2292 2293 return Reg; 2294 } 2295 2296 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2297 unsigned &RegNum, unsigned &RegWidth, 2298 SmallVectorImpl<AsmToken> &Tokens) { 2299 Reg = AMDGPU::NoRegister; 2300 2301 if (isToken(AsmToken::Identifier)) { 2302 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2303 if (Reg == AMDGPU::NoRegister) 2304 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2305 } else { 2306 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2307 } 2308 2309 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2310 return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg); 2311 } 2312 2313 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2314 unsigned &RegNum, unsigned &RegWidth, 2315 bool RestoreOnFailure) { 2316 Reg = AMDGPU::NoRegister; 2317 2318 SmallVector<AsmToken, 1> Tokens; 2319 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2320 if (RestoreOnFailure) { 2321 while (!Tokens.empty()) { 2322 getLexer().UnLex(Tokens.pop_back_val()); 2323 } 2324 } 2325 return true; 2326 } 2327 return false; 2328 } 2329 2330 Optional<StringRef> 2331 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2332 switch (RegKind) { 2333 case IS_VGPR: 2334 return StringRef(".amdgcn.next_free_vgpr"); 2335 case IS_SGPR: 2336 return StringRef(".amdgcn.next_free_sgpr"); 2337 default: 2338 return None; 2339 } 2340 } 2341 2342 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2343 auto SymbolName = getGprCountSymbolName(RegKind); 2344 assert(SymbolName && "initializing invalid register kind"); 2345 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2346 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2347 } 2348 2349 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2350 unsigned DwordRegIndex, 2351 unsigned RegWidth) { 2352 // Symbols are only defined for GCN targets 2353 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2354 return true; 2355 2356 auto SymbolName = getGprCountSymbolName(RegKind); 2357 if (!SymbolName) 2358 return true; 2359 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2360 2361 int64_t NewMax 
= DwordRegIndex + RegWidth - 1; 2362 int64_t OldCount; 2363 2364 if (!Sym->isVariable()) 2365 return !Error(getParser().getTok().getLoc(), 2366 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2367 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2368 return !Error( 2369 getParser().getTok().getLoc(), 2370 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2371 2372 if (OldCount <= NewMax) 2373 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2374 2375 return true; 2376 } 2377 2378 std::unique_ptr<AMDGPUOperand> 2379 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2380 const auto &Tok = Parser.getTok(); 2381 SMLoc StartLoc = Tok.getLoc(); 2382 SMLoc EndLoc = Tok.getEndLoc(); 2383 RegisterKind RegKind; 2384 unsigned Reg, RegNum, RegWidth; 2385 2386 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2387 //FIXME: improve error messages (bug 41303). 2388 Error(StartLoc, "not a valid operand."); 2389 return nullptr; 2390 } 2391 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2392 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2393 return nullptr; 2394 } else 2395 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2396 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2397 } 2398 2399 OperandMatchResultTy 2400 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2401 // TODO: add syntactic sugar for 1/(2*PI) 2402 2403 assert(!isRegister()); 2404 assert(!isModifier()); 2405 2406 const auto& Tok = getToken(); 2407 const auto& NextTok = peekToken(); 2408 bool IsReal = Tok.is(AsmToken::Real); 2409 SMLoc S = getLoc(); 2410 bool Negate = false; 2411 2412 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2413 lex(); 2414 IsReal = true; 2415 Negate = true; 2416 } 2417 2418 if (IsReal) { 2419 // Floating-point expressions are not supported. 2420 // Can only allow floating-point literals with an 2421 // optional sign. 2422 2423 StringRef Num = getTokenStr(); 2424 lex(); 2425 2426 APFloat RealVal(APFloat::IEEEdouble()); 2427 auto roundMode = APFloat::rmNearestTiesToEven; 2428 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2429 return MatchOperand_ParseFail; 2430 } 2431 if (Negate) 2432 RealVal.changeSign(); 2433 2434 Operands.push_back( 2435 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2436 AMDGPUOperand::ImmTyNone, true)); 2437 2438 return MatchOperand_Success; 2439 2440 } else { 2441 int64_t IntVal; 2442 const MCExpr *Expr; 2443 SMLoc S = getLoc(); 2444 2445 if (HasSP3AbsModifier) { 2446 // This is a workaround for handling expressions 2447 // as arguments of SP3 'abs' modifier, for example: 2448 // |1.0| 2449 // |-1| 2450 // |1+x| 2451 // This syntax is not compatible with syntax of standard 2452 // MC expressions (due to the trailing '|'). 
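      // The generic expression parser would consume the closing '|' as a
      // bitwise-or operator, so only a primary expression is parsed here and
      // the trailing '|' is left for the caller to skip.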
2453 SMLoc EndLoc; 2454 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2455 return MatchOperand_ParseFail; 2456 } else { 2457 if (Parser.parseExpression(Expr)) 2458 return MatchOperand_ParseFail; 2459 } 2460 2461 if (Expr->evaluateAsAbsolute(IntVal)) { 2462 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2463 } else { 2464 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2465 } 2466 2467 return MatchOperand_Success; 2468 } 2469 2470 return MatchOperand_NoMatch; 2471 } 2472 2473 OperandMatchResultTy 2474 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2475 if (!isRegister()) 2476 return MatchOperand_NoMatch; 2477 2478 if (auto R = parseRegister()) { 2479 assert(R->isReg()); 2480 Operands.push_back(std::move(R)); 2481 return MatchOperand_Success; 2482 } 2483 return MatchOperand_ParseFail; 2484 } 2485 2486 OperandMatchResultTy 2487 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2488 auto res = parseReg(Operands); 2489 if (res != MatchOperand_NoMatch) { 2490 return res; 2491 } else if (isModifier()) { 2492 return MatchOperand_NoMatch; 2493 } else { 2494 return parseImm(Operands, HasSP3AbsMod); 2495 } 2496 } 2497 2498 bool 2499 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2500 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2501 const auto &str = Token.getString(); 2502 return str == "abs" || str == "neg" || str == "sext"; 2503 } 2504 return false; 2505 } 2506 2507 bool 2508 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2509 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2510 } 2511 2512 bool 2513 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2514 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2515 } 2516 2517 bool 2518 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2519 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2520 } 2521 2522 // Check if this is an operand modifier or an opcode modifier 2523 // which may look like an expression but it is not. We should 2524 // avoid parsing these modifiers as expressions. Currently 2525 // recognized sequences are: 2526 // |...| 2527 // abs(...) 2528 // neg(...) 2529 // sext(...) 2530 // -reg 2531 // -|...| 2532 // -abs(...) 2533 // name:... 2534 // Note that simple opcode modifiers like 'gds' may be parsed as 2535 // expressions; this is a special case. See getExpressionAsToken. 2536 // 2537 bool 2538 AMDGPUAsmParser::isModifier() { 2539 2540 AsmToken Tok = getToken(); 2541 AsmToken NextToken[2]; 2542 peekTokens(NextToken); 2543 2544 return isOperandModifier(Tok, NextToken[0]) || 2545 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2546 isOpcodeModifierWithVal(Tok, NextToken[0]); 2547 } 2548 2549 // Check if the current token is an SP3 'neg' modifier. 2550 // Currently this modifier is allowed in the following context: 2551 // 2552 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2553 // 2. Before an 'abs' modifier: -abs(...) 2554 // 3. Before an SP3 'abs' modifier: -|...| 2555 // 2556 // In all other cases "-" is handled as a part 2557 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of the floating-point
// NEG modifier would have resulted in a different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2631 } 2632 2633 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2634 return MatchOperand_ParseFail; 2635 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2636 return MatchOperand_ParseFail; 2637 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2638 return MatchOperand_ParseFail; 2639 2640 AMDGPUOperand::Modifiers Mods; 2641 Mods.Abs = Abs || SP3Abs; 2642 Mods.Neg = Neg || SP3Neg; 2643 2644 if (Mods.hasFPModifiers()) { 2645 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2646 if (Op.isExpr()) { 2647 Error(Op.getStartLoc(), "expected an absolute expression"); 2648 return MatchOperand_ParseFail; 2649 } 2650 Op.setModifiers(Mods); 2651 } 2652 return MatchOperand_Success; 2653 } 2654 2655 OperandMatchResultTy 2656 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2657 bool AllowImm) { 2658 bool Sext = trySkipId("sext"); 2659 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2660 return MatchOperand_ParseFail; 2661 2662 OperandMatchResultTy Res; 2663 if (AllowImm) { 2664 Res = parseRegOrImm(Operands); 2665 } else { 2666 Res = parseReg(Operands); 2667 } 2668 if (Res != MatchOperand_Success) { 2669 return Sext? MatchOperand_ParseFail : Res; 2670 } 2671 2672 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2673 return MatchOperand_ParseFail; 2674 2675 AMDGPUOperand::Modifiers Mods; 2676 Mods.Sext = Sext; 2677 2678 if (Mods.hasIntModifiers()) { 2679 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2680 if (Op.isExpr()) { 2681 Error(Op.getStartLoc(), "expected an absolute expression"); 2682 return MatchOperand_ParseFail; 2683 } 2684 Op.setModifiers(Mods); 2685 } 2686 2687 return MatchOperand_Success; 2688 } 2689 2690 OperandMatchResultTy 2691 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2692 return parseRegOrImmWithFPInputMods(Operands, false); 2693 } 2694 2695 OperandMatchResultTy 2696 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2697 return parseRegOrImmWithIntInputMods(Operands, false); 2698 } 2699 2700 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2701 auto Loc = getLoc(); 2702 if (trySkipId("off")) { 2703 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2704 AMDGPUOperand::ImmTyOff, false)); 2705 return MatchOperand_Success; 2706 } 2707 2708 if (!isRegister()) 2709 return MatchOperand_NoMatch; 2710 2711 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2712 if (Reg) { 2713 Operands.push_back(std::move(Reg)); 2714 return MatchOperand_Success; 2715 } 2716 2717 return MatchOperand_ParseFail; 2718 2719 } 2720 2721 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2722 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2723 2724 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2725 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2726 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2727 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2728 return Match_InvalidOperand; 2729 2730 if ((TSFlags & SIInstrFlags::VOP3) && 2731 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2732 getForcedEncodingSize() != 64) 2733 return Match_PreferE32; 2734 2735 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2736 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2737 // v_mac_f32/16 allow only dst_sel == DWORD; 2738 auto OpNum = 2739 
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}

// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
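// For reference: inline constants are integers in [-16, 64] and a small set
// of fp values (+/-0.5, +/-1.0, +/-2.0, +/-4.0, and 1/(2*pi) on targets that
// support it); operands holding them do not consume the constant bus.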
2803 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2804 unsigned OpIdx) const { 2805 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2806 2807 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2808 return false; 2809 } 2810 2811 const MCOperand &MO = Inst.getOperand(OpIdx); 2812 2813 int64_t Val = MO.getImm(); 2814 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2815 2816 switch (OpSize) { // expected operand size 2817 case 8: 2818 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2819 case 4: 2820 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2821 case 2: { 2822 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2823 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2824 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2825 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2826 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2827 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2828 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2829 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2830 } else { 2831 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2832 } 2833 } 2834 default: 2835 llvm_unreachable("invalid operand size"); 2836 } 2837 } 2838 2839 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2840 if (!isGFX10()) 2841 return 1; 2842 2843 switch (Opcode) { 2844 // 64-bit shift instructions can use only one scalar value input 2845 case AMDGPU::V_LSHLREV_B64: 2846 case AMDGPU::V_LSHLREV_B64_gfx10: 2847 case AMDGPU::V_LSHL_B64: 2848 case AMDGPU::V_LSHRREV_B64: 2849 case AMDGPU::V_LSHRREV_B64_gfx10: 2850 case AMDGPU::V_LSHR_B64: 2851 case AMDGPU::V_ASHRREV_I64: 2852 case AMDGPU::V_ASHRREV_I64_gfx10: 2853 case AMDGPU::V_ASHR_I64: 2854 return 1; 2855 default: 2856 return 2; 2857 } 2858 } 2859 2860 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2861 const MCOperand &MO = Inst.getOperand(OpIdx); 2862 if (MO.isImm()) { 2863 return !isInlineConstant(Inst, OpIdx); 2864 } else if (MO.isReg()) { 2865 auto Reg = MO.getReg(); 2866 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2867 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2868 } else { 2869 return true; 2870 } 2871 } 2872 2873 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2874 const unsigned Opcode = Inst.getOpcode(); 2875 const MCInstrDesc &Desc = MII.get(Opcode); 2876 unsigned ConstantBusUseCount = 0; 2877 unsigned NumLiterals = 0; 2878 unsigned LiteralSize; 2879 2880 if (Desc.TSFlags & 2881 (SIInstrFlags::VOPC | 2882 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2883 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2884 SIInstrFlags::SDWA)) { 2885 // Check special imm operands (used by madmk, etc) 2886 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2887 ++ConstantBusUseCount; 2888 } 2889 2890 SmallDenseSet<unsigned> SGPRsUsed; 2891 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2892 if (SGPRUsed != AMDGPU::NoRegister) { 2893 SGPRsUsed.insert(SGPRUsed); 2894 ++ConstantBusUseCount; 2895 } 2896 2897 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2898 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2899 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2900 2901 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2902 2903 for (int OpIdx : OpIndices) { 2904 if (OpIdx == -1) break; 2905 2906 const MCOperand &MO = 
Inst.getOperand(OpIdx); 2907 if (usesConstantBus(Inst, OpIdx)) { 2908 if (MO.isReg()) { 2909 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2910 // Pairs of registers with a partial intersections like these 2911 // s0, s[0:1] 2912 // flat_scratch_lo, flat_scratch 2913 // flat_scratch_lo, flat_scratch_hi 2914 // are theoretically valid but they are disabled anyway. 2915 // Note that this code mimics SIInstrInfo::verifyInstruction 2916 if (!SGPRsUsed.count(Reg)) { 2917 SGPRsUsed.insert(Reg); 2918 ++ConstantBusUseCount; 2919 } 2920 } else { // Expression or a literal 2921 2922 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2923 continue; // special operand like VINTERP attr_chan 2924 2925 // An instruction may use only one literal. 2926 // This has been validated on the previous step. 2927 // See validateVOP3Literal. 2928 // This literal may be used as more than one operand. 2929 // If all these operands are of the same size, 2930 // this literal counts as one scalar value. 2931 // Otherwise it counts as 2 scalar values. 2932 // See "GFX10 Shader Programming", section 3.6.2.3. 2933 2934 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2935 if (Size < 4) Size = 4; 2936 2937 if (NumLiterals == 0) { 2938 NumLiterals = 1; 2939 LiteralSize = Size; 2940 } else if (LiteralSize != Size) { 2941 NumLiterals = 2; 2942 } 2943 } 2944 } 2945 } 2946 } 2947 ConstantBusUseCount += NumLiterals; 2948 2949 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 2950 } 2951 2952 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2953 const unsigned Opcode = Inst.getOpcode(); 2954 const MCInstrDesc &Desc = MII.get(Opcode); 2955 2956 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2957 if (DstIdx == -1 || 2958 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2959 return true; 2960 } 2961 2962 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2963 2964 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2965 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2966 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2967 2968 assert(DstIdx != -1); 2969 const MCOperand &Dst = Inst.getOperand(DstIdx); 2970 assert(Dst.isReg()); 2971 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2972 2973 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2974 2975 for (int SrcIdx : SrcIndices) { 2976 if (SrcIdx == -1) break; 2977 const MCOperand &Src = Inst.getOperand(SrcIdx); 2978 if (Src.isReg()) { 2979 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2980 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2981 return false; 2982 } 2983 } 2984 } 2985 2986 return true; 2987 } 2988 2989 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2990 2991 const unsigned Opc = Inst.getOpcode(); 2992 const MCInstrDesc &Desc = MII.get(Opc); 2993 2994 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2995 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2996 assert(ClampIdx != -1); 2997 return Inst.getOperand(ClampIdx).getImm() == 0; 2998 } 2999 3000 return true; 3001 } 3002 3003 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3004 3005 const unsigned Opc = Inst.getOpcode(); 3006 const MCInstrDesc &Desc = MII.get(Opc); 3007 3008 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3009 return true; 3010 3011 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3012 int DMaskIdx 
= AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3013 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3014 3015 assert(VDataIdx != -1); 3016 assert(DMaskIdx != -1); 3017 assert(TFEIdx != -1); 3018 3019 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3020 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3021 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3022 if (DMask == 0) 3023 DMask = 1; 3024 3025 unsigned DataSize = 3026 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3027 if (hasPackedD16()) { 3028 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3029 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3030 DataSize = (DataSize + 1) / 2; 3031 } 3032 3033 return (VDataSize / 4) == DataSize + TFESize; 3034 } 3035 3036 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3037 const unsigned Opc = Inst.getOpcode(); 3038 const MCInstrDesc &Desc = MII.get(Opc); 3039 3040 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3041 return true; 3042 3043 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3044 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3045 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3046 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3047 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3048 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3049 3050 assert(VAddr0Idx != -1); 3051 assert(SrsrcIdx != -1); 3052 assert(DimIdx != -1); 3053 assert(SrsrcIdx > VAddr0Idx); 3054 3055 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3056 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3057 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3058 unsigned VAddrSize = 3059 IsNSA ? SrsrcIdx - VAddr0Idx 3060 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3061 3062 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3063 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3064 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3065 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3066 if (!IsNSA) { 3067 if (AddrSize > 8) 3068 AddrSize = 16; 3069 else if (AddrSize > 4) 3070 AddrSize = 8; 3071 } 3072 3073 return VAddrSize == AddrSize; 3074 } 3075 3076 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3077 3078 const unsigned Opc = Inst.getOpcode(); 3079 const MCInstrDesc &Desc = MII.get(Opc); 3080 3081 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3082 return true; 3083 if (!Desc.mayLoad() || !Desc.mayStore()) 3084 return true; // Not atomic 3085 3086 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3087 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3088 3089 // This is an incomplete check because image_atomic_cmpswap 3090 // may only use 0x3 and 0xf while other atomic operations 3091 // may use 0x1 and 0x3. However these limitations are 3092 // verified when we check that dmask matches dst size. 
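  // E.g. a 32-bit atomic writes one channel (dmask 0x1), a 64-bit atomic
  // writes two (0x3), and cmpswap additionally carries the compare value,
  // doubling these to 0x3 and 0xf.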
3093 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3094 } 3095 3096 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3097 3098 const unsigned Opc = Inst.getOpcode(); 3099 const MCInstrDesc &Desc = MII.get(Opc); 3100 3101 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3102 return true; 3103 3104 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3105 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3106 3107 // GATHER4 instructions use dmask in a different fashion compared to 3108 // other MIMG instructions. The only useful DMASK values are 3109 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3110 // (red,red,red,red) etc.) The ISA document doesn't mention 3111 // this. 3112 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3113 } 3114 3115 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3116 { 3117 switch (Opcode) { 3118 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3119 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3120 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3121 return true; 3122 default: 3123 return false; 3124 } 3125 } 3126 3127 // movrels* opcodes should only allow VGPRS as src0. 3128 // This is specified in .td description for vop1/vop3, 3129 // but sdwa is handled differently. See isSDWAOperand. 3130 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3131 3132 const unsigned Opc = Inst.getOpcode(); 3133 const MCInstrDesc &Desc = MII.get(Opc); 3134 3135 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3136 return true; 3137 3138 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3139 assert(Src0Idx != -1); 3140 3141 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3142 if (!Src0.isReg()) 3143 return false; 3144 3145 auto Reg = Src0.getReg(); 3146 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3147 return !isSGPR(mc2PseudoReg(Reg), TRI); 3148 } 3149 3150 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3151 3152 const unsigned Opc = Inst.getOpcode(); 3153 const MCInstrDesc &Desc = MII.get(Opc); 3154 3155 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3156 return true; 3157 3158 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3159 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3160 if (isCI() || isSI()) 3161 return false; 3162 } 3163 3164 return true; 3165 } 3166 3167 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3168 const unsigned Opc = Inst.getOpcode(); 3169 const MCInstrDesc &Desc = MII.get(Opc); 3170 3171 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3172 return true; 3173 3174 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3175 if (DimIdx < 0) 3176 return true; 3177 3178 long Imm = Inst.getOperand(DimIdx).getImm(); 3179 if (Imm < 0 || Imm >= 8) 3180 return false; 3181 3182 return true; 3183 } 3184 3185 static bool IsRevOpcode(const unsigned Opcode) 3186 { 3187 switch (Opcode) { 3188 case AMDGPU::V_SUBREV_F32_e32: 3189 case AMDGPU::V_SUBREV_F32_e64: 3190 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3191 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3192 case AMDGPU::V_SUBREV_F32_e32_vi: 3193 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3194 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3195 case AMDGPU::V_SUBREV_F32_e64_vi: 3196 3197 case AMDGPU::V_SUBREV_I32_e32: 3198 case AMDGPU::V_SUBREV_I32_e64: 3199 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3200 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3201 3202 case AMDGPU::V_SUBBREV_U32_e32: 3203 case AMDGPU::V_SUBBREV_U32_e64: 3204 case 
AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3205 case AMDGPU::V_SUBBREV_U32_e32_vi: 3206 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3207 case AMDGPU::V_SUBBREV_U32_e64_vi: 3208 3209 case AMDGPU::V_SUBREV_U32_e32: 3210 case AMDGPU::V_SUBREV_U32_e64: 3211 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3212 case AMDGPU::V_SUBREV_U32_e32_vi: 3213 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3214 case AMDGPU::V_SUBREV_U32_e64_vi: 3215 3216 case AMDGPU::V_SUBREV_F16_e32: 3217 case AMDGPU::V_SUBREV_F16_e64: 3218 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3219 case AMDGPU::V_SUBREV_F16_e32_vi: 3220 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3221 case AMDGPU::V_SUBREV_F16_e64_vi: 3222 3223 case AMDGPU::V_SUBREV_U16_e32: 3224 case AMDGPU::V_SUBREV_U16_e64: 3225 case AMDGPU::V_SUBREV_U16_e32_vi: 3226 case AMDGPU::V_SUBREV_U16_e64_vi: 3227 3228 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3229 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3230 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3231 3232 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3233 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3234 3235 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3236 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3237 3238 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3239 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3240 3241 case AMDGPU::V_LSHRREV_B32_e32: 3242 case AMDGPU::V_LSHRREV_B32_e64: 3243 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3244 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3245 case AMDGPU::V_LSHRREV_B32_e32_vi: 3246 case AMDGPU::V_LSHRREV_B32_e64_vi: 3247 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3248 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3249 3250 case AMDGPU::V_ASHRREV_I32_e32: 3251 case AMDGPU::V_ASHRREV_I32_e64: 3252 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3253 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3254 case AMDGPU::V_ASHRREV_I32_e32_vi: 3255 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3256 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3257 case AMDGPU::V_ASHRREV_I32_e64_vi: 3258 3259 case AMDGPU::V_LSHLREV_B32_e32: 3260 case AMDGPU::V_LSHLREV_B32_e64: 3261 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3262 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3263 case AMDGPU::V_LSHLREV_B32_e32_vi: 3264 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3265 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3266 case AMDGPU::V_LSHLREV_B32_e64_vi: 3267 3268 case AMDGPU::V_LSHLREV_B16_e32: 3269 case AMDGPU::V_LSHLREV_B16_e64: 3270 case AMDGPU::V_LSHLREV_B16_e32_vi: 3271 case AMDGPU::V_LSHLREV_B16_e64_vi: 3272 case AMDGPU::V_LSHLREV_B16_gfx10: 3273 3274 case AMDGPU::V_LSHRREV_B16_e32: 3275 case AMDGPU::V_LSHRREV_B16_e64: 3276 case AMDGPU::V_LSHRREV_B16_e32_vi: 3277 case AMDGPU::V_LSHRREV_B16_e64_vi: 3278 case AMDGPU::V_LSHRREV_B16_gfx10: 3279 3280 case AMDGPU::V_ASHRREV_I16_e32: 3281 case AMDGPU::V_ASHRREV_I16_e64: 3282 case AMDGPU::V_ASHRREV_I16_e32_vi: 3283 case AMDGPU::V_ASHRREV_I16_e64_vi: 3284 case AMDGPU::V_ASHRREV_I16_gfx10: 3285 3286 case AMDGPU::V_LSHLREV_B64: 3287 case AMDGPU::V_LSHLREV_B64_gfx10: 3288 case AMDGPU::V_LSHLREV_B64_vi: 3289 3290 case AMDGPU::V_LSHRREV_B64: 3291 case AMDGPU::V_LSHRREV_B64_gfx10: 3292 case AMDGPU::V_LSHRREV_B64_vi: 3293 3294 case AMDGPU::V_ASHRREV_I64: 3295 case AMDGPU::V_ASHRREV_I64_gfx10: 3296 case AMDGPU::V_ASHRREV_I64_vi: 3297 3298 case AMDGPU::V_PK_LSHLREV_B16: 3299 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3300 case AMDGPU::V_PK_LSHLREV_B16_vi: 3301 3302 case AMDGPU::V_PK_LSHRREV_B16: 3303 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3304 case AMDGPU::V_PK_LSHRREV_B16_vi: 3305 case AMDGPU::V_PK_ASHRREV_I16: 3306 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3307 case AMDGPU::V_PK_ASHRREV_I16_vi: 3308 return true; 
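  // "rev" opcodes swap src0 and src1 relative to their non-rev counterparts
  // (e.g. v_subrev computes src1 - src0); validateLdsDirect() below relies on
  // this when rejecting lds_direct as src0 of such instructions.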
3309 default: 3310 return false; 3311 } 3312 } 3313 3314 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3315 3316 using namespace SIInstrFlags; 3317 const unsigned Opcode = Inst.getOpcode(); 3318 const MCInstrDesc &Desc = MII.get(Opcode); 3319 3320 // lds_direct register is defined so that it can be used 3321 // with 9-bit operands only. Ignore encodings which do not accept these. 3322 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3323 return true; 3324 3325 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3326 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3327 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3328 3329 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3330 3331 // lds_direct cannot be specified as either src1 or src2. 3332 for (int SrcIdx : SrcIndices) { 3333 if (SrcIdx == -1) break; 3334 const MCOperand &Src = Inst.getOperand(SrcIdx); 3335 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3336 return false; 3337 } 3338 } 3339 3340 if (Src0Idx == -1) 3341 return true; 3342 3343 const MCOperand &Src = Inst.getOperand(Src0Idx); 3344 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3345 return true; 3346 3347 // lds_direct is specified as src0. Check additional limitations. 3348 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3349 } 3350 3351 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3352 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3353 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3354 if (Op.isFlatOffset()) 3355 return Op.getStartLoc(); 3356 } 3357 return getLoc(); 3358 } 3359 3360 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3361 const OperandVector &Operands) { 3362 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3363 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3364 return true; 3365 3366 auto Opcode = Inst.getOpcode(); 3367 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3368 assert(OpNum != -1); 3369 3370 const auto &Op = Inst.getOperand(OpNum); 3371 if (!hasFlatOffsets() && Op.getImm() != 0) { 3372 Error(getFlatOffsetLoc(Operands), 3373 "flat offset modifier is not supported on this GPU"); 3374 return false; 3375 } 3376 3377 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3378 // For FLAT segment the offset must be positive; 3379 // MSB is ignored and forced to zero. 3380 unsigned OffsetSize = isGFX9() ? 13 : 12; 3381 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3382 if (!isIntN(OffsetSize, Op.getImm())) { 3383 Error(getFlatOffsetLoc(Operands), 3384 isGFX9() ? "expected a 13-bit signed offset" : 3385 "expected a 12-bit signed offset"); 3386 return false; 3387 } 3388 } else { 3389 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3390 Error(getFlatOffsetLoc(Operands), 3391 isGFX9() ? 
"expected a 12-bit unsigned offset" : 3392 "expected an 11-bit unsigned offset"); 3393 return false; 3394 } 3395 } 3396 3397 return true; 3398 } 3399 3400 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3401 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3402 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3403 if (Op.isSMEMOffset()) 3404 return Op.getStartLoc(); 3405 } 3406 return getLoc(); 3407 } 3408 3409 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3410 const OperandVector &Operands) { 3411 if (isCI() || isSI()) 3412 return true; 3413 3414 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3415 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3416 return true; 3417 3418 auto Opcode = Inst.getOpcode(); 3419 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3420 if (OpNum == -1) 3421 return true; 3422 3423 const auto &Op = Inst.getOperand(OpNum); 3424 if (!Op.isImm()) 3425 return true; 3426 3427 uint64_t Offset = Op.getImm(); 3428 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3429 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3430 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3431 return true; 3432 3433 Error(getSMEMOffsetLoc(Operands), 3434 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3435 "expected a 21-bit signed offset"); 3436 3437 return false; 3438 } 3439 3440 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3441 unsigned Opcode = Inst.getOpcode(); 3442 const MCInstrDesc &Desc = MII.get(Opcode); 3443 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3444 return true; 3445 3446 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3447 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3448 3449 const int OpIndices[] = { Src0Idx, Src1Idx }; 3450 3451 unsigned NumExprs = 0; 3452 unsigned NumLiterals = 0; 3453 uint32_t LiteralValue; 3454 3455 for (int OpIdx : OpIndices) { 3456 if (OpIdx == -1) break; 3457 3458 const MCOperand &MO = Inst.getOperand(OpIdx); 3459 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3460 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3461 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3462 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3463 if (NumLiterals == 0 || LiteralValue != Value) { 3464 LiteralValue = Value; 3465 ++NumLiterals; 3466 } 3467 } else if (MO.isExpr()) { 3468 ++NumExprs; 3469 } 3470 } 3471 } 3472 3473 return NumLiterals + NumExprs <= 1; 3474 } 3475 3476 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3477 const unsigned Opc = Inst.getOpcode(); 3478 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3479 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3480 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3481 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3482 3483 if (OpSel & ~3) 3484 return false; 3485 } 3486 return true; 3487 } 3488 3489 // Check if VCC register matches wavefront size 3490 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3491 auto FB = getFeatureBits(); 3492 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3493 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3494 } 3495 3496 // VOP3 literal is only allowed in GFX10+ and only one can be used 3497 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3498 unsigned Opcode = Inst.getOpcode(); 3499 const MCInstrDesc &Desc = 
MII.get(Opcode); 3500 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3501 return true; 3502 3503 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3504 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3505 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3506 3507 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3508 3509 unsigned NumExprs = 0; 3510 unsigned NumLiterals = 0; 3511 uint32_t LiteralValue; 3512 3513 for (int OpIdx : OpIndices) { 3514 if (OpIdx == -1) break; 3515 3516 const MCOperand &MO = Inst.getOperand(OpIdx); 3517 if (!MO.isImm() && !MO.isExpr()) 3518 continue; 3519 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3520 continue; 3521 3522 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3523 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3524 return false; 3525 3526 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3527 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3528 if (NumLiterals == 0 || LiteralValue != Value) { 3529 LiteralValue = Value; 3530 ++NumLiterals; 3531 } 3532 } else if (MO.isExpr()) { 3533 ++NumExprs; 3534 } 3535 } 3536 NumLiterals += NumExprs; 3537 3538 return !NumLiterals || 3539 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3540 } 3541 3542 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3543 const SMLoc &IDLoc, 3544 const OperandVector &Operands) { 3545 if (!validateLdsDirect(Inst)) { 3546 Error(IDLoc, 3547 "invalid use of lds_direct"); 3548 return false; 3549 } 3550 if (!validateSOPLiteral(Inst)) { 3551 Error(IDLoc, 3552 "only one literal operand is allowed"); 3553 return false; 3554 } 3555 if (!validateVOP3Literal(Inst)) { 3556 Error(IDLoc, 3557 "invalid literal operand"); 3558 return false; 3559 } 3560 if (!validateConstantBusLimitations(Inst)) { 3561 Error(IDLoc, 3562 "invalid operand (violates constant bus restrictions)"); 3563 return false; 3564 } 3565 if (!validateEarlyClobberLimitations(Inst)) { 3566 Error(IDLoc, 3567 "destination must be different than all sources"); 3568 return false; 3569 } 3570 if (!validateIntClampSupported(Inst)) { 3571 Error(IDLoc, 3572 "integer clamping is not supported on this GPU"); 3573 return false; 3574 } 3575 if (!validateOpSel(Inst)) { 3576 Error(IDLoc, 3577 "invalid op_sel operand"); 3578 return false; 3579 } 3580 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
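  // Illustrative example (syntax sketched from memory, not taken from the
  // original comments): on a target without d16 support, an MIMG form such as
  //   image_load v[0:1], v[0:3], s[0:7] dmask:0x3 d16
  // is rejected by the check below, whereas the MUBUF/MTBUF "_d16" variants
  // are separate opcodes and need no extra validation here.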
3581 if (!validateMIMGD16(Inst)) { 3582 Error(IDLoc, 3583 "d16 modifier is not supported on this GPU"); 3584 return false; 3585 } 3586 if (!validateMIMGDim(Inst)) { 3587 Error(IDLoc, "dim modifier is required on this GPU"); 3588 return false; 3589 } 3590 if (!validateMIMGDataSize(Inst)) { 3591 Error(IDLoc, 3592 "image data size does not match dmask and tfe"); 3593 return false; 3594 } 3595 if (!validateMIMGAddrSize(Inst)) { 3596 Error(IDLoc, 3597 "image address size does not match dim and a16"); 3598 return false; 3599 } 3600 if (!validateMIMGAtomicDMask(Inst)) { 3601 Error(IDLoc, 3602 "invalid atomic image dmask"); 3603 return false; 3604 } 3605 if (!validateMIMGGatherDMask(Inst)) { 3606 Error(IDLoc, 3607 "invalid image_gather dmask: only one bit must be set"); 3608 return false; 3609 } 3610 if (!validateMovrels(Inst)) { 3611 Error(IDLoc, "source operand must be a VGPR"); 3612 return false; 3613 } 3614 if (!validateFlatOffset(Inst, Operands)) { 3615 return false; 3616 } 3617 if (!validateSMEMOffset(Inst, Operands)) { 3618 return false; 3619 } 3620 3621 return true; 3622 } 3623 3624 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3625 const FeatureBitset &FBS, 3626 unsigned VariantID = 0); 3627 3628 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3629 OperandVector &Operands, 3630 MCStreamer &Out, 3631 uint64_t &ErrorInfo, 3632 bool MatchingInlineAsm) { 3633 MCInst Inst; 3634 unsigned Result = Match_Success; 3635 for (auto Variant : getMatchedVariants()) { 3636 uint64_t EI; 3637 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3638 Variant); 3639 // We order match statuses from least to most specific and use the most 3640 // specific status as the result: 3641 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3642 if ((R == Match_Success) || 3643 (R == Match_PreferE32) || 3644 (R == Match_MissingFeature && Result != Match_PreferE32) || 3645 (R == Match_InvalidOperand && Result != Match_MissingFeature 3646 && Result != Match_PreferE32) || 3647 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3648 && Result != Match_MissingFeature 3649 && Result != Match_PreferE32)) { 3650 Result = R; 3651 ErrorInfo = EI; 3652 } 3653 if (R == Match_Success) 3654 break; 3655 } 3656 3657 switch (Result) { 3658 default: break; 3659 case Match_Success: 3660 if (!validateInstruction(Inst, IDLoc, Operands)) { 3661 return true; 3662 } 3663 Inst.setLoc(IDLoc); 3664 Out.emitInstruction(Inst, getSTI()); 3665 return false; 3666 3667 case Match_MissingFeature: 3668 return Error(IDLoc, "instruction not supported on this GPU"); 3669 3670 case Match_MnemonicFail: { 3671 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3672 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3673 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3674 return Error(IDLoc, "invalid instruction" + Suggestion, 3675 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3676 } 3677 3678 case Match_InvalidOperand: { 3679 SMLoc ErrorLoc = IDLoc; 3680 if (ErrorInfo != ~0ULL) { 3681 if (ErrorInfo >= Operands.size()) { 3682 return Error(IDLoc, "too few operands for instruction"); 3683 } 3684 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3685 if (ErrorLoc == SMLoc()) 3686 ErrorLoc = IDLoc; 3687 } 3688 return Error(ErrorLoc, "invalid operand for instruction"); 3689 } 3690 3691 case Match_PreferE32: 3692 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3693 "should be encoded as e32"); 3694 } 3695
llvm_unreachable("Implement any new match types added!"); 3696 } 3697 3698 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3699 int64_t Tmp = -1; 3700 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3701 return true; 3702 } 3703 if (getParser().parseAbsoluteExpression(Tmp)) { 3704 return true; 3705 } 3706 Ret = static_cast<uint32_t>(Tmp); 3707 return false; 3708 } 3709 3710 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3711 uint32_t &Minor) { 3712 if (ParseAsAbsoluteExpression(Major)) 3713 return TokError("invalid major version"); 3714 3715 if (getLexer().isNot(AsmToken::Comma)) 3716 return TokError("minor version number required, comma expected"); 3717 Lex(); 3718 3719 if (ParseAsAbsoluteExpression(Minor)) 3720 return TokError("invalid minor version"); 3721 3722 return false; 3723 } 3724 3725 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3726 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3727 return TokError("directive only supported for amdgcn architecture"); 3728 3729 std::string Target; 3730 3731 SMLoc TargetStart = getTok().getLoc(); 3732 if (getParser().parseEscapedString(Target)) 3733 return true; 3734 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3735 3736 std::string ExpectedTarget; 3737 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3738 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3739 3740 if (Target != ExpectedTargetOS.str()) 3741 return getParser().Error(TargetRange.Start, "target must match options", 3742 TargetRange); 3743 3744 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3745 return false; 3746 } 3747 3748 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3749 return getParser().Error(Range.Start, "value out of range", Range); 3750 } 3751 3752 bool AMDGPUAsmParser::calculateGPRBlocks( 3753 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3754 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3755 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3756 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3757 // TODO(scott.linder): These calculations are duplicated from 3758 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
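  // Rough sketch of the mapping done below (granule sizes vary by target and
  // are only illustrative here): with a VGPR allocation granule of 4,
  // NextFreeVGPR = 10 rounds up to 12 registers, which
  // IsaInfo::getNumVGPRBlocks() reports as 2 because the granulated counts
  // are stored biased by one in the kernel descriptor.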
3759 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3760 3761 unsigned NumVGPRs = NextFreeVGPR; 3762 unsigned NumSGPRs = NextFreeSGPR; 3763 3764 if (Version.Major >= 10) 3765 NumSGPRs = 0; 3766 else { 3767 unsigned MaxAddressableNumSGPRs = 3768 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3769 3770 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3771 NumSGPRs > MaxAddressableNumSGPRs) 3772 return OutOfRangeError(SGPRRange); 3773 3774 NumSGPRs += 3775 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3776 3777 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3778 NumSGPRs > MaxAddressableNumSGPRs) 3779 return OutOfRangeError(SGPRRange); 3780 3781 if (Features.test(FeatureSGPRInitBug)) 3782 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3783 } 3784 3785 VGPRBlocks = 3786 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3787 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3788 3789 return false; 3790 } 3791 3792 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3793 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3794 return TokError("directive only supported for amdgcn architecture"); 3795 3796 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3797 return TokError("directive only supported for amdhsa OS"); 3798 3799 StringRef KernelName; 3800 if (getParser().parseIdentifier(KernelName)) 3801 return true; 3802 3803 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3804 3805 StringSet<> Seen; 3806 3807 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3808 3809 SMRange VGPRRange; 3810 uint64_t NextFreeVGPR = 0; 3811 SMRange SGPRRange; 3812 uint64_t NextFreeSGPR = 0; 3813 unsigned UserSGPRCount = 0; 3814 bool ReserveVCC = true; 3815 bool ReserveFlatScr = true; 3816 bool ReserveXNACK = hasXNACK(); 3817 Optional<bool> EnableWavefrontSize32; 3818 3819 while (true) { 3820 while (getLexer().is(AsmToken::EndOfStatement)) 3821 Lex(); 3822 3823 if (getLexer().isNot(AsmToken::Identifier)) 3824 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3825 3826 StringRef ID = getTok().getIdentifier(); 3827 SMRange IDRange = getTok().getLocRange(); 3828 Lex(); 3829 3830 if (ID == ".end_amdhsa_kernel") 3831 break; 3832 3833 if (Seen.find(ID) != Seen.end()) 3834 return TokError(".amdhsa_ directives cannot be repeated"); 3835 Seen.insert(ID); 3836 3837 SMLoc ValStart = getTok().getLoc(); 3838 int64_t IVal; 3839 if (getParser().parseAbsoluteExpression(IVal)) 3840 return true; 3841 SMLoc ValEnd = getTok().getLoc(); 3842 SMRange ValRange = SMRange(ValStart, ValEnd); 3843 3844 if (IVal < 0) 3845 return OutOfRangeError(ValRange); 3846 3847 uint64_t Val = IVal; 3848 3849 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3850 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3851 return OutOfRangeError(RANGE); \ 3852 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3853 3854 if (ID == ".amdhsa_group_segment_fixed_size") { 3855 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3856 return OutOfRangeError(ValRange); 3857 KD.group_segment_fixed_size = Val; 3858 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3859 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3860 return OutOfRangeError(ValRange); 3861 KD.private_segment_fixed_size = Val; 3862 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3863 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3864 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3865 Val, ValRange); 
3866 if (Val) 3867 UserSGPRCount += 4; 3868 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3869 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3870 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3871 ValRange); 3872 if (Val) 3873 UserSGPRCount += 2; 3874 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3875 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3876 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3877 ValRange); 3878 if (Val) 3879 UserSGPRCount += 2; 3880 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3881 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3882 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3883 Val, ValRange); 3884 if (Val) 3885 UserSGPRCount += 2; 3886 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3887 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3888 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3889 ValRange); 3890 if (Val) 3891 UserSGPRCount += 2; 3892 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3893 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3894 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3895 ValRange); 3896 if (Val) 3897 UserSGPRCount += 2; 3898 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3899 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3900 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3901 Val, ValRange); 3902 if (Val) 3903 UserSGPRCount += 1; 3904 } else if (ID == ".amdhsa_wavefront_size32") { 3905 if (IVersion.Major < 10) 3906 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3907 IDRange); 3908 EnableWavefrontSize32 = Val; 3909 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3910 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3911 Val, ValRange); 3912 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3913 PARSE_BITS_ENTRY( 3914 KD.compute_pgm_rsrc2, 3915 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3916 ValRange); 3917 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3918 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3919 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3920 ValRange); 3921 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3922 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3923 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3924 ValRange); 3925 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3926 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3927 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3928 ValRange); 3929 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3930 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3931 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3932 ValRange); 3933 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3934 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3935 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3936 ValRange); 3937 } else if (ID == ".amdhsa_next_free_vgpr") { 3938 VGPRRange = ValRange; 3939 NextFreeVGPR = Val; 3940 } else if (ID == ".amdhsa_next_free_sgpr") { 3941 SGPRRange = ValRange; 3942 NextFreeSGPR = Val; 3943 } else if (ID == ".amdhsa_reserve_vcc") { 3944 if (!isUInt<1>(Val)) 3945 return OutOfRangeError(ValRange); 3946 ReserveVCC = Val; 3947 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3948 if (IVersion.Major < 7) 3949 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3950 IDRange); 3951 if (!isUInt<1>(Val)) 3952 return OutOfRangeError(ValRange); 3953 ReserveFlatScr = Val; 3954 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3955 if (IVersion.Major < 8) 3956 return getParser().Error(IDRange.Start, 
"directive requires gfx8+", 3957 IDRange); 3958 if (!isUInt<1>(Val)) 3959 return OutOfRangeError(ValRange); 3960 ReserveXNACK = Val; 3961 } else if (ID == ".amdhsa_float_round_mode_32") { 3962 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3963 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3964 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3965 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3966 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3967 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3968 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3969 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3970 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3971 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3972 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3973 ValRange); 3974 } else if (ID == ".amdhsa_dx10_clamp") { 3975 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3976 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3977 } else if (ID == ".amdhsa_ieee_mode") { 3978 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3979 Val, ValRange); 3980 } else if (ID == ".amdhsa_fp16_overflow") { 3981 if (IVersion.Major < 9) 3982 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3983 IDRange); 3984 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3985 ValRange); 3986 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3987 if (IVersion.Major < 10) 3988 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3989 IDRange); 3990 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3991 ValRange); 3992 } else if (ID == ".amdhsa_memory_ordered") { 3993 if (IVersion.Major < 10) 3994 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3995 IDRange); 3996 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3997 ValRange); 3998 } else if (ID == ".amdhsa_forward_progress") { 3999 if (IVersion.Major < 10) 4000 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4001 IDRange); 4002 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4003 ValRange); 4004 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4005 PARSE_BITS_ENTRY( 4006 KD.compute_pgm_rsrc2, 4007 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4008 ValRange); 4009 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4010 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4011 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4012 Val, ValRange); 4013 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4014 PARSE_BITS_ENTRY( 4015 KD.compute_pgm_rsrc2, 4016 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4017 ValRange); 4018 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4019 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4020 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4021 Val, ValRange); 4022 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4023 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4024 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4025 Val, ValRange); 4026 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4027 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4028 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4029 Val, ValRange); 4030 } else if (ID == ".amdhsa_exception_int_div_zero") { 4031 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4032 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4033 Val, ValRange); 4034 } else { 4035 return getParser().Error(IDRange.Start, 
4036 "unknown .amdhsa_kernel directive", IDRange); 4037 } 4038 4039 #undef PARSE_BITS_ENTRY 4040 } 4041 4042 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4043 return TokError(".amdhsa_next_free_vgpr directive is required"); 4044 4045 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4046 return TokError(".amdhsa_next_free_sgpr directive is required"); 4047 4048 unsigned VGPRBlocks; 4049 unsigned SGPRBlocks; 4050 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4051 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4052 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4053 SGPRBlocks)) 4054 return true; 4055 4056 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4057 VGPRBlocks)) 4058 return OutOfRangeError(VGPRRange); 4059 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4060 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4061 4062 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4063 SGPRBlocks)) 4064 return OutOfRangeError(SGPRRange); 4065 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4066 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4067 SGPRBlocks); 4068 4069 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4070 return TokError("too many user SGPRs enabled"); 4071 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4072 UserSGPRCount); 4073 4074 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4075 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4076 ReserveFlatScr, ReserveXNACK); 4077 return false; 4078 } 4079 4080 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4081 uint32_t Major; 4082 uint32_t Minor; 4083 4084 if (ParseDirectiveMajorMinor(Major, Minor)) 4085 return true; 4086 4087 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4088 return false; 4089 } 4090 4091 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4092 uint32_t Major; 4093 uint32_t Minor; 4094 uint32_t Stepping; 4095 StringRef VendorName; 4096 StringRef ArchName; 4097 4098 // If this directive has no arguments, then use the ISA version for the 4099 // targeted GPU. 
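  // Both forms below are accepted (values are illustrative only):
  //   .hsa_code_object_isa                        ; derived from the -mcpu setting
  //   .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"   ; explicit version and names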
4100 if (getLexer().is(AsmToken::EndOfStatement)) { 4101 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4102 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4103 ISA.Stepping, 4104 "AMD", "AMDGPU"); 4105 return false; 4106 } 4107 4108 if (ParseDirectiveMajorMinor(Major, Minor)) 4109 return true; 4110 4111 if (getLexer().isNot(AsmToken::Comma)) 4112 return TokError("stepping version number required, comma expected"); 4113 Lex(); 4114 4115 if (ParseAsAbsoluteExpression(Stepping)) 4116 return TokError("invalid stepping version"); 4117 4118 if (getLexer().isNot(AsmToken::Comma)) 4119 return TokError("vendor name required, comma expected"); 4120 Lex(); 4121 4122 if (getLexer().isNot(AsmToken::String)) 4123 return TokError("invalid vendor name"); 4124 4125 VendorName = getLexer().getTok().getStringContents(); 4126 Lex(); 4127 4128 if (getLexer().isNot(AsmToken::Comma)) 4129 return TokError("arch name required, comma expected"); 4130 Lex(); 4131 4132 if (getLexer().isNot(AsmToken::String)) 4133 return TokError("invalid arch name"); 4134 4135 ArchName = getLexer().getTok().getStringContents(); 4136 Lex(); 4137 4138 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4139 VendorName, ArchName); 4140 return false; 4141 } 4142 4143 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4144 amd_kernel_code_t &Header) { 4145 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4146 // assembly for backwards compatibility. 4147 if (ID == "max_scratch_backing_memory_byte_size") { 4148 Parser.eatToEndOfStatement(); 4149 return false; 4150 } 4151 4152 SmallString<40> ErrStr; 4153 raw_svector_ostream Err(ErrStr); 4154 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4155 return TokError(Err.str()); 4156 } 4157 Lex(); 4158 4159 if (ID == "enable_wavefront_size32") { 4160 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4161 if (!isGFX10()) 4162 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4163 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4164 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4165 } else { 4166 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4167 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4168 } 4169 } 4170 4171 if (ID == "wavefront_size") { 4172 if (Header.wavefront_size == 5) { 4173 if (!isGFX10()) 4174 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4175 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4176 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4177 } else if (Header.wavefront_size == 6) { 4178 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4179 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4180 } 4181 } 4182 4183 if (ID == "enable_wgp_mode") { 4184 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4185 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4186 } 4187 4188 if (ID == "enable_mem_ordered") { 4189 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4190 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4191 } 4192 4193 if (ID == "enable_fwd_progress") { 4194 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4195 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4196 } 4197 4198 return false; 4199 } 4200 4201 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4202 amd_kernel_code_t Header; 4203 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4204 4205 while (true) { 4206 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4207 // will set the current token to EndOfStatement. 4208 while(getLexer().is(AsmToken::EndOfStatement)) 4209 Lex(); 4210 4211 if (getLexer().isNot(AsmToken::Identifier)) 4212 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4213 4214 StringRef ID = getLexer().getTok().getIdentifier(); 4215 Lex(); 4216 4217 if (ID == ".end_amd_kernel_code_t") 4218 break; 4219 4220 if (ParseAMDKernelCodeTValue(ID, Header)) 4221 return true; 4222 } 4223 4224 getTargetStreamer().EmitAMDKernelCodeT(Header); 4225 4226 return false; 4227 } 4228 4229 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4230 if (getLexer().isNot(AsmToken::Identifier)) 4231 return TokError("expected symbol name"); 4232 4233 StringRef KernelName = Parser.getTok().getString(); 4234 4235 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4236 ELF::STT_AMDGPU_HSA_KERNEL); 4237 Lex(); 4238 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4239 KernelScope.initialize(getContext()); 4240 return false; 4241 } 4242 4243 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4244 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4245 return Error(getParser().getTok().getLoc(), 4246 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4247 "architectures"); 4248 } 4249 4250 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4251 4252 std::string ISAVersionStringFromSTI; 4253 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4254 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4255 4256 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4257 return Error(getParser().getTok().getLoc(), 4258 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4259 "arguments specified through the command line"); 4260 } 4261 4262 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4263 Lex(); 4264 4265 return false; 4266 } 4267 4268 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4269 const char *AssemblerDirectiveBegin; 4270 const char *AssemblerDirectiveEnd; 4271 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4272 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4273 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4274 HSAMD::V3::AssemblerDirectiveEnd) 4275 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4276 HSAMD::AssemblerDirectiveEnd); 4277 4278 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4279 return Error(getParser().getTok().getLoc(), 4280 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4281 "not available on non-amdhsa OSes")).str()); 4282 } 4283 4284 std::string HSAMetadataString; 4285 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4286 HSAMetadataString)) 4287 return true; 4288 4289 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4290 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4291 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4292 } else { 4293 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4294 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4295 } 4296 4297 return false; 4298 } 4299 4300 /// Common code to parse out a block of text (typically YAML) between start and 4301 /// end directives. 
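/// A typical use looks like this (sketch; the exact directive names depend on
/// the code object version in effect):
///   .amd_amdgpu_hsa_metadata
///     Version: [ 1, 0 ]
///     ...
///   .end_amd_amdgpu_hsa_metadata
/// Everything between the begin and end directives is collected verbatim into
/// CollectString.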
4302 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4303 const char *AssemblerDirectiveEnd, 4304 std::string &CollectString) { 4305 4306 raw_string_ostream CollectStream(CollectString); 4307 4308 getLexer().setSkipSpace(false); 4309 4310 bool FoundEnd = false; 4311 while (!getLexer().is(AsmToken::Eof)) { 4312 while (getLexer().is(AsmToken::Space)) { 4313 CollectStream << getLexer().getTok().getString(); 4314 Lex(); 4315 } 4316 4317 if (getLexer().is(AsmToken::Identifier)) { 4318 StringRef ID = getLexer().getTok().getIdentifier(); 4319 if (ID == AssemblerDirectiveEnd) { 4320 Lex(); 4321 FoundEnd = true; 4322 break; 4323 } 4324 } 4325 4326 CollectStream << Parser.parseStringToEndOfStatement() 4327 << getContext().getAsmInfo()->getSeparatorString(); 4328 4329 Parser.eatToEndOfStatement(); 4330 } 4331 4332 getLexer().setSkipSpace(true); 4333 4334 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4335 return TokError(Twine("expected directive ") + 4336 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4337 } 4338 4339 CollectStream.flush(); 4340 return false; 4341 } 4342 4343 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4344 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4345 std::string String; 4346 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4347 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4348 return true; 4349 4350 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4351 if (!PALMetadata->setFromString(String)) 4352 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4353 return false; 4354 } 4355 4356 /// Parse the assembler directive for old linear-format PAL metadata. 4357 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4358 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4359 return Error(getParser().getTok().getLoc(), 4360 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4361 "not available on non-amdpal OSes")).str()); 4362 } 4363 4364 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4365 PALMetadata->setLegacy(); 4366 for (;;) { 4367 uint32_t Key, Value; 4368 if (ParseAsAbsoluteExpression(Key)) { 4369 return TokError(Twine("invalid value in ") + 4370 Twine(PALMD::AssemblerDirective)); 4371 } 4372 if (getLexer().isNot(AsmToken::Comma)) { 4373 return TokError(Twine("expected an even number of values in ") + 4374 Twine(PALMD::AssemblerDirective)); 4375 } 4376 Lex(); 4377 if (ParseAsAbsoluteExpression(Value)) { 4378 return TokError(Twine("invalid value in ") + 4379 Twine(PALMD::AssemblerDirective)); 4380 } 4381 PALMetadata->setRegister(Key, Value); 4382 if (getLexer().isNot(AsmToken::Comma)) 4383 break; 4384 Lex(); 4385 } 4386 return false; 4387 } 4388 4389 /// ParseDirectiveAMDGPULDS 4390 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4391 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4392 if (getParser().checkForValidSection()) 4393 return true; 4394 4395 StringRef Name; 4396 SMLoc NameLoc = getLexer().getLoc(); 4397 if (getParser().parseIdentifier(Name)) 4398 return TokError("expected identifier in directive"); 4399 4400 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4401 if (parseToken(AsmToken::Comma, "expected ','")) 4402 return true; 4403 4404 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4405 4406 int64_t Size; 4407 SMLoc SizeLoc = getLexer().getLoc(); 4408 if (getParser().parseAbsoluteExpression(Size)) 4409 return true; 4410 if (Size < 0) 4411 return 
Error(SizeLoc, "size must be non-negative"); 4412 if (Size > LocalMemorySize) 4413 return Error(SizeLoc, "size is too large"); 4414 4415 int64_t Align = 4; 4416 if (getLexer().is(AsmToken::Comma)) { 4417 Lex(); 4418 SMLoc AlignLoc = getLexer().getLoc(); 4419 if (getParser().parseAbsoluteExpression(Align)) 4420 return true; 4421 if (Align < 0 || !isPowerOf2_64(Align)) 4422 return Error(AlignLoc, "alignment must be a power of two"); 4423 4424 // Alignment larger than the size of LDS is possible in theory, as long 4425 // as the linker manages to place the symbol at address 0, but we do want 4426 // to make sure the alignment fits nicely into a 32-bit integer. 4427 if (Align >= 1u << 31) 4428 return Error(AlignLoc, "alignment is too large"); 4429 } 4430 4431 if (parseToken(AsmToken::EndOfStatement, 4432 "unexpected token in '.amdgpu_lds' directive")) 4433 return true; 4434 4435 Symbol->redefineIfPossible(); 4436 if (!Symbol->isUndefined()) 4437 return Error(NameLoc, "invalid symbol redefinition"); 4438 4439 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align); 4440 return false; 4441 } 4442 4443 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4444 StringRef IDVal = DirectiveID.getString(); 4445 4446 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4447 if (IDVal == ".amdgcn_target") 4448 return ParseDirectiveAMDGCNTarget(); 4449 4450 if (IDVal == ".amdhsa_kernel") 4451 return ParseDirectiveAMDHSAKernel(); 4452 4453 // TODO: Restructure/combine with PAL metadata directive. 4454 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4455 return ParseDirectiveHSAMetadata(); 4456 } else { 4457 if (IDVal == ".hsa_code_object_version") 4458 return ParseDirectiveHSACodeObjectVersion(); 4459 4460 if (IDVal == ".hsa_code_object_isa") 4461 return ParseDirectiveHSACodeObjectISA(); 4462 4463 if (IDVal == ".amd_kernel_code_t") 4464 return ParseDirectiveAMDKernelCodeT(); 4465 4466 if (IDVal == ".amdgpu_hsa_kernel") 4467 return ParseDirectiveAMDGPUHsaKernel(); 4468 4469 if (IDVal == ".amd_amdgpu_isa") 4470 return ParseDirectiveISAVersion(); 4471 4472 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4473 return ParseDirectiveHSAMetadata(); 4474 } 4475 4476 if (IDVal == ".amdgpu_lds") 4477 return ParseDirectiveAMDGPULDS(); 4478 4479 if (IDVal == PALMD::AssemblerDirectiveBegin) 4480 return ParseDirectivePALMetadataBegin(); 4481 4482 if (IDVal == PALMD::AssemblerDirective) 4483 return ParseDirectivePALMetadata(); 4484 4485 return true; 4486 } 4487 4488 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4489 unsigned RegNo) const { 4490 4491 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4492 R.isValid(); ++R) { 4493 if (*R == RegNo) 4494 return isGFX9() || isGFX10(); 4495 } 4496 4497 // GFX10 has 2 more SGPRs 104 and 105.
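  // Illustrative example (assumed syntax): an operand such as s[104:105], as
  // in "s_mov_b64 s[104:105], s[0:1]", should only be accepted when
  // hasSGPR104_SGPR105() holds; the alias walk below implements that check.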
4498 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4499 R.isValid(); ++R) { 4500 if (*R == RegNo) 4501 return hasSGPR104_SGPR105(); 4502 } 4503 4504 switch (RegNo) { 4505 case AMDGPU::SRC_SHARED_BASE: 4506 case AMDGPU::SRC_SHARED_LIMIT: 4507 case AMDGPU::SRC_PRIVATE_BASE: 4508 case AMDGPU::SRC_PRIVATE_LIMIT: 4509 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4510 return !isCI() && !isSI() && !isVI(); 4511 case AMDGPU::TBA: 4512 case AMDGPU::TBA_LO: 4513 case AMDGPU::TBA_HI: 4514 case AMDGPU::TMA: 4515 case AMDGPU::TMA_LO: 4516 case AMDGPU::TMA_HI: 4517 return !isGFX9() && !isGFX10(); 4518 case AMDGPU::XNACK_MASK: 4519 case AMDGPU::XNACK_MASK_LO: 4520 case AMDGPU::XNACK_MASK_HI: 4521 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4522 case AMDGPU::SGPR_NULL: 4523 return isGFX10(); 4524 default: 4525 break; 4526 } 4527 4528 if (isCI()) 4529 return true; 4530 4531 if (isSI() || isGFX10()) { 4532 // No flat_scr on SI. 4533 // On GFX10 flat scratch is not a valid register operand and can only be 4534 // accessed with s_setreg/s_getreg. 4535 switch (RegNo) { 4536 case AMDGPU::FLAT_SCR: 4537 case AMDGPU::FLAT_SCR_LO: 4538 case AMDGPU::FLAT_SCR_HI: 4539 return false; 4540 default: 4541 return true; 4542 } 4543 } 4544 4545 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4546 // SI/CI have. 4547 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4548 R.isValid(); ++R) { 4549 if (*R == RegNo) 4550 return hasSGPR102_SGPR103(); 4551 } 4552 4553 return true; 4554 } 4555 4556 OperandMatchResultTy 4557 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4558 OperandMode Mode) { 4559 // Try to parse with a custom parser 4560 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4561 4562 // If we successfully parsed the operand or if there was an error parsing, 4563 // we are done. 4564 // 4565 // If we are parsing after we reach EndOfStatement then this means we 4566 // are appending default values to the Operands list. This is only done 4567 // by a custom parser, so we shouldn't continue on to the generic parsing. 4568 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4569 getLexer().is(AsmToken::EndOfStatement)) 4570 return ResTy; 4571 4572 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4573 unsigned Prefix = Operands.size(); 4574 SMLoc LBraceLoc = getTok().getLoc(); 4575 Parser.Lex(); // eat the '[' 4576 4577 for (;;) { 4578 ResTy = parseReg(Operands); 4579 if (ResTy != MatchOperand_Success) 4580 return ResTy; 4581 4582 if (getLexer().is(AsmToken::RBrac)) 4583 break; 4584 4585 if (getLexer().isNot(AsmToken::Comma)) 4586 return MatchOperand_ParseFail; 4587 Parser.Lex(); 4588 } 4589 4590 if (Operands.size() - Prefix > 1) { 4591 Operands.insert(Operands.begin() + Prefix, 4592 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4593 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4594 getTok().getLoc())); 4595 } 4596 4597 Parser.Lex(); // eat the ']' 4598 return MatchOperand_Success; 4599 } 4600 4601 return parseRegOrImm(Operands); 4602 } 4603 4604 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4605 // Clear any forced encodings from the previous instruction.
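  // For example (illustrative): "v_add_f32_e64" forces the 64-bit VOP3
  // encoding and is matched under the mnemonic "v_add_f32"; the "_e32",
  // "_dpp" and "_sdwa" suffixes are handled the same way below.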
4606 setForcedEncodingSize(0); 4607 setForcedDPP(false); 4608 setForcedSDWA(false); 4609 4610 if (Name.endswith("_e64")) { 4611 setForcedEncodingSize(64); 4612 return Name.substr(0, Name.size() - 4); 4613 } else if (Name.endswith("_e32")) { 4614 setForcedEncodingSize(32); 4615 return Name.substr(0, Name.size() - 4); 4616 } else if (Name.endswith("_dpp")) { 4617 setForcedDPP(true); 4618 return Name.substr(0, Name.size() - 4); 4619 } else if (Name.endswith("_sdwa")) { 4620 setForcedSDWA(true); 4621 return Name.substr(0, Name.size() - 5); 4622 } 4623 return Name; 4624 } 4625 4626 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4627 StringRef Name, 4628 SMLoc NameLoc, OperandVector &Operands) { 4629 // Add the instruction mnemonic 4630 Name = parseMnemonicSuffix(Name); 4631 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4632 4633 bool IsMIMG = Name.startswith("image_"); 4634 4635 while (!getLexer().is(AsmToken::EndOfStatement)) { 4636 OperandMode Mode = OperandMode_Default; 4637 if (IsMIMG && isGFX10() && Operands.size() == 2) 4638 Mode = OperandMode_NSA; 4639 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4640 4641 // Eat the comma or space if there is one. 4642 if (getLexer().is(AsmToken::Comma)) 4643 Parser.Lex(); 4644 4645 switch (Res) { 4646 case MatchOperand_Success: break; 4647 case MatchOperand_ParseFail: 4648 // FIXME: use real operand location rather than the current location. 4649 Error(getLexer().getLoc(), "failed parsing operand."); 4650 while (!getLexer().is(AsmToken::EndOfStatement)) { 4651 Parser.Lex(); 4652 } 4653 return true; 4654 case MatchOperand_NoMatch: 4655 // FIXME: use real operand location rather than the current location. 4656 Error(getLexer().getLoc(), "not a valid operand."); 4657 while (!getLexer().is(AsmToken::EndOfStatement)) { 4658 Parser.Lex(); 4659 } 4660 return true; 4661 } 4662 } 4663 4664 return false; 4665 } 4666 4667 //===----------------------------------------------------------------------===// 4668 // Utility functions 4669 //===----------------------------------------------------------------------===// 4670 4671 OperandMatchResultTy 4672 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4673 4674 if (!trySkipId(Prefix, AsmToken::Colon)) 4675 return MatchOperand_NoMatch; 4676 4677 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4678 } 4679 4680 OperandMatchResultTy 4681 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4682 AMDGPUOperand::ImmTy ImmTy, 4683 bool (*ConvertResult)(int64_t&)) { 4684 SMLoc S = getLoc(); 4685 int64_t Value = 0; 4686 4687 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4688 if (Res != MatchOperand_Success) 4689 return Res; 4690 4691 if (ConvertResult && !ConvertResult(Value)) { 4692 Error(S, "invalid " + StringRef(Prefix) + " value."); 4693 } 4694 4695 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4696 return MatchOperand_Success; 4697 } 4698 4699 OperandMatchResultTy 4700 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4701 OperandVector &Operands, 4702 AMDGPUOperand::ImmTy ImmTy, 4703 bool (*ConvertResult)(int64_t&)) { 4704 SMLoc S = getLoc(); 4705 if (!trySkipId(Prefix, AsmToken::Colon)) 4706 return MatchOperand_NoMatch; 4707 4708 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4709 return MatchOperand_ParseFail; 4710 4711 unsigned Val = 0; 4712 const unsigned MaxSize = 4; 4713 4714 // FIXME: How to verify the number of elements matches the number of src 4715 // operands? 4716 for (int I = 0; ; ++I) { 4717 int64_t Op; 4718 SMLoc Loc = getLoc(); 4719 if (!parseExpr(Op)) 4720 return MatchOperand_ParseFail; 4721 4722 if (Op != 0 && Op != 1) { 4723 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4724 return MatchOperand_ParseFail; 4725 } 4726 4727 Val |= (Op << I); 4728 4729 if (trySkipToken(AsmToken::RBrac)) 4730 break; 4731 4732 if (I + 1 == MaxSize) { 4733 Error(getLoc(), "expected a closing square bracket"); 4734 return MatchOperand_ParseFail; 4735 } 4736 4737 if (!skipToken(AsmToken::Comma, "expected a comma")) 4738 return MatchOperand_ParseFail; 4739 } 4740 4741 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4742 return MatchOperand_Success; 4743 } 4744 4745 OperandMatchResultTy 4746 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4747 AMDGPUOperand::ImmTy ImmTy) { 4748 int64_t Bit = 0; 4749 SMLoc S = Parser.getTok().getLoc(); 4750 4751 // We are at the end of the statement, and this is a default argument, so 4752 // use a default value. 
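  // Sketch of the accepted forms (illustrative): for Name == "glc", the token
  // "glc" yields Bit = 1, "noglc" yields Bit = 0, and anything else is left
  // for other parsers (MatchOperand_NoMatch).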
4753 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4754 switch(getLexer().getKind()) { 4755 case AsmToken::Identifier: { 4756 StringRef Tok = Parser.getTok().getString(); 4757 if (Tok == Name) { 4758 if (Tok == "r128" && !hasMIMG_R128()) 4759 Error(S, "r128 modifier is not supported on this GPU"); 4760 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 4761 Error(S, "a16 modifier is not supported on this GPU"); 4762 Bit = 1; 4763 Parser.Lex(); 4764 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4765 Bit = 0; 4766 Parser.Lex(); 4767 } else { 4768 return MatchOperand_NoMatch; 4769 } 4770 break; 4771 } 4772 default: 4773 return MatchOperand_NoMatch; 4774 } 4775 } 4776 4777 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4778 return MatchOperand_ParseFail; 4779 4780 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 4781 ImmTy = AMDGPUOperand::ImmTyR128A16; 4782 4783 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4784 return MatchOperand_Success; 4785 } 4786 4787 static void addOptionalImmOperand( 4788 MCInst& Inst, const OperandVector& Operands, 4789 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4790 AMDGPUOperand::ImmTy ImmT, 4791 int64_t Default = 0) { 4792 auto i = OptionalIdx.find(ImmT); 4793 if (i != OptionalIdx.end()) { 4794 unsigned Idx = i->second; 4795 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4796 } else { 4797 Inst.addOperand(MCOperand::createImm(Default)); 4798 } 4799 } 4800 4801 OperandMatchResultTy 4802 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4803 if (getLexer().isNot(AsmToken::Identifier)) { 4804 return MatchOperand_NoMatch; 4805 } 4806 StringRef Tok = Parser.getTok().getString(); 4807 if (Tok != Prefix) { 4808 return MatchOperand_NoMatch; 4809 } 4810 4811 Parser.Lex(); 4812 if (getLexer().isNot(AsmToken::Colon)) { 4813 return MatchOperand_ParseFail; 4814 } 4815 4816 Parser.Lex(); 4817 if (getLexer().isNot(AsmToken::Identifier)) { 4818 return MatchOperand_ParseFail; 4819 } 4820 4821 Value = Parser.getTok().getString(); 4822 return MatchOperand_Success; 4823 } 4824 4825 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4826 // values to live in a joint format operand in the MCInst encoding. 4827 OperandMatchResultTy 4828 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4829 SMLoc S = Parser.getTok().getLoc(); 4830 int64_t Dfmt = 0, Nfmt = 0; 4831 // dfmt and nfmt can appear in either order, and each is optional. 
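  // e.g. (assumed assembly syntax) "dfmt:15, nfmt:2" and "nfmt:2, dfmt:15"
  // both produce the same joint value, Dfmt | (Nfmt << 4).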
4832 bool GotDfmt = false, GotNfmt = false; 4833 while (!GotDfmt || !GotNfmt) { 4834 if (!GotDfmt) { 4835 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4836 if (Res != MatchOperand_NoMatch) { 4837 if (Res != MatchOperand_Success) 4838 return Res; 4839 if (Dfmt >= 16) { 4840 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4841 return MatchOperand_ParseFail; 4842 } 4843 GotDfmt = true; 4844 Parser.Lex(); 4845 continue; 4846 } 4847 } 4848 if (!GotNfmt) { 4849 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4850 if (Res != MatchOperand_NoMatch) { 4851 if (Res != MatchOperand_Success) 4852 return Res; 4853 if (Nfmt >= 8) { 4854 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4855 return MatchOperand_ParseFail; 4856 } 4857 GotNfmt = true; 4858 Parser.Lex(); 4859 continue; 4860 } 4861 } 4862 break; 4863 } 4864 if (!GotDfmt && !GotNfmt) 4865 return MatchOperand_NoMatch; 4866 auto Format = Dfmt | Nfmt << 4; 4867 Operands.push_back( 4868 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4869 return MatchOperand_Success; 4870 } 4871 4872 //===----------------------------------------------------------------------===// 4873 // ds 4874 //===----------------------------------------------------------------------===// 4875 4876 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4877 const OperandVector &Operands) { 4878 OptionalImmIndexMap OptionalIdx; 4879 4880 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4881 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4882 4883 // Add the register arguments 4884 if (Op.isReg()) { 4885 Op.addRegOperands(Inst, 1); 4886 continue; 4887 } 4888 4889 // Handle optional arguments 4890 OptionalIdx[Op.getImmTy()] = i; 4891 } 4892 4893 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4894 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4895 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4896 4897 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4898 } 4899 4900 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4901 bool IsGdsHardcoded) { 4902 OptionalImmIndexMap OptionalIdx; 4903 4904 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4905 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4906 4907 // Add the register arguments 4908 if (Op.isReg()) { 4909 Op.addRegOperands(Inst, 1); 4910 continue; 4911 } 4912 4913 if (Op.isToken() && Op.getToken() == "gds") { 4914 IsGdsHardcoded = true; 4915 continue; 4916 } 4917 4918 // Handle optional arguments 4919 OptionalIdx[Op.getImmTy()] = i; 4920 } 4921 4922 AMDGPUOperand::ImmTy OffsetType = 4923 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4924 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4925 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4926 AMDGPUOperand::ImmTyOffset; 4927 4928 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4929 4930 if (!IsGdsHardcoded) { 4931 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4932 } 4933 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4934 } 4935 4936 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4937 OptionalImmIndexMap OptionalIdx; 4938 4939 unsigned OperandIdx[4]; 4940 unsigned EnMask = 0; 4941 int SrcIdx = 0; 4942 4943 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4944 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4945 4946 // Add the register arguments 4947 if (Op.isReg()) { 4948 assert(SrcIdx < 4); 4949 OperandIdx[SrcIdx] = Inst.size(); 4950 Op.addRegOperands(Inst, 1); 4951 ++SrcIdx; 4952 continue; 4953 } 4954 4955 if (Op.isOff()) { 4956 assert(SrcIdx < 4); 4957 OperandIdx[SrcIdx] = Inst.size(); 4958 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4959 ++SrcIdx; 4960 continue; 4961 } 4962 4963 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4964 Op.addImmOperands(Inst, 1); 4965 continue; 4966 } 4967 4968 if (Op.isToken() && Op.getToken() == "done") 4969 continue; 4970 4971 // Handle optional arguments 4972 OptionalIdx[Op.getImmTy()] = i; 4973 } 4974 4975 assert(SrcIdx == 4); 4976 4977 bool Compr = false; 4978 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4979 Compr = true; 4980 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4981 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4982 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4983 } 4984 4985 for (auto i = 0; i < SrcIdx; ++i) { 4986 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4987 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4988 } 4989 } 4990 4991 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4992 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4993 4994 Inst.addOperand(MCOperand::createImm(EnMask)); 4995 } 4996 4997 //===----------------------------------------------------------------------===// 4998 // s_waitcnt 4999 //===----------------------------------------------------------------------===// 5000 5001 static bool 5002 encodeCnt( 5003 const AMDGPU::IsaVersion ISA, 5004 int64_t &IntVal, 5005 int64_t CntVal, 5006 bool Saturate, 5007 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5008 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5009 { 5010 bool Failed = false; 5011 5012 IntVal = encode(ISA, IntVal, CntVal); 5013 if (CntVal != decode(ISA, IntVal)) { 5014 if (Saturate) { 5015 IntVal = encode(ISA, IntVal, -1); 5016 } else { 5017 Failed = true; 5018 } 5019 } 5020 return Failed; 5021 } 5022 5023 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5024 5025 SMLoc CntLoc = getLoc(); 5026 StringRef CntName = getTokenStr(); 5027 5028 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5029 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5030 return false; 5031 5032 int64_t CntVal; 5033 SMLoc ValLoc = getLoc(); 5034 if (!parseExpr(CntVal)) 5035 return false; 5036 5037 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5038 5039 bool Failed = true; 5040 bool Sat = CntName.endswith("_sat"); 5041 5042 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5043 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5044 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5045 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5046 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5047 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5048 } else { 5049 Error(CntLoc, "invalid counter name " + CntName); 5050 return false; 5051 } 5052 5053 if (Failed) { 5054 Error(ValLoc, "too large value for " + CntName); 5055 return false; 5056 } 5057 5058 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5059 return false; 5060 5061 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5062 if (isToken(AsmToken::EndOfStatement)) { 5063 Error(getLoc(), "expected a counter name"); 5064 return false; 5065 } 5066 } 5067 5068 return true; 5069 } 5070 5071 OperandMatchResultTy 5072 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5073 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5074 int64_t Waitcnt = getWaitcntBitMask(ISA); 5075 SMLoc S = getLoc(); 5076 5077 // If parse failed, do not return error code 5078 // to avoid excessive error messages. 
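  // Accepted forms include (illustrative) "s_waitcnt vmcnt(0) lgkmcnt(0)",
  // where each counter is folded into Waitcnt by parseCnt, and a plain
  // expression such as "s_waitcnt 0x70f", handled by parseExpr below.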
5079 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5080 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 5081 } else { 5082 parseExpr(Waitcnt); 5083 } 5084 5085 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5086 return MatchOperand_Success; 5087 } 5088 5089 bool 5090 AMDGPUOperand::isSWaitCnt() const { 5091 return isImm(); 5092 } 5093 5094 //===----------------------------------------------------------------------===// 5095 // hwreg 5096 //===----------------------------------------------------------------------===// 5097 5098 bool 5099 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5100 int64_t &Offset, 5101 int64_t &Width) { 5102 using namespace llvm::AMDGPU::Hwreg; 5103 5104 // The register may be specified by name or using a numeric code 5105 if (isToken(AsmToken::Identifier) && 5106 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5107 HwReg.IsSymbolic = true; 5108 lex(); // skip register name 5109 } else if (!parseExpr(HwReg.Id)) { 5110 return false; 5111 } 5112 5113 if (trySkipToken(AsmToken::RParen)) 5114 return true; 5115 5116 // parse optional params 5117 return 5118 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5119 parseExpr(Offset) && 5120 skipToken(AsmToken::Comma, "expected a comma") && 5121 parseExpr(Width) && 5122 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5123 } 5124 5125 bool 5126 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5127 const int64_t Offset, 5128 const int64_t Width, 5129 const SMLoc Loc) { 5130 5131 using namespace llvm::AMDGPU::Hwreg; 5132 5133 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5134 Error(Loc, "specified hardware register is not supported on this GPU"); 5135 return false; 5136 } else if (!isValidHwreg(HwReg.Id)) { 5137 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5138 return false; 5139 } else if (!isValidHwregOffset(Offset)) { 5140 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5141 return false; 5142 } else if (!isValidHwregWidth(Width)) { 5143 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5144 return false; 5145 } 5146 return true; 5147 } 5148 5149 OperandMatchResultTy 5150 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5151 using namespace llvm::AMDGPU::Hwreg; 5152 5153 int64_t ImmVal = 0; 5154 SMLoc Loc = getLoc(); 5155 5156 // If parse failed, do not return error code 5157 // to avoid excessive error messages.
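  // Accepted forms include (illustrative) "hwreg(HW_REG_TRAPSTS, 0, 32)",
  // "hwreg(6)" and a plain 16-bit literal; the symbolic form is validated
  // more strictly by validateHwreg above.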
5158 if (trySkipId("hwreg", AsmToken::LParen)) { 5159 OperandInfoTy HwReg(ID_UNKNOWN_); 5160 int64_t Offset = OFFSET_DEFAULT_; 5161 int64_t Width = WIDTH_DEFAULT_; 5162 if (parseHwregBody(HwReg, Offset, Width) && 5163 validateHwreg(HwReg, Offset, Width, Loc)) { 5164 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5165 } 5166 } else if (parseExpr(ImmVal)) { 5167 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5168 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5169 } 5170 5171 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5172 return MatchOperand_Success; 5173 } 5174 5175 bool AMDGPUOperand::isHwreg() const { 5176 return isImmTy(ImmTyHwreg); 5177 } 5178 5179 //===----------------------------------------------------------------------===// 5180 // sendmsg 5181 //===----------------------------------------------------------------------===// 5182 5183 bool 5184 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5185 OperandInfoTy &Op, 5186 OperandInfoTy &Stream) { 5187 using namespace llvm::AMDGPU::SendMsg; 5188 5189 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5190 Msg.IsSymbolic = true; 5191 lex(); // skip message name 5192 } else if (!parseExpr(Msg.Id)) { 5193 return false; 5194 } 5195 5196 if (trySkipToken(AsmToken::Comma)) { 5197 Op.IsDefined = true; 5198 if (isToken(AsmToken::Identifier) && 5199 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5200 lex(); // skip operation name 5201 } else if (!parseExpr(Op.Id)) { 5202 return false; 5203 } 5204 5205 if (trySkipToken(AsmToken::Comma)) { 5206 Stream.IsDefined = true; 5207 if (!parseExpr(Stream.Id)) 5208 return false; 5209 } 5210 } 5211 5212 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5213 } 5214 5215 bool 5216 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5217 const OperandInfoTy &Op, 5218 const OperandInfoTy &Stream, 5219 const SMLoc S) { 5220 using namespace llvm::AMDGPU::SendMsg; 5221 5222 // Validation strictness depends on whether the message is specified 5223 // in a symbolic or in a numeric form. In the latter case 5224 // only the encoding possibility is checked. 5225 bool Strict = Msg.IsSymbolic; 5226 5227 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5228 Error(S, "invalid message id"); 5229 return false; 5230 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5231 Error(S, Op.IsDefined ? 5232 "message does not support operations" : 5233 "missing message operation"); 5234 return false; 5235 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5236 Error(S, "invalid operation id"); 5237 return false; 5238 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5239 Error(S, "message operation does not support streams"); 5240 return false; 5241 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5242 Error(S, "invalid message stream id"); 5243 return false; 5244 } 5245 return true; 5246 } 5247 5248 OperandMatchResultTy 5249 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5250 using namespace llvm::AMDGPU::SendMsg; 5251 5252 int64_t ImmVal = 0; 5253 SMLoc Loc = getLoc(); 5254 5255 // If parse failed, do not return error code 5256 // to avoid excessive error messages.
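  // Accepted forms include (illustrative) "sendmsg(MSG_GS, GS_OP_EMIT, 0)",
  // "sendmsg(MSG_GS_DONE, GS_OP_NOP)" and a plain 16-bit literal; symbolic
  // forms are checked more strictly by validateSendMsg above.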
5257 if (trySkipId("sendmsg", AsmToken::LParen)) { 5258 OperandInfoTy Msg(ID_UNKNOWN_); 5259 OperandInfoTy Op(OP_NONE_); 5260 OperandInfoTy Stream(STREAM_ID_NONE_); 5261 if (parseSendMsgBody(Msg, Op, Stream) && 5262 validateSendMsg(Msg, Op, Stream, Loc)) { 5263 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5264 } 5265 } else if (parseExpr(ImmVal)) { 5266 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5267 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5268 } 5269 5270 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5271 return MatchOperand_Success; 5272 } 5273 5274 bool AMDGPUOperand::isSendMsg() const { 5275 return isImmTy(ImmTySendMsg); 5276 } 5277 5278 //===----------------------------------------------------------------------===// 5279 // v_interp 5280 //===----------------------------------------------------------------------===// 5281 5282 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5283 if (getLexer().getKind() != AsmToken::Identifier) 5284 return MatchOperand_NoMatch; 5285 5286 StringRef Str = Parser.getTok().getString(); 5287 int Slot = StringSwitch<int>(Str) 5288 .Case("p10", 0) 5289 .Case("p20", 1) 5290 .Case("p0", 2) 5291 .Default(-1); 5292 5293 SMLoc S = Parser.getTok().getLoc(); 5294 if (Slot == -1) 5295 return MatchOperand_ParseFail; 5296 5297 Parser.Lex(); 5298 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5299 AMDGPUOperand::ImmTyInterpSlot)); 5300 return MatchOperand_Success; 5301 } 5302 5303 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5304 if (getLexer().getKind() != AsmToken::Identifier) 5305 return MatchOperand_NoMatch; 5306 5307 StringRef Str = Parser.getTok().getString(); 5308 if (!Str.startswith("attr")) 5309 return MatchOperand_NoMatch; 5310 5311 StringRef Chan = Str.take_back(2); 5312 int AttrChan = StringSwitch<int>(Chan) 5313 .Case(".x", 0) 5314 .Case(".y", 1) 5315 .Case(".z", 2) 5316 .Case(".w", 3) 5317 .Default(-1); 5318 if (AttrChan == -1) 5319 return MatchOperand_ParseFail; 5320 5321 Str = Str.drop_back(2).drop_front(4); 5322 5323 uint8_t Attr; 5324 if (Str.getAsInteger(10, Attr)) 5325 return MatchOperand_ParseFail; 5326 5327 SMLoc S = Parser.getTok().getLoc(); 5328 Parser.Lex(); 5329 if (Attr > 63) { 5330 Error(S, "out of bounds attr"); 5331 return MatchOperand_Success; 5332 } 5333 5334 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5335 5336 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5337 AMDGPUOperand::ImmTyInterpAttr)); 5338 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5339 AMDGPUOperand::ImmTyAttrChan)); 5340 return MatchOperand_Success; 5341 } 5342 5343 //===----------------------------------------------------------------------===// 5344 // exp 5345 //===----------------------------------------------------------------------===// 5346 5347 void AMDGPUAsmParser::errorExpTgt() { 5348 Error(Parser.getTok().getLoc(), "invalid exp target"); 5349 } 5350 5351 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5352 uint8_t &Val) { 5353 if (Str == "null") { 5354 Val = 9; 5355 return MatchOperand_Success; 5356 } 5357 5358 if (Str.startswith("mrt")) { 5359 Str = Str.drop_front(3); 5360 if (Str == "z") { // == mrtz 5361 Val = 8; 5362 return MatchOperand_Success; 5363 } 5364 5365 if (Str.getAsInteger(10, Val)) 5366 return MatchOperand_ParseFail; 5367 5368 if (Val > 7) 5369 errorExpTgt(); 5370 5371 return MatchOperand_Success; 5372 } 5373 5374 if (Str.startswith("pos")) 
{ 5375 Str = Str.drop_front(3); 5376 if (Str.getAsInteger(10, Val)) 5377 return MatchOperand_ParseFail; 5378 5379 if (Val > 4 || (Val == 4 && !isGFX10())) 5380 errorExpTgt(); 5381 5382 Val += 12; 5383 return MatchOperand_Success; 5384 } 5385 5386 if (isGFX10() && Str == "prim") { 5387 Val = 20; 5388 return MatchOperand_Success; 5389 } 5390 5391 if (Str.startswith("param")) { 5392 Str = Str.drop_front(5); 5393 if (Str.getAsInteger(10, Val)) 5394 return MatchOperand_ParseFail; 5395 5396 if (Val >= 32) 5397 errorExpTgt(); 5398 5399 Val += 32; 5400 return MatchOperand_Success; 5401 } 5402 5403 if (Str.startswith("invalid_target_")) { 5404 Str = Str.drop_front(15); 5405 if (Str.getAsInteger(10, Val)) 5406 return MatchOperand_ParseFail; 5407 5408 errorExpTgt(); 5409 return MatchOperand_Success; 5410 } 5411 5412 return MatchOperand_NoMatch; 5413 } 5414 5415 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5416 uint8_t Val; 5417 StringRef Str = Parser.getTok().getString(); 5418 5419 auto Res = parseExpTgtImpl(Str, Val); 5420 if (Res != MatchOperand_Success) 5421 return Res; 5422 5423 SMLoc S = Parser.getTok().getLoc(); 5424 Parser.Lex(); 5425 5426 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5427 AMDGPUOperand::ImmTyExpTgt)); 5428 return MatchOperand_Success; 5429 } 5430 5431 //===----------------------------------------------------------------------===// 5432 // parser helpers 5433 //===----------------------------------------------------------------------===// 5434 5435 bool 5436 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5437 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5438 } 5439 5440 bool 5441 AMDGPUAsmParser::isId(const StringRef Id) const { 5442 return isId(getToken(), Id); 5443 } 5444 5445 bool 5446 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5447 return getTokenKind() == Kind; 5448 } 5449 5450 bool 5451 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5452 if (isId(Id)) { 5453 lex(); 5454 return true; 5455 } 5456 return false; 5457 } 5458 5459 bool 5460 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5461 if (isId(Id) && peekToken().is(Kind)) { 5462 lex(); 5463 lex(); 5464 return true; 5465 } 5466 return false; 5467 } 5468 5469 bool 5470 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5471 if (isToken(Kind)) { 5472 lex(); 5473 return true; 5474 } 5475 return false; 5476 } 5477 5478 bool 5479 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5480 const StringRef ErrMsg) { 5481 if (!trySkipToken(Kind)) { 5482 Error(getLoc(), ErrMsg); 5483 return false; 5484 } 5485 return true; 5486 } 5487 5488 bool 5489 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5490 return !getParser().parseAbsoluteExpression(Imm); 5491 } 5492 5493 bool 5494 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5495 SMLoc S = getLoc(); 5496 5497 const MCExpr *Expr; 5498 if (Parser.parseExpression(Expr)) 5499 return false; 5500 5501 int64_t IntVal; 5502 if (Expr->evaluateAsAbsolute(IntVal)) { 5503 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5504 } else { 5505 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5506 } 5507 return true; 5508 } 5509 5510 bool 5511 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5512 if (isToken(AsmToken::String)) { 5513 Val = getToken().getStringContents(); 5514 lex(); 5515 return true; 5516 } else { 5517 Error(getLoc(), ErrMsg); 5518 return false; 5519 } 5520 } 5521 5522 
AsmToken 5523 AMDGPUAsmParser::getToken() const { 5524 return Parser.getTok(); 5525 } 5526 5527 AsmToken 5528 AMDGPUAsmParser::peekToken() { 5529 return getLexer().peekTok(); 5530 } 5531 5532 void 5533 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5534 auto TokCount = getLexer().peekTokens(Tokens); 5535 5536 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5537 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5538 } 5539 5540 AsmToken::TokenKind 5541 AMDGPUAsmParser::getTokenKind() const { 5542 return getLexer().getKind(); 5543 } 5544 5545 SMLoc 5546 AMDGPUAsmParser::getLoc() const { 5547 return getToken().getLoc(); 5548 } 5549 5550 StringRef 5551 AMDGPUAsmParser::getTokenStr() const { 5552 return getToken().getString(); 5553 } 5554 5555 void 5556 AMDGPUAsmParser::lex() { 5557 Parser.Lex(); 5558 } 5559 5560 //===----------------------------------------------------------------------===// 5561 // swizzle 5562 //===----------------------------------------------------------------------===// 5563 5564 LLVM_READNONE 5565 static unsigned 5566 encodeBitmaskPerm(const unsigned AndMask, 5567 const unsigned OrMask, 5568 const unsigned XorMask) { 5569 using namespace llvm::AMDGPU::Swizzle; 5570 5571 return BITMASK_PERM_ENC | 5572 (AndMask << BITMASK_AND_SHIFT) | 5573 (OrMask << BITMASK_OR_SHIFT) | 5574 (XorMask << BITMASK_XOR_SHIFT); 5575 } 5576 5577 bool 5578 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5579 const unsigned MinVal, 5580 const unsigned MaxVal, 5581 const StringRef ErrMsg) { 5582 for (unsigned i = 0; i < OpNum; ++i) { 5583 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5584 return false; 5585 } 5586 SMLoc ExprLoc = Parser.getTok().getLoc(); 5587 if (!parseExpr(Op[i])) { 5588 return false; 5589 } 5590 if (Op[i] < MinVal || Op[i] > MaxVal) { 5591 Error(ExprLoc, ErrMsg); 5592 return false; 5593 } 5594 } 5595 5596 return true; 5597 } 5598 5599 bool 5600 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5601 using namespace llvm::AMDGPU::Swizzle; 5602 5603 int64_t Lane[LANE_NUM]; 5604 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5605 "expected a 2-bit lane id")) { 5606 Imm = QUAD_PERM_ENC; 5607 for (unsigned I = 0; I < LANE_NUM; ++I) { 5608 Imm |= Lane[I] << (LANE_SHIFT * I); 5609 } 5610 return true; 5611 } 5612 return false; 5613 } 5614 5615 bool 5616 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5617 using namespace llvm::AMDGPU::Swizzle; 5618 5619 SMLoc S = Parser.getTok().getLoc(); 5620 int64_t GroupSize; 5621 int64_t LaneIdx; 5622 5623 if (!parseSwizzleOperands(1, &GroupSize, 5624 2, 32, 5625 "group size must be in the interval [2,32]")) { 5626 return false; 5627 } 5628 if (!isPowerOf2_64(GroupSize)) { 5629 Error(S, "group size must be a power of two"); 5630 return false; 5631 } 5632 if (parseSwizzleOperands(1, &LaneIdx, 5633 0, GroupSize - 1, 5634 "lane id must be in the interval [0,group size - 1]")) { 5635 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5636 return true; 5637 } 5638 return false; 5639 } 5640 5641 bool 5642 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5643 using namespace llvm::AMDGPU::Swizzle; 5644 5645 SMLoc S = Parser.getTok().getLoc(); 5646 int64_t GroupSize; 5647 5648 if (!parseSwizzleOperands(1, &GroupSize, 5649 2, 32, "group size must be in the interval [2,32]")) { 5650 return false; 5651 } 5652 if (!isPowerOf2_64(GroupSize)) { 5653 Error(S, "group size must be a power of two"); 5654 return false; 5655 } 5656 5657 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, 
GroupSize - 1); 5658 return true; 5659 } 5660 5661 bool 5662 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5663 using namespace llvm::AMDGPU::Swizzle; 5664 5665 SMLoc S = Parser.getTok().getLoc(); 5666 int64_t GroupSize; 5667 5668 if (!parseSwizzleOperands(1, &GroupSize, 5669 1, 16, "group size must be in the interval [1,16]")) { 5670 return false; 5671 } 5672 if (!isPowerOf2_64(GroupSize)) { 5673 Error(S, "group size must be a power of two"); 5674 return false; 5675 } 5676 5677 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5678 return true; 5679 } 5680 5681 bool 5682 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5683 using namespace llvm::AMDGPU::Swizzle; 5684 5685 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5686 return false; 5687 } 5688 5689 StringRef Ctl; 5690 SMLoc StrLoc = Parser.getTok().getLoc(); 5691 if (!parseString(Ctl)) { 5692 return false; 5693 } 5694 if (Ctl.size() != BITMASK_WIDTH) { 5695 Error(StrLoc, "expected a 5-character mask"); 5696 return false; 5697 } 5698 5699 unsigned AndMask = 0; 5700 unsigned OrMask = 0; 5701 unsigned XorMask = 0; 5702 5703 for (size_t i = 0; i < Ctl.size(); ++i) { 5704 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5705 switch(Ctl[i]) { 5706 default: 5707 Error(StrLoc, "invalid mask"); 5708 return false; 5709 case '0': 5710 break; 5711 case '1': 5712 OrMask |= Mask; 5713 break; 5714 case 'p': 5715 AndMask |= Mask; 5716 break; 5717 case 'i': 5718 AndMask |= Mask; 5719 XorMask |= Mask; 5720 break; 5721 } 5722 } 5723 5724 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5725 return true; 5726 } 5727 5728 bool 5729 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5730 5731 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5732 5733 if (!parseExpr(Imm)) { 5734 return false; 5735 } 5736 if (!isUInt<16>(Imm)) { 5737 Error(OffsetLoc, "expected a 16-bit offset"); 5738 return false; 5739 } 5740 return true; 5741 } 5742 5743 bool 5744 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5745 using namespace llvm::AMDGPU::Swizzle; 5746 5747 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5748 5749 SMLoc ModeLoc = Parser.getTok().getLoc(); 5750 bool Ok = false; 5751 5752 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5753 Ok = parseSwizzleQuadPerm(Imm); 5754 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5755 Ok = parseSwizzleBitmaskPerm(Imm); 5756 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5757 Ok = parseSwizzleBroadcast(Imm); 5758 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5759 Ok = parseSwizzleSwap(Imm); 5760 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5761 Ok = parseSwizzleReverse(Imm); 5762 } else { 5763 Error(ModeLoc, "expected a swizzle mode"); 5764 } 5765 5766 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5767 } 5768 5769 return false; 5770 } 5771 5772 OperandMatchResultTy 5773 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5774 SMLoc S = Parser.getTok().getLoc(); 5775 int64_t Imm = 0; 5776 5777 if (trySkipId("offset")) { 5778 5779 bool Ok = false; 5780 if (skipToken(AsmToken::Colon, "expected a colon")) { 5781 if (trySkipId("swizzle")) { 5782 Ok = parseSwizzleMacro(Imm); 5783 } else { 5784 Ok = parseSwizzleOffset(Imm); 5785 } 5786 } 5787 5788 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5789 5790 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5791 } else { 5792 // Swizzle "offset" operand is optional. 5793 // If it is omitted, try parsing other optional operands. 
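// (Illustrative full form, with an assumed spelling for documentation only:
//  ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3). Falling
//  through to parseOptionalOpr lets any other trailing optional operand
//  parse normally when the offset clause is absent.)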
5794 return parseOptionalOpr(Operands); 5795 } 5796 } 5797 5798 bool 5799 AMDGPUOperand::isSwizzle() const { 5800 return isImmTy(ImmTySwizzle); 5801 } 5802 5803 //===----------------------------------------------------------------------===// 5804 // VGPR Index Mode 5805 //===----------------------------------------------------------------------===// 5806 5807 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5808 5809 using namespace llvm::AMDGPU::VGPRIndexMode; 5810 5811 if (trySkipToken(AsmToken::RParen)) { 5812 return OFF; 5813 } 5814 5815 int64_t Imm = 0; 5816 5817 while (true) { 5818 unsigned Mode = 0; 5819 SMLoc S = Parser.getTok().getLoc(); 5820 5821 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5822 if (trySkipId(IdSymbolic[ModeId])) { 5823 Mode = 1 << ModeId; 5824 break; 5825 } 5826 } 5827 5828 if (Mode == 0) { 5829 Error(S, (Imm == 0)? 5830 "expected a VGPR index mode or a closing parenthesis" : 5831 "expected a VGPR index mode"); 5832 break; 5833 } 5834 5835 if (Imm & Mode) { 5836 Error(S, "duplicate VGPR index mode"); 5837 break; 5838 } 5839 Imm |= Mode; 5840 5841 if (trySkipToken(AsmToken::RParen)) 5842 break; 5843 if (!skipToken(AsmToken::Comma, 5844 "expected a comma or a closing parenthesis")) 5845 break; 5846 } 5847 5848 return Imm; 5849 } 5850 5851 OperandMatchResultTy 5852 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5853 5854 int64_t Imm = 0; 5855 SMLoc S = Parser.getTok().getLoc(); 5856 5857 if (getLexer().getKind() == AsmToken::Identifier && 5858 Parser.getTok().getString() == "gpr_idx" && 5859 getLexer().peekTok().is(AsmToken::LParen)) { 5860 5861 Parser.Lex(); 5862 Parser.Lex(); 5863 5864 // If parse failed, trigger an error but do not return error code 5865 // to avoid excessive error messages. 5866 Imm = parseGPRIdxMacro(); 5867 5868 } else { 5869 if (getParser().parseAbsoluteExpression(Imm)) 5870 return MatchOperand_NoMatch; 5871 if (Imm < 0 || !isUInt<4>(Imm)) { 5872 Error(S, "invalid immediate: only 4-bit values are legal"); 5873 } 5874 } 5875 5876 Operands.push_back( 5877 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5878 return MatchOperand_Success; 5879 } 5880 5881 bool AMDGPUOperand::isGPRIdxMode() const { 5882 return isImmTy(ImmTyGprIdxMode); 5883 } 5884 5885 //===----------------------------------------------------------------------===// 5886 // sopp branch targets 5887 //===----------------------------------------------------------------------===// 5888 5889 OperandMatchResultTy 5890 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5891 5892 // Make sure we are not parsing something 5893 // that looks like a label or an expression but is not. 5894 // This will improve error messages. 5895 if (isRegister() || isModifier()) 5896 return MatchOperand_NoMatch; 5897 5898 if (parseExpr(Operands)) { 5899 5900 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 5901 assert(Opr.isImm() || Opr.isExpr()); 5902 SMLoc Loc = Opr.getStartLoc(); 5903 5904 // Currently we do not support arbitrary expressions as branch targets. 5905 // Only labels and absolute expressions are accepted. 
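// E.g. "s_branch loop_end" (a symbol reference) and "s_branch 8" (an
// absolute 16-bit value) are accepted, while a composite expression such
// as "loop_end+4" is diagnosed below. The spellings are illustrative only.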
5906 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 5907 Error(Loc, "expected an absolute expression or a label"); 5908 } else if (Opr.isImm() && !Opr.isS16Imm()) { 5909 Error(Loc, "expected a 16-bit signed jump offset"); 5910 } 5911 } 5912 5913 return MatchOperand_Success; // avoid excessive error messages 5914 } 5915 5916 //===----------------------------------------------------------------------===// 5917 // Boolean holding registers 5918 //===----------------------------------------------------------------------===// 5919 5920 OperandMatchResultTy 5921 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5922 return parseReg(Operands); 5923 } 5924 5925 //===----------------------------------------------------------------------===// 5926 // mubuf 5927 //===----------------------------------------------------------------------===// 5928 5929 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5930 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5931 } 5932 5933 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5934 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5935 } 5936 5937 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5938 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5939 } 5940 5941 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5942 const OperandVector &Operands, 5943 bool IsAtomic, 5944 bool IsAtomicReturn, 5945 bool IsLds) { 5946 bool IsLdsOpcode = IsLds; 5947 bool HasLdsModifier = false; 5948 OptionalImmIndexMap OptionalIdx; 5949 assert(IsAtomicReturn ? IsAtomic : true); 5950 unsigned FirstOperandIdx = 1; 5951 5952 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5953 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5954 5955 // Add the register arguments 5956 if (Op.isReg()) { 5957 Op.addRegOperands(Inst, 1); 5958 // Insert a tied src for atomic return dst. 5959 // This cannot be postponed as subsequent calls to 5960 // addImmOperands rely on correct number of MC operands. 5961 if (IsAtomicReturn && i == FirstOperandIdx) 5962 Op.addRegOperands(Inst, 1); 5963 continue; 5964 } 5965 5966 // Handle the case where soffset is an immediate 5967 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5968 Op.addImmOperands(Inst, 1); 5969 continue; 5970 } 5971 5972 HasLdsModifier |= Op.isLDS(); 5973 5974 // Handle tokens like 'offen' which are sometimes hard-coded into the 5975 // asm string. There are no MCInst operands for these. 5976 if (Op.isToken()) { 5977 continue; 5978 } 5979 assert(Op.isImm()); 5980 5981 // Handle optional arguments 5982 OptionalIdx[Op.getImmTy()] = i; 5983 } 5984 5985 // This is a workaround for an llvm quirk which may result in an 5986 // incorrect instruction selection. Lds and non-lds versions of 5987 // MUBUF instructions are identical except that lds versions 5988 // have mandatory 'lds' modifier. However this modifier follows 5989 // optional modifiers and llvm asm matcher regards this 'lds' 5990 // modifier as an optional one. As a result, an lds version 5991 // of opcode may be selected even if it has no 'lds' modifier. 5992 if (IsLdsOpcode && !HasLdsModifier) { 5993 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5994 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5995 Inst.setOpcode(NoLdsOpcode); 5996 IsLdsOpcode = false; 5997 } 5998 } 5999 6000 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6001 if (!IsAtomic) { // glc is hard-coded. 
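// For atomic opcodes the glc flag is effectively part of the fixed asm
// string (it distinguishes the return variants from the no-return ones),
// so it is not added as an optional operand here.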
6002 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6003 } 6004 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6005 6006 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6007 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6008 } 6009 6010 if (isGFX10()) 6011 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6012 } 6013 6014 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6015 OptionalImmIndexMap OptionalIdx; 6016 6017 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6018 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6019 6020 // Add the register arguments 6021 if (Op.isReg()) { 6022 Op.addRegOperands(Inst, 1); 6023 continue; 6024 } 6025 6026 // Handle the case where soffset is an immediate 6027 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6028 Op.addImmOperands(Inst, 1); 6029 continue; 6030 } 6031 6032 // Handle tokens like 'offen' which are sometimes hard-coded into the 6033 // asm string. There are no MCInst operands for these. 6034 if (Op.isToken()) { 6035 continue; 6036 } 6037 assert(Op.isImm()); 6038 6039 // Handle optional arguments 6040 OptionalIdx[Op.getImmTy()] = i; 6041 } 6042 6043 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6044 AMDGPUOperand::ImmTyOffset); 6045 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6046 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6047 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6048 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6049 6050 if (isGFX10()) 6051 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6052 } 6053 6054 //===----------------------------------------------------------------------===// 6055 // mimg 6056 //===----------------------------------------------------------------------===// 6057 6058 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6059 bool IsAtomic) { 6060 unsigned I = 1; 6061 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6062 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6063 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6064 } 6065 6066 if (IsAtomic) { 6067 // Add src, same as dst 6068 assert(Desc.getNumDefs() == 1); 6069 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6070 } 6071 6072 OptionalImmIndexMap OptionalIdx; 6073 6074 for (unsigned E = Operands.size(); I != E; ++I) { 6075 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6076 6077 // Add the register arguments 6078 if (Op.isReg()) { 6079 Op.addRegOperands(Inst, 1); 6080 } else if (Op.isImmModifier()) { 6081 OptionalIdx[Op.getImmTy()] = I; 6082 } else if (!Op.isToken()) { 6083 llvm_unreachable("unexpected operand type"); 6084 } 6085 } 6086 6087 bool IsGFX10 = isGFX10(); 6088 6089 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6090 if (IsGFX10) 6091 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6092 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6093 if (IsGFX10) 6094 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6095 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6096 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6097 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6098 if (IsGFX10) 6099 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6100 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6101 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6102 if (!IsGFX10) 6103 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6104 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6105 } 6106 6107 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6108 cvtMIMG(Inst, Operands, true); 6109 } 6110 6111 //===----------------------------------------------------------------------===// 6112 // smrd 6113 //===----------------------------------------------------------------------===// 6114 6115 bool AMDGPUOperand::isSMRDOffset8() const { 6116 return isImm() && isUInt<8>(getImm()); 6117 } 6118 6119 bool AMDGPUOperand::isSMEMOffset() const { 6120 return isImm(); // Offset range is checked later by validator. 6121 } 6122 6123 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6124 // 32-bit literals are only supported on CI and we only want to use them 6125 // when the offset is > 8-bits. 6126 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6127 } 6128 6129 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6130 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6131 } 6132 6133 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6134 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6135 } 6136 6137 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6138 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6139 } 6140 6141 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6142 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6143 } 6144 6145 //===----------------------------------------------------------------------===// 6146 // vop3 6147 //===----------------------------------------------------------------------===// 6148 6149 static bool ConvertOmodMul(int64_t &Mul) { 6150 if (Mul != 1 && Mul != 2 && Mul != 4) 6151 return false; 6152 6153 Mul >>= 1; 6154 return true; 6155 } 6156 6157 static bool ConvertOmodDiv(int64_t &Div) { 6158 if (Div == 1) { 6159 Div = 0; 6160 return true; 6161 } 6162 6163 if (Div == 2) { 6164 Div = 3; 6165 return true; 6166 } 6167 6168 return false; 6169 } 6170 6171 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6172 if (BoundCtrl == 0) { 6173 BoundCtrl = 1; 6174 return true; 6175 } 6176 6177 if (BoundCtrl == -1) { 6178 BoundCtrl = 0; 6179 return true; 6180 } 6181 6182 return false; 6183 } 6184 6185 // Note: the order in this table matches the order of operands in AsmString. 
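// Each entry is {name, immediate kind, IsBit, ConvertResult}. Entries with
// IsBit set (e.g. "glc") are parsed as bare named flags, while the others
// take a value and may post-process it via ConvertResult (e.g. the "omod"
// entry lists ConvertOmodMul above).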
6186 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6187 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6188 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6189 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6190 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6191 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6192 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6193 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6194 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6195 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6196 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6197 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 6198 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6199 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6200 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6201 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6202 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6203 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6204 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6205 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6206 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6207 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6208 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6209 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6210 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6211 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6212 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6213 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6214 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6215 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6216 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6217 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6218 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6219 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6220 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6221 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6222 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6223 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6224 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6225 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6226 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6227 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6228 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6229 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6230 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6231 }; 6232 6233 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6234 6235 OperandMatchResultTy res = parseOptionalOpr(Operands); 6236 6237 // This is a hack to enable hardcoded mandatory operands which follow 6238 // optional operands. 6239 // 6240 // Current design assumes that all operands after the first optional operand 6241 // are also optional. However implementation of some instructions violates 6242 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6243 // 6244 // To alleviate this problem, we have to (implicitly) parse extra operands 6245 // to make sure autogenerated parser of custom operands never hit hardcoded 6246 // mandatory operands. 
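// The loop below implements that lookahead: it keeps consuming optional
// operands (up to MAX_OPR_LOOKAHEAD of them, separated by optional commas)
// in a single pass, so the generated matcher never has to resume parsing
// right in front of a hardcoded operand such as the atomics' 'glc'.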
6247 6248 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6249 if (res != MatchOperand_Success || 6250 isToken(AsmToken::EndOfStatement)) 6251 break; 6252 6253 trySkipToken(AsmToken::Comma); 6254 res = parseOptionalOpr(Operands); 6255 } 6256 6257 return res; 6258 } 6259 6260 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6261 OperandMatchResultTy res; 6262 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6263 // try to parse any optional operand here 6264 if (Op.IsBit) { 6265 res = parseNamedBit(Op.Name, Operands, Op.Type); 6266 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6267 res = parseOModOperand(Operands); 6268 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6269 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6270 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6271 res = parseSDWASel(Operands, Op.Name, Op.Type); 6272 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6273 res = parseSDWADstUnused(Operands); 6274 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6275 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6276 Op.Type == AMDGPUOperand::ImmTyNegLo || 6277 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6278 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6279 Op.ConvertResult); 6280 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6281 res = parseDim(Operands); 6282 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 6283 res = parseDfmtNfmt(Operands); 6284 } else { 6285 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6286 } 6287 if (res != MatchOperand_NoMatch) { 6288 return res; 6289 } 6290 } 6291 return MatchOperand_NoMatch; 6292 } 6293 6294 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6295 StringRef Name = Parser.getTok().getString(); 6296 if (Name == "mul") { 6297 return parseIntWithPrefix("mul", Operands, 6298 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6299 } 6300 6301 if (Name == "div") { 6302 return parseIntWithPrefix("div", Operands, 6303 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6304 } 6305 6306 return MatchOperand_NoMatch; 6307 } 6308 6309 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6310 cvtVOP3P(Inst, Operands); 6311 6312 int Opc = Inst.getOpcode(); 6313 6314 int SrcNum; 6315 const int Ops[] = { AMDGPU::OpName::src0, 6316 AMDGPU::OpName::src1, 6317 AMDGPU::OpName::src2 }; 6318 for (SrcNum = 0; 6319 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6320 ++SrcNum); 6321 assert(SrcNum > 0); 6322 6323 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6324 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6325 6326 if ((OpSel & (1 << SrcNum)) != 0) { 6327 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6328 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6329 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6330 } 6331 } 6332 6333 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6334 // 1. This operand is input modifiers 6335 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6336 // 2. This is not last operand 6337 && Desc.NumOperands > (OpNum + 1) 6338 // 3. Next operand is register class 6339 && Desc.OpInfo[OpNum + 1].RegClass != -1 6340 // 4. 
Next register is not tied to any other operand 6341 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6342 } 6343 6344 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6345 { 6346 OptionalImmIndexMap OptionalIdx; 6347 unsigned Opc = Inst.getOpcode(); 6348 6349 unsigned I = 1; 6350 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6351 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6352 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6353 } 6354 6355 for (unsigned E = Operands.size(); I != E; ++I) { 6356 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6357 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6358 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6359 } else if (Op.isInterpSlot() || 6360 Op.isInterpAttr() || 6361 Op.isAttrChan()) { 6362 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6363 } else if (Op.isImmModifier()) { 6364 OptionalIdx[Op.getImmTy()] = I; 6365 } else { 6366 llvm_unreachable("unhandled operand type"); 6367 } 6368 } 6369 6370 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6371 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6372 } 6373 6374 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6375 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6376 } 6377 6378 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6379 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6380 } 6381 } 6382 6383 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6384 OptionalImmIndexMap &OptionalIdx) { 6385 unsigned Opc = Inst.getOpcode(); 6386 6387 unsigned I = 1; 6388 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6389 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6390 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6391 } 6392 6393 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6394 // This instruction has src modifiers 6395 for (unsigned E = Operands.size(); I != E; ++I) { 6396 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6397 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6398 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6399 } else if (Op.isImmModifier()) { 6400 OptionalIdx[Op.getImmTy()] = I; 6401 } else if (Op.isRegOrImm()) { 6402 Op.addRegOrImmOperands(Inst, 1); 6403 } else { 6404 llvm_unreachable("unhandled operand type"); 6405 } 6406 } 6407 } else { 6408 // No src modifiers 6409 for (unsigned E = Operands.size(); I != E; ++I) { 6410 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6411 if (Op.isMod()) { 6412 OptionalIdx[Op.getImmTy()] = I; 6413 } else { 6414 Op.addRegOrImmOperands(Inst, 1); 6415 } 6416 } 6417 } 6418 6419 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6420 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6421 } 6422 6423 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6424 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6425 } 6426 6427 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6428 // it has src2 register operand that is tied to dst operand 6429 // we don't allow modifiers for this operand in assembler so src2_modifiers 6430 // should be 0. 
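// The fixup below therefore inserts an explicit zero src2_modifiers
// operand and then re-adds the dst register as the tied src2.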
6431 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6432 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6433 Opc == AMDGPU::V_MAC_F32_e64_vi || 6434 Opc == AMDGPU::V_MAC_F16_e64_vi || 6435 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6436 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6437 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6438 auto it = Inst.begin(); 6439 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6440 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6441 ++it; 6442 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6443 } 6444 } 6445 6446 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6447 OptionalImmIndexMap OptionalIdx; 6448 cvtVOP3(Inst, Operands, OptionalIdx); 6449 } 6450 6451 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6452 const OperandVector &Operands) { 6453 OptionalImmIndexMap OptIdx; 6454 const int Opc = Inst.getOpcode(); 6455 const MCInstrDesc &Desc = MII.get(Opc); 6456 6457 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6458 6459 cvtVOP3(Inst, Operands, OptIdx); 6460 6461 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6462 assert(!IsPacked); 6463 Inst.addOperand(Inst.getOperand(0)); 6464 } 6465 6466 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6467 // instruction, and then figure out where to actually put the modifiers 6468 6469 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6470 6471 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6472 if (OpSelHiIdx != -1) { 6473 int DefaultVal = IsPacked ? -1 : 0; 6474 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6475 DefaultVal); 6476 } 6477 6478 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6479 if (NegLoIdx != -1) { 6480 assert(IsPacked); 6481 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6482 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6483 } 6484 6485 const int Ops[] = { AMDGPU::OpName::src0, 6486 AMDGPU::OpName::src1, 6487 AMDGPU::OpName::src2 }; 6488 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6489 AMDGPU::OpName::src1_modifiers, 6490 AMDGPU::OpName::src2_modifiers }; 6491 6492 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6493 6494 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6495 unsigned OpSelHi = 0; 6496 unsigned NegLo = 0; 6497 unsigned NegHi = 0; 6498 6499 if (OpSelHiIdx != -1) { 6500 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6501 } 6502 6503 if (NegLoIdx != -1) { 6504 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6505 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6506 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6507 } 6508 6509 for (int J = 0; J < 3; ++J) { 6510 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6511 if (OpIdx == -1) 6512 break; 6513 6514 uint32_t ModVal = 0; 6515 6516 if ((OpSel & (1 << J)) != 0) 6517 ModVal |= SISrcMods::OP_SEL_0; 6518 6519 if ((OpSelHi & (1 << J)) != 0) 6520 ModVal |= SISrcMods::OP_SEL_1; 6521 6522 if ((NegLo & (1 << J)) != 0) 6523 ModVal |= SISrcMods::NEG; 6524 6525 if ((NegHi & (1 << J)) != 0) 6526 ModVal |= SISrcMods::NEG_HI; 6527 6528 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6529 6530 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6531 } 6532 } 6533 6534 //===----------------------------------------------------------------------===// 6535 // dpp 6536 
//===----------------------------------------------------------------------===// 6537 6538 bool AMDGPUOperand::isDPP8() const { 6539 return isImmTy(ImmTyDPP8); 6540 } 6541 6542 bool AMDGPUOperand::isDPPCtrl() const { 6543 using namespace AMDGPU::DPP; 6544 6545 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6546 if (result) { 6547 int64_t Imm = getImm(); 6548 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6549 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6550 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6551 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6552 (Imm == DppCtrl::WAVE_SHL1) || 6553 (Imm == DppCtrl::WAVE_ROL1) || 6554 (Imm == DppCtrl::WAVE_SHR1) || 6555 (Imm == DppCtrl::WAVE_ROR1) || 6556 (Imm == DppCtrl::ROW_MIRROR) || 6557 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6558 (Imm == DppCtrl::BCAST15) || 6559 (Imm == DppCtrl::BCAST31) || 6560 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6561 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6562 } 6563 return false; 6564 } 6565 6566 //===----------------------------------------------------------------------===// 6567 // mAI 6568 //===----------------------------------------------------------------------===// 6569 6570 bool AMDGPUOperand::isBLGP() const { 6571 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6572 } 6573 6574 bool AMDGPUOperand::isCBSZ() const { 6575 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6576 } 6577 6578 bool AMDGPUOperand::isABID() const { 6579 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6580 } 6581 6582 bool AMDGPUOperand::isS16Imm() const { 6583 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6584 } 6585 6586 bool AMDGPUOperand::isU16Imm() const { 6587 return isImm() && isUInt<16>(getImm()); 6588 } 6589 6590 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6591 if (!isGFX10()) 6592 return MatchOperand_NoMatch; 6593 6594 SMLoc S = Parser.getTok().getLoc(); 6595 6596 if (getLexer().isNot(AsmToken::Identifier)) 6597 return MatchOperand_NoMatch; 6598 if (getLexer().getTok().getString() != "dim") 6599 return MatchOperand_NoMatch; 6600 6601 Parser.Lex(); 6602 if (getLexer().isNot(AsmToken::Colon)) 6603 return MatchOperand_ParseFail; 6604 6605 Parser.Lex(); 6606 6607 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6608 // integer. 
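// E.g. "dim:2D" lexes as the integer "2" immediately followed by the
// identifier "D", so the two tokens are glued back together below before
// the lookup; the "SQ_RSRC_IMG_*" spellings are accepted as well.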
6609 std::string Token; 6610 if (getLexer().is(AsmToken::Integer)) { 6611 SMLoc Loc = getLexer().getTok().getEndLoc(); 6612 Token = std::string(getLexer().getTok().getString()); 6613 Parser.Lex(); 6614 if (getLexer().getTok().getLoc() != Loc) 6615 return MatchOperand_ParseFail; 6616 } 6617 if (getLexer().isNot(AsmToken::Identifier)) 6618 return MatchOperand_ParseFail; 6619 Token += getLexer().getTok().getString(); 6620 6621 StringRef DimId = Token; 6622 if (DimId.startswith("SQ_RSRC_IMG_")) 6623 DimId = DimId.substr(12); 6624 6625 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6626 if (!DimInfo) 6627 return MatchOperand_ParseFail; 6628 6629 Parser.Lex(); 6630 6631 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6632 AMDGPUOperand::ImmTyDim)); 6633 return MatchOperand_Success; 6634 } 6635 6636 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6637 SMLoc S = Parser.getTok().getLoc(); 6638 StringRef Prefix; 6639 6640 if (getLexer().getKind() == AsmToken::Identifier) { 6641 Prefix = Parser.getTok().getString(); 6642 } else { 6643 return MatchOperand_NoMatch; 6644 } 6645 6646 if (Prefix != "dpp8") 6647 return parseDPPCtrl(Operands); 6648 if (!isGFX10()) 6649 return MatchOperand_NoMatch; 6650 6651 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6652 6653 int64_t Sels[8]; 6654 6655 Parser.Lex(); 6656 if (getLexer().isNot(AsmToken::Colon)) 6657 return MatchOperand_ParseFail; 6658 6659 Parser.Lex(); 6660 if (getLexer().isNot(AsmToken::LBrac)) 6661 return MatchOperand_ParseFail; 6662 6663 Parser.Lex(); 6664 if (getParser().parseAbsoluteExpression(Sels[0])) 6665 return MatchOperand_ParseFail; 6666 if (0 > Sels[0] || 7 < Sels[0]) 6667 return MatchOperand_ParseFail; 6668 6669 for (size_t i = 1; i < 8; ++i) { 6670 if (getLexer().isNot(AsmToken::Comma)) 6671 return MatchOperand_ParseFail; 6672 6673 Parser.Lex(); 6674 if (getParser().parseAbsoluteExpression(Sels[i])) 6675 return MatchOperand_ParseFail; 6676 if (0 > Sels[i] || 7 < Sels[i]) 6677 return MatchOperand_ParseFail; 6678 } 6679 6680 if (getLexer().isNot(AsmToken::RBrac)) 6681 return MatchOperand_ParseFail; 6682 Parser.Lex(); 6683 6684 unsigned DPP8 = 0; 6685 for (size_t i = 0; i < 8; ++i) 6686 DPP8 |= (Sels[i] << (i * 3)); 6687 6688 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6689 return MatchOperand_Success; 6690 } 6691 6692 OperandMatchResultTy 6693 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6694 using namespace AMDGPU::DPP; 6695 6696 SMLoc S = Parser.getTok().getLoc(); 6697 StringRef Prefix; 6698 int64_t Int; 6699 6700 if (getLexer().getKind() == AsmToken::Identifier) { 6701 Prefix = Parser.getTok().getString(); 6702 } else { 6703 return MatchOperand_NoMatch; 6704 } 6705 6706 if (Prefix == "row_mirror") { 6707 Int = DppCtrl::ROW_MIRROR; 6708 Parser.Lex(); 6709 } else if (Prefix == "row_half_mirror") { 6710 Int = DppCtrl::ROW_HALF_MIRROR; 6711 Parser.Lex(); 6712 } else { 6713 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6714 if (Prefix != "quad_perm" 6715 && Prefix != "row_shl" 6716 && Prefix != "row_shr" 6717 && Prefix != "row_ror" 6718 && Prefix != "wave_shl" 6719 && Prefix != "wave_rol" 6720 && Prefix != "wave_shr" 6721 && Prefix != "wave_ror" 6722 && Prefix != "row_bcast" 6723 && Prefix != "row_share" 6724 && Prefix != "row_xmask") { 6725 return MatchOperand_NoMatch; 6726 } 6727 6728 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6729 return MatchOperand_NoMatch; 6730 6731 if 
(!isVI() && !isGFX9() && 6732 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6733 Prefix == "wave_rol" || Prefix == "wave_ror" || 6734 Prefix == "row_bcast")) 6735 return MatchOperand_NoMatch; 6736 6737 Parser.Lex(); 6738 if (getLexer().isNot(AsmToken::Colon)) 6739 return MatchOperand_ParseFail; 6740 6741 if (Prefix == "quad_perm") { 6742 // quad_perm:[%d,%d,%d,%d] 6743 Parser.Lex(); 6744 if (getLexer().isNot(AsmToken::LBrac)) 6745 return MatchOperand_ParseFail; 6746 Parser.Lex(); 6747 6748 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6749 return MatchOperand_ParseFail; 6750 6751 for (int i = 0; i < 3; ++i) { 6752 if (getLexer().isNot(AsmToken::Comma)) 6753 return MatchOperand_ParseFail; 6754 Parser.Lex(); 6755 6756 int64_t Temp; 6757 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6758 return MatchOperand_ParseFail; 6759 const int shift = i*2 + 2; 6760 Int += (Temp << shift); 6761 } 6762 6763 if (getLexer().isNot(AsmToken::RBrac)) 6764 return MatchOperand_ParseFail; 6765 Parser.Lex(); 6766 } else { 6767 // sel:%d 6768 Parser.Lex(); 6769 if (getParser().parseAbsoluteExpression(Int)) 6770 return MatchOperand_ParseFail; 6771 6772 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6773 Int |= DppCtrl::ROW_SHL0; 6774 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6775 Int |= DppCtrl::ROW_SHR0; 6776 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6777 Int |= DppCtrl::ROW_ROR0; 6778 } else if (Prefix == "wave_shl" && 1 == Int) { 6779 Int = DppCtrl::WAVE_SHL1; 6780 } else if (Prefix == "wave_rol" && 1 == Int) { 6781 Int = DppCtrl::WAVE_ROL1; 6782 } else if (Prefix == "wave_shr" && 1 == Int) { 6783 Int = DppCtrl::WAVE_SHR1; 6784 } else if (Prefix == "wave_ror" && 1 == Int) { 6785 Int = DppCtrl::WAVE_ROR1; 6786 } else if (Prefix == "row_bcast") { 6787 if (Int == 15) { 6788 Int = DppCtrl::BCAST15; 6789 } else if (Int == 31) { 6790 Int = DppCtrl::BCAST31; 6791 } else { 6792 return MatchOperand_ParseFail; 6793 } 6794 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6795 Int |= DppCtrl::ROW_SHARE_FIRST; 6796 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6797 Int |= DppCtrl::ROW_XMASK_FIRST; 6798 } else { 6799 return MatchOperand_ParseFail; 6800 } 6801 } 6802 } 6803 6804 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6805 return MatchOperand_Success; 6806 } 6807 6808 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6809 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6810 } 6811 6812 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6813 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6814 } 6815 6816 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6817 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6818 } 6819 6820 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6821 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6822 } 6823 6824 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6825 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6826 } 6827 6828 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6829 OptionalImmIndexMap OptionalIdx; 6830 6831 unsigned I = 1; 6832 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6833 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6834 
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6835 } 6836 6837 int Fi = 0; 6838 for (unsigned E = Operands.size(); I != E; ++I) { 6839 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6840 MCOI::TIED_TO); 6841 if (TiedTo != -1) { 6842 assert((unsigned)TiedTo < Inst.getNumOperands()); 6843 // handle tied old or src2 for MAC instructions 6844 Inst.addOperand(Inst.getOperand(TiedTo)); 6845 } 6846 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6847 // Add the register arguments 6848 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6849 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6850 // Skip it. 6851 continue; 6852 } 6853 6854 if (IsDPP8) { 6855 if (Op.isDPP8()) { 6856 Op.addImmOperands(Inst, 1); 6857 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6858 Op.addRegWithFPInputModsOperands(Inst, 2); 6859 } else if (Op.isFI()) { 6860 Fi = Op.getImm(); 6861 } else if (Op.isReg()) { 6862 Op.addRegOperands(Inst, 1); 6863 } else { 6864 llvm_unreachable("Invalid operand type"); 6865 } 6866 } else { 6867 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6868 Op.addRegWithFPInputModsOperands(Inst, 2); 6869 } else if (Op.isDPPCtrl()) { 6870 Op.addImmOperands(Inst, 1); 6871 } else if (Op.isImm()) { 6872 // Handle optional arguments 6873 OptionalIdx[Op.getImmTy()] = I; 6874 } else { 6875 llvm_unreachable("Invalid operand type"); 6876 } 6877 } 6878 } 6879 6880 if (IsDPP8) { 6881 using namespace llvm::AMDGPU::DPP; 6882 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6883 } else { 6884 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6885 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6886 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6887 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6888 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6889 } 6890 } 6891 } 6892 6893 //===----------------------------------------------------------------------===// 6894 // sdwa 6895 //===----------------------------------------------------------------------===// 6896 6897 OperandMatchResultTy 6898 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6899 AMDGPUOperand::ImmTy Type) { 6900 using namespace llvm::AMDGPU::SDWA; 6901 6902 SMLoc S = Parser.getTok().getLoc(); 6903 StringRef Value; 6904 OperandMatchResultTy res; 6905 6906 res = parseStringWithPrefix(Prefix, Value); 6907 if (res != MatchOperand_Success) { 6908 return res; 6909 } 6910 6911 int64_t Int; 6912 Int = StringSwitch<int64_t>(Value) 6913 .Case("BYTE_0", SdwaSel::BYTE_0) 6914 .Case("BYTE_1", SdwaSel::BYTE_1) 6915 .Case("BYTE_2", SdwaSel::BYTE_2) 6916 .Case("BYTE_3", SdwaSel::BYTE_3) 6917 .Case("WORD_0", SdwaSel::WORD_0) 6918 .Case("WORD_1", SdwaSel::WORD_1) 6919 .Case("DWORD", SdwaSel::DWORD) 6920 .Default(0xffffffff); 6921 Parser.Lex(); // eat last token 6922 6923 if (Int == 0xffffffff) { 6924 return MatchOperand_ParseFail; 6925 } 6926 6927 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6928 return MatchOperand_Success; 6929 } 6930 6931 OperandMatchResultTy 6932 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6933 using namespace llvm::AMDGPU::SDWA; 6934 6935 SMLoc S = Parser.getTok().getLoc(); 6936 StringRef Value; 6937 OperandMatchResultTy res; 6938 6939 res = parseStringWithPrefix("dst_unused", Value); 6940 if (res != 
MatchOperand_Success) { 6941 return res; 6942 } 6943 6944 int64_t Int; 6945 Int = StringSwitch<int64_t>(Value) 6946 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6947 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6948 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6949 .Default(0xffffffff); 6950 Parser.Lex(); // eat last token 6951 6952 if (Int == 0xffffffff) { 6953 return MatchOperand_ParseFail; 6954 } 6955 6956 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6957 return MatchOperand_Success; 6958 } 6959 6960 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6961 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6962 } 6963 6964 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6965 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6966 } 6967 6968 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6969 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 6970 } 6971 6972 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 6973 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 6974 } 6975 6976 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6977 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6978 } 6979 6980 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6981 uint64_t BasicInstType, 6982 bool SkipDstVcc, 6983 bool SkipSrcVcc) { 6984 using namespace llvm::AMDGPU::SDWA; 6985 6986 OptionalImmIndexMap OptionalIdx; 6987 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 6988 bool SkippedVcc = false; 6989 6990 unsigned I = 1; 6991 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6992 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6993 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6994 } 6995 6996 for (unsigned E = Operands.size(); I != E; ++I) { 6997 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6998 if (SkipVcc && !SkippedVcc && Op.isReg() && 6999 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 7000 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 7001 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 7002 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 7003 // Skip VCC only if we didn't skip it on previous iteration. 7004 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
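// At this point Inst holds only the operands converted so far: a count of
// 1 means just vdst (the VOP2b dst-side vcc), 5 means vdst plus two
// modifier/register source pairs (the src-side vcc), and 0 corresponds to
// the VOPC case where the vcc dst is not encoded at all.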
7005 if (BasicInstType == SIInstrFlags::VOP2 &&
7006 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7007 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7008 SkippedVcc = true;
7009 continue;
7010 } else if (BasicInstType == SIInstrFlags::VOPC &&
7011 Inst.getNumOperands() == 0) {
7012 SkippedVcc = true;
7013 continue;
7014 }
7015 }
7016 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7017 Op.addRegOrImmWithInputModsOperands(Inst, 2);
7018 } else if (Op.isImm()) {
7019 // Handle optional arguments
7020 OptionalIdx[Op.getImmTy()] = I;
7021 } else {
7022 llvm_unreachable("Invalid operand type");
7023 }
7024 SkippedVcc = false;
7025 }
7026
7027 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7028 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7029 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7030 // V_NOP_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
7031 switch (BasicInstType) {
7032 case SIInstrFlags::VOP1:
7033 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7034 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7035 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7036 }
7037 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7038 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7039 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7040 break;
7041
7042 case SIInstrFlags::VOP2:
7043 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7044 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7045 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7046 }
7047 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7048 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7049 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7050 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7051 break;
7052
7053 case SIInstrFlags::VOPC:
7054 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7055 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7056 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7057 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7058 break;
7059
7060 default:
7061 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
Only VOP1, VOP2 and VOPC allowed"); 7062 } 7063 } 7064 7065 // special case v_mac_{f16, f32}: 7066 // it has src2 register operand that is tied to dst operand 7067 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 7068 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 7069 auto it = Inst.begin(); 7070 std::advance( 7071 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 7072 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 7073 } 7074 } 7075 7076 //===----------------------------------------------------------------------===// 7077 // mAI 7078 //===----------------------------------------------------------------------===// 7079 7080 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 7081 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 7082 } 7083 7084 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 7085 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 7086 } 7087 7088 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 7089 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 7090 } 7091 7092 /// Force static initialization. 7093 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 7094 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 7095 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 7096 } 7097 7098 #define GET_REGISTER_MATCHER 7099 #define GET_MATCHER_IMPLEMENTATION 7100 #define GET_MNEMONIC_SPELL_CHECKER 7101 #include "AMDGPUGenAsmMatcher.inc" 7102 7103 // This fuction should be defined after auto-generated include so that we have 7104 // MatchClassKind enum defined 7105 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 7106 unsigned Kind) { 7107 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 7108 // But MatchInstructionImpl() expects to meet token and fails to validate 7109 // operand. This method checks if we are given immediate operand but expect to 7110 // get corresponding token. 7111 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 7112 switch (Kind) { 7113 case MCK_addr64: 7114 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 7115 case MCK_gds: 7116 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 7117 case MCK_lds: 7118 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 7119 case MCK_glc: 7120 return Operand.isGLC() ? Match_Success : Match_InvalidOperand; 7121 case MCK_idxen: 7122 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 7123 case MCK_offen: 7124 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 7125 case MCK_SSrcB32: 7126 // When operands have expression values, they will return true for isToken, 7127 // because it is not possible to distinguish between a token and an 7128 // expression at parse time. MatchInstructionImpl() will always try to 7129 // match an operand as a token, when isToken returns true, and when the 7130 // name of the expression is not a valid token, the match will fail, 7131 // so we need to handle it here. 7132 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 7133 case MCK_SSrcF32: 7134 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 7135 case MCK_SoppBrTarget: 7136 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 7137 case MCK_VReg32OrOff: 7138 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 7139 case MCK_InterpSlot: 7140 return Operand.isInterpSlot() ? 
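
// The defaults above are used when the optional MFMA modifiers are omitted
// from the source. An illustrative (hypothetical) line using all three:
//   v_mfma_f32_4x4x1f32 a[0:3], v0, v1, a[0:3] cbsz:1 abid:2 blgp:3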

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // when the matcher expects the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token the match will fail, so we
    // need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but it should also be enabled for
    // 64-bit operands. The following code enables it for SReg_64 operands used
    // as source and destination. Remaining source operands are handled in
    // isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
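
// parseEndpgmOp above accepts an optional 16-bit immediate, so source lines
// such as the following (illustrative examples) are handled:
//   s_endpgm            // immediate defaults to 0
//   s_endpgm 0x1234     // explicit 16-bit value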