//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
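    // (For such Expression operands, getToken() returns the referenced
    // symbol's name via getExpressionAsToken().)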
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
  // value of the GLC operand.
  bool isGLC_1() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_AGPR: // fall through
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
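      // No subtarget features were specified; fall back to the Southern
      // Islands (gfx6) feature set.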
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  bool validateMAIAccWrite(const MCInst &Inst);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg);

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultGLC_1() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
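// Size is the operand width in bytes: 2 -> IEEE half, 4 -> IEEE single,
// 8 -> IEEE double.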
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept these literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 :
                     (type == MVT::v2i16) ? MVT::i16 : type;
MVT::i16 : type; 1699 1700 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1701 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1702 } 1703 1704 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1705 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1706 } 1707 1708 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1709 if (AsmParser->isVI()) 1710 return isVReg32(); 1711 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1712 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1713 else 1714 return false; 1715 } 1716 1717 bool AMDGPUOperand::isSDWAFP16Operand() const { 1718 return isSDWAOperand(MVT::f16); 1719 } 1720 1721 bool AMDGPUOperand::isSDWAFP32Operand() const { 1722 return isSDWAOperand(MVT::f32); 1723 } 1724 1725 bool AMDGPUOperand::isSDWAInt16Operand() const { 1726 return isSDWAOperand(MVT::i16); 1727 } 1728 1729 bool AMDGPUOperand::isSDWAInt32Operand() const { 1730 return isSDWAOperand(MVT::i32); 1731 } 1732 1733 bool AMDGPUOperand::isBoolReg() const { 1734 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1735 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1736 } 1737 1738 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1739 { 1740 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1741 assert(Size == 2 || Size == 4 || Size == 8); 1742 1743 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1744 1745 if (Imm.Mods.Abs) { 1746 Val &= ~FpSignMask; 1747 } 1748 if (Imm.Mods.Neg) { 1749 Val ^= FpSignMask; 1750 } 1751 1752 return Val; 1753 } 1754 1755 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1756 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1757 Inst.getNumOperands())) { 1758 addLiteralImmOperand(Inst, Imm.Val, 1759 ApplyModifiers & 1760 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1761 } else { 1762 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1763 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1764 } 1765 } 1766 1767 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1768 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1769 auto OpNum = Inst.getNumOperands(); 1770 // Check that this operand accepts literals 1771 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1772 1773 if (ApplyModifiers) { 1774 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1775 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1776 Val = applyInputFPModifiers(Val, Size); 1777 } 1778 1779 APInt Literal(64, Val); 1780 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1781 1782 if (Imm.IsFPImm) { // We got fp literal token 1783 switch (OpTy) { 1784 case AMDGPU::OPERAND_REG_IMM_INT64: 1785 case AMDGPU::OPERAND_REG_IMM_FP64: 1786 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1787 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1788 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1789 AsmParser->hasInv2PiInlineImm())) { 1790 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1791 return; 1792 } 1793 1794 // Non-inlineable 1795 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1796 // For fp operands we check if low 32 bits are zeros 1797 if (Literal.getLoBits(32) != 0) { 1798 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1799 "Can't encode literal as exact 64-bit floating-point operand. " 1800 "Low 32-bits will be set to zero"); 1801 } 1802 1803 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1804 return; 1805 } 1806 1807 // We don't allow fp literals in 64-bit integer instructions. It is 1808 // unclear how we should encode them. This case should be checked earlier 1809 // in predicate methods (isLiteralImm()) 1810 llvm_unreachable("fp literal in 64-bit integer instruction."); 1811 1812 case AMDGPU::OPERAND_REG_IMM_INT32: 1813 case AMDGPU::OPERAND_REG_IMM_FP32: 1814 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1815 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1816 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1817 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1818 case AMDGPU::OPERAND_REG_IMM_INT16: 1819 case AMDGPU::OPERAND_REG_IMM_FP16: 1820 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1821 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1822 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1823 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1824 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1825 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1826 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1827 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1828 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1829 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1830 bool lost; 1831 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1832 // Convert literal to single precision 1833 FPLiteral.convert(*getOpFltSemantics(OpTy), 1834 APFloat::rmNearestTiesToEven, &lost); 1835 // We allow precision lost but not overflow or underflow. This should be 1836 // checked earlier in isLiteralImm() 1837 1838 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1839 Inst.addOperand(MCOperand::createImm(ImmVal)); 1840 return; 1841 } 1842 default: 1843 llvm_unreachable("invalid operand size"); 1844 } 1845 1846 return; 1847 } 1848 1849 // We got int literal token. 1850 // Only sign extend inline immediates. 
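  // For illustration: with a 16-bit operand, an inlinable value such as -1 is
  // added unmodified (still sign-extended in the 64-bit immediate), while a
  // non-inlinable value such as 0x3456 falls through to the masked
  // 'Val & 0xffff' path below.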
1851 switch (OpTy) { 1852 case AMDGPU::OPERAND_REG_IMM_INT32: 1853 case AMDGPU::OPERAND_REG_IMM_FP32: 1854 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1855 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1856 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1857 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1858 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1859 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1860 if (isSafeTruncation(Val, 32) && 1861 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1862 AsmParser->hasInv2PiInlineImm())) { 1863 Inst.addOperand(MCOperand::createImm(Val)); 1864 return; 1865 } 1866 1867 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1868 return; 1869 1870 case AMDGPU::OPERAND_REG_IMM_INT64: 1871 case AMDGPU::OPERAND_REG_IMM_FP64: 1872 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1873 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1874 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1875 Inst.addOperand(MCOperand::createImm(Val)); 1876 return; 1877 } 1878 1879 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1880 return; 1881 1882 case AMDGPU::OPERAND_REG_IMM_INT16: 1883 case AMDGPU::OPERAND_REG_IMM_FP16: 1884 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1885 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1886 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1887 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1888 if (isSafeTruncation(Val, 16) && 1889 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1890 AsmParser->hasInv2PiInlineImm())) { 1891 Inst.addOperand(MCOperand::createImm(Val)); 1892 return; 1893 } 1894 1895 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1896 return; 1897 1898 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1899 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1900 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1901 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1902 assert(isSafeTruncation(Val, 16)); 1903 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1904 AsmParser->hasInv2PiInlineImm())); 1905 1906 Inst.addOperand(MCOperand::createImm(Val)); 1907 return; 1908 } 1909 default: 1910 llvm_unreachable("invalid operand size"); 1911 } 1912 } 1913 1914 template <unsigned Bitwidth> 1915 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1916 APInt Literal(64, Imm.Val); 1917 1918 if (!Imm.IsFPImm) { 1919 // We got int literal token. 
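    // An integer source is encoded as-is: take its low 'Bitwidth' bits.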
1920 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1921 return; 1922 } 1923 1924 bool Lost; 1925 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1926 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1927 APFloat::rmNearestTiesToEven, &Lost); 1928 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1929 } 1930 1931 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1932 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1933 } 1934 1935 static bool isInlineValue(unsigned Reg) { 1936 switch (Reg) { 1937 case AMDGPU::SRC_SHARED_BASE: 1938 case AMDGPU::SRC_SHARED_LIMIT: 1939 case AMDGPU::SRC_PRIVATE_BASE: 1940 case AMDGPU::SRC_PRIVATE_LIMIT: 1941 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1942 return true; 1943 case AMDGPU::SRC_VCCZ: 1944 case AMDGPU::SRC_EXECZ: 1945 case AMDGPU::SRC_SCC: 1946 return true; 1947 case AMDGPU::SGPR_NULL: 1948 return true; 1949 default: 1950 return false; 1951 } 1952 } 1953 1954 bool AMDGPUOperand::isInlineValue() const { 1955 return isRegKind() && ::isInlineValue(getReg()); 1956 } 1957 1958 //===----------------------------------------------------------------------===// 1959 // AsmParser 1960 //===----------------------------------------------------------------------===// 1961 1962 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1963 if (Is == IS_VGPR) { 1964 switch (RegWidth) { 1965 default: return -1; 1966 case 1: return AMDGPU::VGPR_32RegClassID; 1967 case 2: return AMDGPU::VReg_64RegClassID; 1968 case 3: return AMDGPU::VReg_96RegClassID; 1969 case 4: return AMDGPU::VReg_128RegClassID; 1970 case 5: return AMDGPU::VReg_160RegClassID; 1971 case 6: return AMDGPU::VReg_192RegClassID; 1972 case 8: return AMDGPU::VReg_256RegClassID; 1973 case 16: return AMDGPU::VReg_512RegClassID; 1974 case 32: return AMDGPU::VReg_1024RegClassID; 1975 } 1976 } else if (Is == IS_TTMP) { 1977 switch (RegWidth) { 1978 default: return -1; 1979 case 1: return AMDGPU::TTMP_32RegClassID; 1980 case 2: return AMDGPU::TTMP_64RegClassID; 1981 case 4: return AMDGPU::TTMP_128RegClassID; 1982 case 8: return AMDGPU::TTMP_256RegClassID; 1983 case 16: return AMDGPU::TTMP_512RegClassID; 1984 } 1985 } else if (Is == IS_SGPR) { 1986 switch (RegWidth) { 1987 default: return -1; 1988 case 1: return AMDGPU::SGPR_32RegClassID; 1989 case 2: return AMDGPU::SGPR_64RegClassID; 1990 case 3: return AMDGPU::SGPR_96RegClassID; 1991 case 4: return AMDGPU::SGPR_128RegClassID; 1992 case 5: return AMDGPU::SGPR_160RegClassID; 1993 case 6: return AMDGPU::SGPR_192RegClassID; 1994 case 8: return AMDGPU::SGPR_256RegClassID; 1995 case 16: return AMDGPU::SGPR_512RegClassID; 1996 } 1997 } else if (Is == IS_AGPR) { 1998 switch (RegWidth) { 1999 default: return -1; 2000 case 1: return AMDGPU::AGPR_32RegClassID; 2001 case 2: return AMDGPU::AReg_64RegClassID; 2002 case 3: return AMDGPU::AReg_96RegClassID; 2003 case 4: return AMDGPU::AReg_128RegClassID; 2004 case 5: return AMDGPU::AReg_160RegClassID; 2005 case 6: return AMDGPU::AReg_192RegClassID; 2006 case 8: return AMDGPU::AReg_256RegClassID; 2007 case 16: return AMDGPU::AReg_512RegClassID; 2008 case 32: return AMDGPU::AReg_1024RegClassID; 2009 } 2010 } 2011 return -1; 2012 } 2013 2014 static unsigned getSpecialRegForName(StringRef RegName) { 2015 return StringSwitch<unsigned>(RegName) 2016 .Case("exec", AMDGPU::EXEC) 2017 .Case("vcc", AMDGPU::VCC) 2018 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2019 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2020 
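    // Most of the source registers below are accepted both with and without
    // the "src_" prefix.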
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2021 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2022 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2023 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2024 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2025 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2026 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2027 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2028 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2029 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2030 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2031 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2032 .Case("m0", AMDGPU::M0) 2033 .Case("vccz", AMDGPU::SRC_VCCZ) 2034 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2035 .Case("execz", AMDGPU::SRC_EXECZ) 2036 .Case("src_execz", AMDGPU::SRC_EXECZ) 2037 .Case("scc", AMDGPU::SRC_SCC) 2038 .Case("src_scc", AMDGPU::SRC_SCC) 2039 .Case("tba", AMDGPU::TBA) 2040 .Case("tma", AMDGPU::TMA) 2041 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2042 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2043 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2044 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2045 .Case("vcc_lo", AMDGPU::VCC_LO) 2046 .Case("vcc_hi", AMDGPU::VCC_HI) 2047 .Case("exec_lo", AMDGPU::EXEC_LO) 2048 .Case("exec_hi", AMDGPU::EXEC_HI) 2049 .Case("tma_lo", AMDGPU::TMA_LO) 2050 .Case("tma_hi", AMDGPU::TMA_HI) 2051 .Case("tba_lo", AMDGPU::TBA_LO) 2052 .Case("tba_hi", AMDGPU::TBA_HI) 2053 .Case("pc", AMDGPU::PC_REG) 2054 .Case("null", AMDGPU::SGPR_NULL) 2055 .Default(AMDGPU::NoRegister); 2056 } 2057 2058 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2059 SMLoc &EndLoc, bool RestoreOnFailure) { 2060 auto R = parseRegister(); 2061 if (!R) return true; 2062 assert(R->isReg()); 2063 RegNo = R->getReg(); 2064 StartLoc = R->getStartLoc(); 2065 EndLoc = R->getEndLoc(); 2066 return false; 2067 } 2068 2069 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2070 SMLoc &EndLoc) { 2071 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2072 } 2073 2074 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2075 SMLoc &StartLoc, 2076 SMLoc &EndLoc) { 2077 bool Result = 2078 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2079 bool PendingErrors = getParser().hasPendingError(); 2080 getParser().clearPendingErrors(); 2081 if (PendingErrors) 2082 return MatchOperand_ParseFail; 2083 if (Result) 2084 return MatchOperand_NoMatch; 2085 return MatchOperand_Success; 2086 } 2087 2088 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2089 RegisterKind RegKind, unsigned Reg1, 2090 SMLoc Loc) { 2091 switch (RegKind) { 2092 case IS_SPECIAL: 2093 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2094 Reg = AMDGPU::EXEC; 2095 RegWidth = 2; 2096 return true; 2097 } 2098 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2099 Reg = AMDGPU::FLAT_SCR; 2100 RegWidth = 2; 2101 return true; 2102 } 2103 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2104 Reg = AMDGPU::XNACK_MASK; 2105 RegWidth = 2; 2106 return true; 2107 } 2108 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2109 Reg = AMDGPU::VCC; 2110 RegWidth = 2; 2111 return true; 2112 } 2113 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2114 Reg = AMDGPU::TBA; 2115 RegWidth = 2; 2116 return true; 2117 } 2118 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2119 Reg = AMDGPU::TMA; 2120 
RegWidth = 2; 2121 return true; 2122 } 2123 Error(Loc, "register does not fit in the list"); 2124 return false; 2125 case IS_VGPR: 2126 case IS_SGPR: 2127 case IS_AGPR: 2128 case IS_TTMP: 2129 if (Reg1 != Reg + RegWidth) { 2130 Error(Loc, "registers in a list must have consecutive indices"); 2131 return false; 2132 } 2133 RegWidth++; 2134 return true; 2135 default: 2136 llvm_unreachable("unexpected register kind"); 2137 } 2138 } 2139 2140 struct RegInfo { 2141 StringLiteral Name; 2142 RegisterKind Kind; 2143 }; 2144 2145 static constexpr RegInfo RegularRegisters[] = { 2146 {{"v"}, IS_VGPR}, 2147 {{"s"}, IS_SGPR}, 2148 {{"ttmp"}, IS_TTMP}, 2149 {{"acc"}, IS_AGPR}, 2150 {{"a"}, IS_AGPR}, 2151 }; 2152 2153 static bool isRegularReg(RegisterKind Kind) { 2154 return Kind == IS_VGPR || 2155 Kind == IS_SGPR || 2156 Kind == IS_TTMP || 2157 Kind == IS_AGPR; 2158 } 2159 2160 static const RegInfo* getRegularRegInfo(StringRef Str) { 2161 for (const RegInfo &Reg : RegularRegisters) 2162 if (Str.startswith(Reg.Name)) 2163 return &Reg; 2164 return nullptr; 2165 } 2166 2167 static bool getRegNum(StringRef Str, unsigned& Num) { 2168 return !Str.getAsInteger(10, Num); 2169 } 2170 2171 bool 2172 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2173 const AsmToken &NextToken) const { 2174 2175 // A list of consecutive registers: [s0,s1,s2,s3] 2176 if (Token.is(AsmToken::LBrac)) 2177 return true; 2178 2179 if (!Token.is(AsmToken::Identifier)) 2180 return false; 2181 2182 // A single register like s0 or a range of registers like s[0:1] 2183 2184 StringRef Str = Token.getString(); 2185 const RegInfo *Reg = getRegularRegInfo(Str); 2186 if (Reg) { 2187 StringRef RegName = Reg->Name; 2188 StringRef RegSuffix = Str.substr(RegName.size()); 2189 if (!RegSuffix.empty()) { 2190 unsigned Num; 2191 // A single register with an index: rXX 2192 if (getRegNum(RegSuffix, Num)) 2193 return true; 2194 } else { 2195 // A range of registers: r[XX:YY]. 2196 if (NextToken.is(AsmToken::LBrac)) 2197 return true; 2198 } 2199 } 2200 2201 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2202 } 2203 2204 bool 2205 AMDGPUAsmParser::isRegister() 2206 { 2207 return isRegister(getToken(), peekToken()); 2208 } 2209 2210 unsigned 2211 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2212 unsigned RegNum, 2213 unsigned RegWidth, 2214 SMLoc Loc) { 2215 2216 assert(isRegularReg(RegKind)); 2217 2218 unsigned AlignSize = 1; 2219 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2220 // SGPR and TTMP registers must be aligned. 2221 // Max required alignment is 4 dwords. 
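    // For example: s[2:3] is accepted (the pair is aligned to 2 dwords), while
    // s[1:2] is rejected below with "invalid register alignment".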
2222 AlignSize = std::min(RegWidth, 4u); 2223 } 2224 2225 if (RegNum % AlignSize != 0) { 2226 Error(Loc, "invalid register alignment"); 2227 return AMDGPU::NoRegister; 2228 } 2229 2230 unsigned RegIdx = RegNum / AlignSize; 2231 int RCID = getRegClass(RegKind, RegWidth); 2232 if (RCID == -1) { 2233 Error(Loc, "invalid or unsupported register size"); 2234 return AMDGPU::NoRegister; 2235 } 2236 2237 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2238 const MCRegisterClass RC = TRI->getRegClass(RCID); 2239 if (RegIdx >= RC.getNumRegs()) { 2240 Error(Loc, "register index is out of range"); 2241 return AMDGPU::NoRegister; 2242 } 2243 2244 return RC.getRegister(RegIdx); 2245 } 2246 2247 bool 2248 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2249 int64_t RegLo, RegHi; 2250 if (!skipToken(AsmToken::LBrac, "missing register index")) 2251 return false; 2252 2253 SMLoc FirstIdxLoc = getLoc(); 2254 SMLoc SecondIdxLoc; 2255 2256 if (!parseExpr(RegLo)) 2257 return false; 2258 2259 if (trySkipToken(AsmToken::Colon)) { 2260 SecondIdxLoc = getLoc(); 2261 if (!parseExpr(RegHi)) 2262 return false; 2263 } else { 2264 RegHi = RegLo; 2265 } 2266 2267 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2268 return false; 2269 2270 if (!isUInt<32>(RegLo)) { 2271 Error(FirstIdxLoc, "invalid register index"); 2272 return false; 2273 } 2274 2275 if (!isUInt<32>(RegHi)) { 2276 Error(SecondIdxLoc, "invalid register index"); 2277 return false; 2278 } 2279 2280 if (RegLo > RegHi) { 2281 Error(FirstIdxLoc, "first register index should not exceed second index"); 2282 return false; 2283 } 2284 2285 Num = static_cast<unsigned>(RegLo); 2286 Width = (RegHi - RegLo) + 1; 2287 return true; 2288 } 2289 2290 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2291 unsigned &RegNum, unsigned &RegWidth, 2292 SmallVectorImpl<AsmToken> &Tokens) { 2293 assert(isToken(AsmToken::Identifier)); 2294 unsigned Reg = getSpecialRegForName(getTokenStr()); 2295 if (Reg) { 2296 RegNum = 0; 2297 RegWidth = 1; 2298 RegKind = IS_SPECIAL; 2299 Tokens.push_back(getToken()); 2300 lex(); // skip register name 2301 } 2302 return Reg; 2303 } 2304 2305 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2306 unsigned &RegNum, unsigned &RegWidth, 2307 SmallVectorImpl<AsmToken> &Tokens) { 2308 assert(isToken(AsmToken::Identifier)); 2309 StringRef RegName = getTokenStr(); 2310 auto Loc = getLoc(); 2311 2312 const RegInfo *RI = getRegularRegInfo(RegName); 2313 if (!RI) { 2314 Error(Loc, "invalid register name"); 2315 return AMDGPU::NoRegister; 2316 } 2317 2318 Tokens.push_back(getToken()); 2319 lex(); // skip register name 2320 2321 RegKind = RI->Kind; 2322 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2323 if (!RegSuffix.empty()) { 2324 // Single 32-bit register: vXX. 2325 if (!getRegNum(RegSuffix, RegNum)) { 2326 Error(Loc, "invalid register index"); 2327 return AMDGPU::NoRegister; 2328 } 2329 RegWidth = 1; 2330 } else { 2331 // Range of registers: v[XX:YY]. ":YY" is optional. 
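    // For example: v[8:11] yields RegNum = 8 and RegWidth = 4, while v[8]
    // yields RegWidth = 1.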
2332 if (!ParseRegRange(RegNum, RegWidth)) 2333 return AMDGPU::NoRegister; 2334 } 2335 2336 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2337 } 2338 2339 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2340 unsigned &RegWidth, 2341 SmallVectorImpl<AsmToken> &Tokens) { 2342 unsigned Reg = AMDGPU::NoRegister; 2343 auto ListLoc = getLoc(); 2344 2345 if (!skipToken(AsmToken::LBrac, 2346 "expected a register or a list of registers")) { 2347 return AMDGPU::NoRegister; 2348 } 2349 2350 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2351 2352 auto Loc = getLoc(); 2353 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2354 return AMDGPU::NoRegister; 2355 if (RegWidth != 1) { 2356 Error(Loc, "expected a single 32-bit register"); 2357 return AMDGPU::NoRegister; 2358 } 2359 2360 for (; trySkipToken(AsmToken::Comma); ) { 2361 RegisterKind NextRegKind; 2362 unsigned NextReg, NextRegNum, NextRegWidth; 2363 Loc = getLoc(); 2364 2365 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2366 NextRegNum, NextRegWidth, 2367 Tokens)) { 2368 return AMDGPU::NoRegister; 2369 } 2370 if (NextRegWidth != 1) { 2371 Error(Loc, "expected a single 32-bit register"); 2372 return AMDGPU::NoRegister; 2373 } 2374 if (NextRegKind != RegKind) { 2375 Error(Loc, "registers in a list must be of the same kind"); 2376 return AMDGPU::NoRegister; 2377 } 2378 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2379 return AMDGPU::NoRegister; 2380 } 2381 2382 if (!skipToken(AsmToken::RBrac, 2383 "expected a comma or a closing square bracket")) { 2384 return AMDGPU::NoRegister; 2385 } 2386 2387 if (isRegularReg(RegKind)) 2388 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2389 2390 return Reg; 2391 } 2392 2393 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2394 unsigned &RegNum, unsigned &RegWidth, 2395 SmallVectorImpl<AsmToken> &Tokens) { 2396 auto Loc = getLoc(); 2397 Reg = AMDGPU::NoRegister; 2398 2399 if (isToken(AsmToken::Identifier)) { 2400 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2401 if (Reg == AMDGPU::NoRegister) 2402 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2403 } else { 2404 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2405 } 2406 2407 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2408 if (Reg == AMDGPU::NoRegister) { 2409 assert(Parser.hasPendingError()); 2410 return false; 2411 } 2412 2413 if (!subtargetHasRegister(*TRI, Reg)) { 2414 if (Reg == AMDGPU::SGPR_NULL) { 2415 Error(Loc, "'null' operand is not supported on this GPU"); 2416 } else { 2417 Error(Loc, "register not available on this GPU"); 2418 } 2419 return false; 2420 } 2421 2422 return true; 2423 } 2424 2425 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2426 unsigned &RegNum, unsigned &RegWidth, 2427 bool RestoreOnFailure /*=false*/) { 2428 Reg = AMDGPU::NoRegister; 2429 2430 SmallVector<AsmToken, 1> Tokens; 2431 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2432 if (RestoreOnFailure) { 2433 while (!Tokens.empty()) { 2434 getLexer().UnLex(Tokens.pop_back_val()); 2435 } 2436 } 2437 return true; 2438 } 2439 return false; 2440 } 2441 2442 Optional<StringRef> 2443 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2444 switch (RegKind) { 2445 case IS_VGPR: 2446 return StringRef(".amdgcn.next_free_vgpr"); 2447 case IS_SGPR: 2448 return StringRef(".amdgcn.next_free_sgpr"); 2449 default: 2450 return None; 2451 } 2452 } 2453 2454 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2455 auto SymbolName = getGprCountSymbolName(RegKind); 2456 assert(SymbolName && "initializing invalid register kind"); 2457 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2458 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2459 } 2460 2461 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2462 unsigned DwordRegIndex, 2463 unsigned RegWidth) { 2464 // Symbols are only defined for GCN targets 2465 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2466 return true; 2467 2468 auto SymbolName = getGprCountSymbolName(RegKind); 2469 if (!SymbolName) 2470 return true; 2471 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2472 2473 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2474 int64_t OldCount; 2475 2476 if (!Sym->isVariable()) 2477 return !Error(getParser().getTok().getLoc(), 2478 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2479 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2480 return !Error( 2481 getParser().getTok().getLoc(), 2482 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2483 2484 if (OldCount <= NewMax) 2485 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2486 2487 return true; 2488 } 2489 2490 std::unique_ptr<AMDGPUOperand> 2491 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2492 const auto &Tok = Parser.getTok(); 2493 SMLoc StartLoc = Tok.getLoc(); 2494 SMLoc EndLoc = Tok.getEndLoc(); 2495 RegisterKind RegKind; 2496 unsigned Reg, RegNum, RegWidth; 2497 2498 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2499 return nullptr; 2500 } 2501 if (isHsaAbiVersion3(&getSTI())) { 2502 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2503 return nullptr; 2504 } else 2505 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2506 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2507 } 2508 2509 OperandMatchResultTy 2510 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2511 // TODO: add syntactic sugar for 1/(2*PI) 2512 2513 assert(!isRegister()); 2514 assert(!isModifier()); 2515 2516 const auto& Tok = getToken(); 2517 const auto& NextTok = peekToken(); 2518 bool IsReal = Tok.is(AsmToken::Real); 2519 SMLoc S = getLoc(); 2520 bool Negate = false; 2521 2522 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2523 lex(); 2524 IsReal = true; 2525 Negate = true; 2526 } 2527 2528 if (IsReal) { 2529 // Floating-point expressions are not supported. 2530 // Can only allow floating-point literals with an 2531 // optional sign. 2532 2533 StringRef Num = getTokenStr(); 2534 lex(); 2535 2536 APFloat RealVal(APFloat::IEEEdouble()); 2537 auto roundMode = APFloat::rmNearestTiesToEven; 2538 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2539 return MatchOperand_ParseFail; 2540 } 2541 if (Negate) 2542 RealVal.changeSign(); 2543 2544 Operands.push_back( 2545 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2546 AMDGPUOperand::ImmTyNone, true)); 2547 2548 return MatchOperand_Success; 2549 2550 } else { 2551 int64_t IntVal; 2552 const MCExpr *Expr; 2553 SMLoc S = getLoc(); 2554 2555 if (HasSP3AbsModifier) { 2556 // This is a workaround for handling expressions 2557 // as arguments of SP3 'abs' modifier, for example: 2558 // |1.0| 2559 // |-1| 2560 // |1+x| 2561 // This syntax is not compatible with syntax of standard 2562 // MC expressions (due to the trailing '|'). 
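      // A full parseExpression() would treat the trailing '|' as a bitwise-OR
      // operator, so only a primary expression is parsed here.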
2563 SMLoc EndLoc; 2564 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2565 return MatchOperand_ParseFail; 2566 } else { 2567 if (Parser.parseExpression(Expr)) 2568 return MatchOperand_ParseFail; 2569 } 2570 2571 if (Expr->evaluateAsAbsolute(IntVal)) { 2572 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2573 } else { 2574 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2575 } 2576 2577 return MatchOperand_Success; 2578 } 2579 2580 return MatchOperand_NoMatch; 2581 } 2582 2583 OperandMatchResultTy 2584 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2585 if (!isRegister()) 2586 return MatchOperand_NoMatch; 2587 2588 if (auto R = parseRegister()) { 2589 assert(R->isReg()); 2590 Operands.push_back(std::move(R)); 2591 return MatchOperand_Success; 2592 } 2593 return MatchOperand_ParseFail; 2594 } 2595 2596 OperandMatchResultTy 2597 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2598 auto res = parseReg(Operands); 2599 if (res != MatchOperand_NoMatch) { 2600 return res; 2601 } else if (isModifier()) { 2602 return MatchOperand_NoMatch; 2603 } else { 2604 return parseImm(Operands, HasSP3AbsMod); 2605 } 2606 } 2607 2608 bool 2609 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2610 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2611 const auto &str = Token.getString(); 2612 return str == "abs" || str == "neg" || str == "sext"; 2613 } 2614 return false; 2615 } 2616 2617 bool 2618 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2619 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2620 } 2621 2622 bool 2623 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2624 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2625 } 2626 2627 bool 2628 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2629 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2630 } 2631 2632 // Check if this is an operand modifier or an opcode modifier 2633 // which may look like an expression but it is not. We should 2634 // avoid parsing these modifiers as expressions. Currently 2635 // recognized sequences are: 2636 // |...| 2637 // abs(...) 2638 // neg(...) 2639 // sext(...) 2640 // -reg 2641 // -|...| 2642 // -abs(...) 2643 // name:... 2644 // Note that simple opcode modifiers like 'gds' may be parsed as 2645 // expressions; this is a special case. See getExpressionAsToken. 2646 // 2647 bool 2648 AMDGPUAsmParser::isModifier() { 2649 2650 AsmToken Tok = getToken(); 2651 AsmToken NextToken[2]; 2652 peekTokens(NextToken); 2653 2654 return isOperandModifier(Tok, NextToken[0]) || 2655 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2656 isOpcodeModifierWithVal(Tok, NextToken[0]); 2657 } 2658 2659 // Check if the current token is an SP3 'neg' modifier. 2660 // Currently this modifier is allowed in the following context: 2661 // 2662 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2663 // 2. Before an 'abs' modifier: -abs(...) 2664 // 3. Before an SP3 'abs' modifier: -|...| 2665 // 2666 // In all other cases "-" is handled as a part 2667 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2741 } 2742 2743 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2744 return MatchOperand_ParseFail; 2745 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2746 return MatchOperand_ParseFail; 2747 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2748 return MatchOperand_ParseFail; 2749 2750 AMDGPUOperand::Modifiers Mods; 2751 Mods.Abs = Abs || SP3Abs; 2752 Mods.Neg = Neg || SP3Neg; 2753 2754 if (Mods.hasFPModifiers()) { 2755 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2756 if (Op.isExpr()) { 2757 Error(Op.getStartLoc(), "expected an absolute expression"); 2758 return MatchOperand_ParseFail; 2759 } 2760 Op.setModifiers(Mods); 2761 } 2762 return MatchOperand_Success; 2763 } 2764 2765 OperandMatchResultTy 2766 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2767 bool AllowImm) { 2768 bool Sext = trySkipId("sext"); 2769 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2770 return MatchOperand_ParseFail; 2771 2772 OperandMatchResultTy Res; 2773 if (AllowImm) { 2774 Res = parseRegOrImm(Operands); 2775 } else { 2776 Res = parseReg(Operands); 2777 } 2778 if (Res != MatchOperand_Success) { 2779 return Sext? MatchOperand_ParseFail : Res; 2780 } 2781 2782 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2783 return MatchOperand_ParseFail; 2784 2785 AMDGPUOperand::Modifiers Mods; 2786 Mods.Sext = Sext; 2787 2788 if (Mods.hasIntModifiers()) { 2789 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2790 if (Op.isExpr()) { 2791 Error(Op.getStartLoc(), "expected an absolute expression"); 2792 return MatchOperand_ParseFail; 2793 } 2794 Op.setModifiers(Mods); 2795 } 2796 2797 return MatchOperand_Success; 2798 } 2799 2800 OperandMatchResultTy 2801 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2802 return parseRegOrImmWithFPInputMods(Operands, false); 2803 } 2804 2805 OperandMatchResultTy 2806 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2807 return parseRegOrImmWithIntInputMods(Operands, false); 2808 } 2809 2810 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2811 auto Loc = getLoc(); 2812 if (trySkipId("off")) { 2813 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2814 AMDGPUOperand::ImmTyOff, false)); 2815 return MatchOperand_Success; 2816 } 2817 2818 if (!isRegister()) 2819 return MatchOperand_NoMatch; 2820 2821 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2822 if (Reg) { 2823 Operands.push_back(std::move(Reg)); 2824 return MatchOperand_Success; 2825 } 2826 2827 return MatchOperand_ParseFail; 2828 2829 } 2830 2831 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2832 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2833 2834 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2835 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2836 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2837 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2838 return Match_InvalidOperand; 2839 2840 if ((TSFlags & SIInstrFlags::VOP3) && 2841 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2842 getForcedEncodingSize() != 64) 2843 return Match_PreferE32; 2844 2845 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2846 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2847 // v_mac_f32/16 allow only dst_sel == DWORD; 2848 auto OpNum = 2849 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2850 const auto &Op = Inst.getOperand(OpNum); 2851 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2852 return Match_InvalidOperand; 2853 } 2854 } 2855 2856 return Match_Success; 2857 } 2858 2859 static ArrayRef<unsigned> getAllVariants() { 2860 static const unsigned Variants[] = { 2861 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2862 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2863 }; 2864 2865 return makeArrayRef(Variants); 2866 } 2867 2868 // What asm variants we should check 2869 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2870 if (getForcedEncodingSize() == 32) { 2871 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2872 return makeArrayRef(Variants); 2873 } 2874 2875 if (isForcedVOP3()) { 2876 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2877 return makeArrayRef(Variants); 2878 } 2879 2880 if (isForcedSDWA()) { 2881 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2882 AMDGPUAsmVariants::SDWA9}; 2883 return makeArrayRef(Variants); 2884 } 2885 2886 if (isForcedDPP()) { 2887 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2888 return makeArrayRef(Variants); 2889 } 2890 2891 return getAllVariants(); 2892 } 2893 2894 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 2895 if (getForcedEncodingSize() == 32) 2896 return "e32"; 2897 2898 if (isForcedVOP3()) 2899 return "e64"; 2900 2901 if (isForcedSDWA()) 2902 return "sdwa"; 2903 2904 if (isForcedDPP()) 2905 return "dpp"; 2906 2907 return ""; 2908 } 2909 2910 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2911 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2912 const unsigned Num = Desc.getNumImplicitUses(); 2913 for (unsigned i = 0; i < Num; ++i) { 2914 unsigned Reg = Desc.ImplicitUses[i]; 2915 switch (Reg) { 2916 case AMDGPU::FLAT_SCR: 2917 case AMDGPU::VCC: 2918 case AMDGPU::VCC_LO: 2919 case AMDGPU::VCC_HI: 2920 case AMDGPU::M0: 2921 return Reg; 2922 default: 2923 break; 2924 } 2925 } 2926 return AMDGPU::NoRegister; 2927 } 2928 2929 // NB: This code is correct only when used to check constant 2930 // bus limitations because GFX7 support no f16 inline constants. 2931 // Note that there are no cases when a GFX7 opcode violates 2932 // constant bus limitations due to the use of an f16 constant. 
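// For illustration: in "v_add_f32 v0, 0.5, s0" the 0.5 is an inline constant,
// whereas 1.5 would be encoded as a literal and would count against the
// constant bus limit (see usesConstantBus() below).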
2933 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2934 unsigned OpIdx) const { 2935 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2936 2937 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2938 return false; 2939 } 2940 2941 const MCOperand &MO = Inst.getOperand(OpIdx); 2942 2943 int64_t Val = MO.getImm(); 2944 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2945 2946 switch (OpSize) { // expected operand size 2947 case 8: 2948 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2949 case 4: 2950 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2951 case 2: { 2952 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2953 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 2954 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 2955 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 2956 return AMDGPU::isInlinableIntLiteral(Val); 2957 2958 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2959 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2960 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 2961 return AMDGPU::isInlinableIntLiteralV216(Val); 2962 2963 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2964 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2965 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 2966 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2967 2968 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2969 } 2970 default: 2971 llvm_unreachable("invalid operand size"); 2972 } 2973 } 2974 2975 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2976 if (!isGFX10()) 2977 return 1; 2978 2979 switch (Opcode) { 2980 // 64-bit shift instructions can use only one scalar value input 2981 case AMDGPU::V_LSHLREV_B64: 2982 case AMDGPU::V_LSHLREV_B64_gfx10: 2983 case AMDGPU::V_LSHL_B64: 2984 case AMDGPU::V_LSHRREV_B64: 2985 case AMDGPU::V_LSHRREV_B64_gfx10: 2986 case AMDGPU::V_LSHR_B64: 2987 case AMDGPU::V_ASHRREV_I64: 2988 case AMDGPU::V_ASHRREV_I64_gfx10: 2989 case AMDGPU::V_ASHR_I64: 2990 return 1; 2991 default: 2992 return 2; 2993 } 2994 } 2995 2996 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2997 const MCOperand &MO = Inst.getOperand(OpIdx); 2998 if (MO.isImm()) { 2999 return !isInlineConstant(Inst, OpIdx); 3000 } else if (MO.isReg()) { 3001 auto Reg = MO.getReg(); 3002 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3003 auto PReg = mc2PseudoReg(Reg); 3004 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3005 } else { 3006 return true; 3007 } 3008 } 3009 3010 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 3011 const unsigned Opcode = Inst.getOpcode(); 3012 const MCInstrDesc &Desc = MII.get(Opcode); 3013 unsigned ConstantBusUseCount = 0; 3014 unsigned NumLiterals = 0; 3015 unsigned LiteralSize; 3016 3017 if (Desc.TSFlags & 3018 (SIInstrFlags::VOPC | 3019 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3020 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3021 SIInstrFlags::SDWA)) { 3022 // Check special imm operands (used by madmk, etc) 3023 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3024 ++ConstantBusUseCount; 3025 } 3026 3027 SmallDenseSet<unsigned> SGPRsUsed; 3028 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3029 if (SGPRUsed != AMDGPU::NoRegister) { 3030 SGPRsUsed.insert(SGPRUsed); 3031 ++ConstantBusUseCount; 3032 } 3033 3034 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3035 const int Src1Idx = 
AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3036 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3037 3038 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3039 3040 for (int OpIdx : OpIndices) { 3041 if (OpIdx == -1) break; 3042 3043 const MCOperand &MO = Inst.getOperand(OpIdx); 3044 if (usesConstantBus(Inst, OpIdx)) { 3045 if (MO.isReg()) { 3046 const unsigned Reg = mc2PseudoReg(MO.getReg()); 3047 // Pairs of registers with a partial intersections like these 3048 // s0, s[0:1] 3049 // flat_scratch_lo, flat_scratch 3050 // flat_scratch_lo, flat_scratch_hi 3051 // are theoretically valid but they are disabled anyway. 3052 // Note that this code mimics SIInstrInfo::verifyInstruction 3053 if (!SGPRsUsed.count(Reg)) { 3054 SGPRsUsed.insert(Reg); 3055 ++ConstantBusUseCount; 3056 } 3057 } else { // Expression or a literal 3058 3059 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3060 continue; // special operand like VINTERP attr_chan 3061 3062 // An instruction may use only one literal. 3063 // This has been validated on the previous step. 3064 // See validateVOP3Literal. 3065 // This literal may be used as more than one operand. 3066 // If all these operands are of the same size, 3067 // this literal counts as one scalar value. 3068 // Otherwise it counts as 2 scalar values. 3069 // See "GFX10 Shader Programming", section 3.6.2.3. 3070 3071 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3072 if (Size < 4) Size = 4; 3073 3074 if (NumLiterals == 0) { 3075 NumLiterals = 1; 3076 LiteralSize = Size; 3077 } else if (LiteralSize != Size) { 3078 NumLiterals = 2; 3079 } 3080 } 3081 } 3082 } 3083 } 3084 ConstantBusUseCount += NumLiterals; 3085 3086 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 3087 } 3088 3089 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 3090 const unsigned Opcode = Inst.getOpcode(); 3091 const MCInstrDesc &Desc = MII.get(Opcode); 3092 3093 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3094 if (DstIdx == -1 || 3095 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3096 return true; 3097 } 3098 3099 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3100 3101 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3102 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3103 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3104 3105 assert(DstIdx != -1); 3106 const MCOperand &Dst = Inst.getOperand(DstIdx); 3107 assert(Dst.isReg()); 3108 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3109 3110 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3111 3112 for (int SrcIdx : SrcIndices) { 3113 if (SrcIdx == -1) break; 3114 const MCOperand &Src = Inst.getOperand(SrcIdx); 3115 if (Src.isReg()) { 3116 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3117 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3118 return false; 3119 } 3120 } 3121 } 3122 3123 return true; 3124 } 3125 3126 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3127 3128 const unsigned Opc = Inst.getOpcode(); 3129 const MCInstrDesc &Desc = MII.get(Opc); 3130 3131 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3132 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3133 assert(ClampIdx != -1); 3134 return Inst.getOperand(ClampIdx).getImm() == 0; 3135 } 3136 3137 return true; 3138 } 3139 3140 bool 
AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3141 3142 const unsigned Opc = Inst.getOpcode(); 3143 const MCInstrDesc &Desc = MII.get(Opc); 3144 3145 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3146 return true; 3147 3148 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3149 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3150 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3151 3152 assert(VDataIdx != -1); 3153 3154 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3155 return true; 3156 3157 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3158 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3159 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3160 if (DMask == 0) 3161 DMask = 1; 3162 3163 unsigned DataSize = 3164 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3165 if (hasPackedD16()) { 3166 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3167 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3168 DataSize = (DataSize + 1) / 2; 3169 } 3170 3171 return (VDataSize / 4) == DataSize + TFESize; 3172 } 3173 3174 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3175 const unsigned Opc = Inst.getOpcode(); 3176 const MCInstrDesc &Desc = MII.get(Opc); 3177 3178 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3179 return true; 3180 3181 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3182 3183 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3184 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3185 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3186 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3187 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3188 3189 assert(VAddr0Idx != -1); 3190 assert(SrsrcIdx != -1); 3191 assert(SrsrcIdx > VAddr0Idx); 3192 3193 if (DimIdx == -1) 3194 return true; // intersect_ray 3195 3196 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3197 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3198 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3199 unsigned VAddrSize = 3200 IsNSA ? SrsrcIdx - VAddr0Idx 3201 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3202 3203 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3204 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3205 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3206 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3207 if (!IsNSA) { 3208 if (AddrSize > 8) 3209 AddrSize = 16; 3210 else if (AddrSize > 4) 3211 AddrSize = 8; 3212 } 3213 3214 return VAddrSize == AddrSize; 3215 } 3216 3217 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3218 3219 const unsigned Opc = Inst.getOpcode(); 3220 const MCInstrDesc &Desc = MII.get(Opc); 3221 3222 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3223 return true; 3224 if (!Desc.mayLoad() || !Desc.mayStore()) 3225 return true; // Not atomic 3226 3227 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3228 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3229 3230 // This is an incomplete check because image_atomic_cmpswap 3231 // may only use 0x3 and 0xf while other atomic operations 3232 // may use 0x1 and 0x3. However these limitations are 3233 // verified when we check that dmask matches dst size. 
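  // (Presumably 0x1 for a 32-bit atomic, 0x3 for a 64-bit atomic or for the
  // data+compare pair of a 32-bit cmpswap, and 0xf for a 64-bit cmpswap.)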
3234 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3235 } 3236 3237 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3238 3239 const unsigned Opc = Inst.getOpcode(); 3240 const MCInstrDesc &Desc = MII.get(Opc); 3241 3242 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3243 return true; 3244 3245 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3246 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3247 3248 // GATHER4 instructions use dmask in a different fashion compared to 3249 // other MIMG instructions. The only useful DMASK values are 3250 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3251 // (red,red,red,red) etc.) The ISA document doesn't mention 3252 // this. 3253 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3254 } 3255 3256 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3257 { 3258 switch (Opcode) { 3259 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3260 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3261 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3262 return true; 3263 default: 3264 return false; 3265 } 3266 } 3267 3268 // movrels* opcodes should only allow VGPRS as src0. 3269 // This is specified in .td description for vop1/vop3, 3270 // but sdwa is handled differently. See isSDWAOperand. 3271 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3272 3273 const unsigned Opc = Inst.getOpcode(); 3274 const MCInstrDesc &Desc = MII.get(Opc); 3275 3276 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3277 return true; 3278 3279 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3280 assert(Src0Idx != -1); 3281 3282 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3283 if (!Src0.isReg()) 3284 return false; 3285 3286 auto Reg = Src0.getReg(); 3287 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3288 return !isSGPR(mc2PseudoReg(Reg), TRI); 3289 } 3290 3291 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) { 3292 3293 const unsigned Opc = Inst.getOpcode(); 3294 3295 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3296 return true; 3297 3298 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3299 assert(Src0Idx != -1); 3300 3301 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3302 if (!Src0.isReg()) 3303 return true; 3304 3305 auto Reg = Src0.getReg(); 3306 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3307 if (isSGPR(mc2PseudoReg(Reg), TRI)) { 3308 Error(getLoc(), "source operand must be either a VGPR or an inline constant"); 3309 return false; 3310 } 3311 3312 return true; 3313 } 3314 3315 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3316 switch (Inst.getOpcode()) { 3317 default: 3318 return true; 3319 case V_DIV_SCALE_F32_gfx6_gfx7: 3320 case V_DIV_SCALE_F32_vi: 3321 case V_DIV_SCALE_F32_gfx10: 3322 case V_DIV_SCALE_F64_gfx6_gfx7: 3323 case V_DIV_SCALE_F64_vi: 3324 case V_DIV_SCALE_F64_gfx10: 3325 break; 3326 } 3327 3328 // TODO: Check that src0 = src1 or src2. 
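  // Note: the VOP3B encoding reuses the ABS modifier bits for the scalar
  // carry-out destination (sdst), so ABS cannot be encoded for these opcodes.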
3329 3330 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3331 AMDGPU::OpName::src2_modifiers, 3332 AMDGPU::OpName::src2_modifiers}) { 3333 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3334 .getImm() & 3335 SISrcMods::ABS) { 3336 Error(getLoc(), "ABS not allowed in VOP3B instructions"); 3337 return false; 3338 } 3339 } 3340 3341 return true; 3342 } 3343 3344 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3345 3346 const unsigned Opc = Inst.getOpcode(); 3347 const MCInstrDesc &Desc = MII.get(Opc); 3348 3349 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3350 return true; 3351 3352 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3353 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3354 if (isCI() || isSI()) 3355 return false; 3356 } 3357 3358 return true; 3359 } 3360 3361 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3362 const unsigned Opc = Inst.getOpcode(); 3363 const MCInstrDesc &Desc = MII.get(Opc); 3364 3365 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3366 return true; 3367 3368 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3369 if (DimIdx < 0) 3370 return true; 3371 3372 long Imm = Inst.getOperand(DimIdx).getImm(); 3373 if (Imm < 0 || Imm >= 8) 3374 return false; 3375 3376 return true; 3377 } 3378 3379 static bool IsRevOpcode(const unsigned Opcode) 3380 { 3381 switch (Opcode) { 3382 case AMDGPU::V_SUBREV_F32_e32: 3383 case AMDGPU::V_SUBREV_F32_e64: 3384 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3385 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3386 case AMDGPU::V_SUBREV_F32_e32_vi: 3387 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3388 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3389 case AMDGPU::V_SUBREV_F32_e64_vi: 3390 3391 case AMDGPU::V_SUBREV_CO_U32_e32: 3392 case AMDGPU::V_SUBREV_CO_U32_e64: 3393 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3394 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3395 3396 case AMDGPU::V_SUBBREV_U32_e32: 3397 case AMDGPU::V_SUBBREV_U32_e64: 3398 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3399 case AMDGPU::V_SUBBREV_U32_e32_vi: 3400 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3401 case AMDGPU::V_SUBBREV_U32_e64_vi: 3402 3403 case AMDGPU::V_SUBREV_U32_e32: 3404 case AMDGPU::V_SUBREV_U32_e64: 3405 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3406 case AMDGPU::V_SUBREV_U32_e32_vi: 3407 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3408 case AMDGPU::V_SUBREV_U32_e64_vi: 3409 3410 case AMDGPU::V_SUBREV_F16_e32: 3411 case AMDGPU::V_SUBREV_F16_e64: 3412 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3413 case AMDGPU::V_SUBREV_F16_e32_vi: 3414 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3415 case AMDGPU::V_SUBREV_F16_e64_vi: 3416 3417 case AMDGPU::V_SUBREV_U16_e32: 3418 case AMDGPU::V_SUBREV_U16_e64: 3419 case AMDGPU::V_SUBREV_U16_e32_vi: 3420 case AMDGPU::V_SUBREV_U16_e64_vi: 3421 3422 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3423 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3424 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3425 3426 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3427 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3428 3429 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3430 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3431 3432 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3433 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3434 3435 case AMDGPU::V_LSHRREV_B32_e32: 3436 case AMDGPU::V_LSHRREV_B32_e64: 3437 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3438 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3439 case AMDGPU::V_LSHRREV_B32_e32_vi: 3440 case AMDGPU::V_LSHRREV_B32_e64_vi: 3441 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3442 case 
AMDGPU::V_LSHRREV_B32_e64_gfx10: 3443 3444 case AMDGPU::V_ASHRREV_I32_e32: 3445 case AMDGPU::V_ASHRREV_I32_e64: 3446 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3447 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3448 case AMDGPU::V_ASHRREV_I32_e32_vi: 3449 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3450 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3451 case AMDGPU::V_ASHRREV_I32_e64_vi: 3452 3453 case AMDGPU::V_LSHLREV_B32_e32: 3454 case AMDGPU::V_LSHLREV_B32_e64: 3455 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3456 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3457 case AMDGPU::V_LSHLREV_B32_e32_vi: 3458 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3459 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3460 case AMDGPU::V_LSHLREV_B32_e64_vi: 3461 3462 case AMDGPU::V_LSHLREV_B16_e32: 3463 case AMDGPU::V_LSHLREV_B16_e64: 3464 case AMDGPU::V_LSHLREV_B16_e32_vi: 3465 case AMDGPU::V_LSHLREV_B16_e64_vi: 3466 case AMDGPU::V_LSHLREV_B16_gfx10: 3467 3468 case AMDGPU::V_LSHRREV_B16_e32: 3469 case AMDGPU::V_LSHRREV_B16_e64: 3470 case AMDGPU::V_LSHRREV_B16_e32_vi: 3471 case AMDGPU::V_LSHRREV_B16_e64_vi: 3472 case AMDGPU::V_LSHRREV_B16_gfx10: 3473 3474 case AMDGPU::V_ASHRREV_I16_e32: 3475 case AMDGPU::V_ASHRREV_I16_e64: 3476 case AMDGPU::V_ASHRREV_I16_e32_vi: 3477 case AMDGPU::V_ASHRREV_I16_e64_vi: 3478 case AMDGPU::V_ASHRREV_I16_gfx10: 3479 3480 case AMDGPU::V_LSHLREV_B64: 3481 case AMDGPU::V_LSHLREV_B64_gfx10: 3482 case AMDGPU::V_LSHLREV_B64_vi: 3483 3484 case AMDGPU::V_LSHRREV_B64: 3485 case AMDGPU::V_LSHRREV_B64_gfx10: 3486 case AMDGPU::V_LSHRREV_B64_vi: 3487 3488 case AMDGPU::V_ASHRREV_I64: 3489 case AMDGPU::V_ASHRREV_I64_gfx10: 3490 case AMDGPU::V_ASHRREV_I64_vi: 3491 3492 case AMDGPU::V_PK_LSHLREV_B16: 3493 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3494 case AMDGPU::V_PK_LSHLREV_B16_vi: 3495 3496 case AMDGPU::V_PK_LSHRREV_B16: 3497 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3498 case AMDGPU::V_PK_LSHRREV_B16_vi: 3499 case AMDGPU::V_PK_ASHRREV_I16: 3500 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3501 case AMDGPU::V_PK_ASHRREV_I16_vi: 3502 return true; 3503 default: 3504 return false; 3505 } 3506 } 3507 3508 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3509 3510 using namespace SIInstrFlags; 3511 const unsigned Opcode = Inst.getOpcode(); 3512 const MCInstrDesc &Desc = MII.get(Opcode); 3513 3514 // lds_direct register is defined so that it can be used 3515 // with 9-bit operands only. Ignore encodings which do not accept these. 3516 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3517 return true; 3518 3519 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3520 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3521 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3522 3523 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3524 3525 // lds_direct cannot be specified as either src1 or src2. 3526 for (int SrcIdx : SrcIndices) { 3527 if (SrcIdx == -1) break; 3528 const MCOperand &Src = Inst.getOperand(SrcIdx); 3529 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3530 return false; 3531 } 3532 } 3533 3534 if (Src0Idx == -1) 3535 return true; 3536 3537 const MCOperand &Src = Inst.getOperand(Src0Idx); 3538 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3539 return true; 3540 3541 // lds_direct is specified as src0. Check additional limitations. 
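  // For illustration: "v_mov_b32 v0, lds_direct" is accepted here, while
  // "v_lshlrev_b32 v0, lds_direct, v1" is rejected because the opcode is
  // a *rev opcode.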
3542 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3543 } 3544 3545 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3546 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3547 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3548 if (Op.isFlatOffset()) 3549 return Op.getStartLoc(); 3550 } 3551 return getLoc(); 3552 } 3553 3554 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3555 const OperandVector &Operands) { 3556 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3557 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3558 return true; 3559 3560 auto Opcode = Inst.getOpcode(); 3561 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3562 assert(OpNum != -1); 3563 3564 const auto &Op = Inst.getOperand(OpNum); 3565 if (!hasFlatOffsets() && Op.getImm() != 0) { 3566 Error(getFlatOffsetLoc(Operands), 3567 "flat offset modifier is not supported on this GPU"); 3568 return false; 3569 } 3570 3571 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3572 // For FLAT segment the offset must be positive; 3573 // MSB is ignored and forced to zero. 3574 unsigned OffsetSize = isGFX9() ? 13 : 12; 3575 if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) { 3576 if (!isIntN(OffsetSize, Op.getImm())) { 3577 Error(getFlatOffsetLoc(Operands), 3578 isGFX9() ? "expected a 13-bit signed offset" : 3579 "expected a 12-bit signed offset"); 3580 return false; 3581 } 3582 } else { 3583 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3584 Error(getFlatOffsetLoc(Operands), 3585 isGFX9() ? "expected a 12-bit unsigned offset" : 3586 "expected an 11-bit unsigned offset"); 3587 return false; 3588 } 3589 } 3590 3591 return true; 3592 } 3593 3594 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3595 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3596 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3597 if (Op.isSMEMOffset()) 3598 return Op.getStartLoc(); 3599 } 3600 return getLoc(); 3601 } 3602 3603 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3604 const OperandVector &Operands) { 3605 if (isCI() || isSI()) 3606 return true; 3607 3608 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3609 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3610 return true; 3611 3612 auto Opcode = Inst.getOpcode(); 3613 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3614 if (OpNum == -1) 3615 return true; 3616 3617 const auto &Op = Inst.getOperand(OpNum); 3618 if (!Op.isImm()) 3619 return true; 3620 3621 uint64_t Offset = Op.getImm(); 3622 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3623 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3624 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3625 return true; 3626 3627 Error(getSMEMOffsetLoc(Operands), 3628 (isVI() || IsBuffer) ? 
"expected a 20-bit unsigned offset" : 3629 "expected a 21-bit signed offset"); 3630 3631 return false; 3632 } 3633 3634 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3635 unsigned Opcode = Inst.getOpcode(); 3636 const MCInstrDesc &Desc = MII.get(Opcode); 3637 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3638 return true; 3639 3640 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3641 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3642 3643 const int OpIndices[] = { Src0Idx, Src1Idx }; 3644 3645 unsigned NumExprs = 0; 3646 unsigned NumLiterals = 0; 3647 uint32_t LiteralValue; 3648 3649 for (int OpIdx : OpIndices) { 3650 if (OpIdx == -1) break; 3651 3652 const MCOperand &MO = Inst.getOperand(OpIdx); 3653 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3654 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3655 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3656 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3657 if (NumLiterals == 0 || LiteralValue != Value) { 3658 LiteralValue = Value; 3659 ++NumLiterals; 3660 } 3661 } else if (MO.isExpr()) { 3662 ++NumExprs; 3663 } 3664 } 3665 } 3666 3667 return NumLiterals + NumExprs <= 1; 3668 } 3669 3670 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3671 const unsigned Opc = Inst.getOpcode(); 3672 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3673 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3674 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3675 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3676 3677 if (OpSel & ~3) 3678 return false; 3679 } 3680 return true; 3681 } 3682 3683 // Check if VCC register matches wavefront size 3684 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3685 auto FB = getFeatureBits(); 3686 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3687 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3688 } 3689 3690 // VOP3 literal is only allowed in GFX10+ and only one can be used 3691 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3692 unsigned Opcode = Inst.getOpcode(); 3693 const MCInstrDesc &Desc = MII.get(Opcode); 3694 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3695 return true; 3696 3697 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3698 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3699 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3700 3701 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3702 3703 unsigned NumExprs = 0; 3704 unsigned NumLiterals = 0; 3705 uint32_t LiteralValue; 3706 3707 for (int OpIdx : OpIndices) { 3708 if (OpIdx == -1) break; 3709 3710 const MCOperand &MO = Inst.getOperand(OpIdx); 3711 if (!MO.isImm() && !MO.isExpr()) 3712 continue; 3713 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3714 continue; 3715 3716 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3717 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3718 return false; 3719 3720 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3721 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3722 if (NumLiterals == 0 || LiteralValue != Value) { 3723 LiteralValue = Value; 3724 ++NumLiterals; 3725 } 3726 } else if (MO.isExpr()) { 3727 ++NumExprs; 3728 } 3729 } 3730 NumLiterals += NumExprs; 3731 3732 return !NumLiterals || 3733 (NumLiterals == 1 && 
getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3734 } 3735 3736 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 3737 const OperandVector &Operands, 3738 const SMLoc &IDLoc) { 3739 int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 3740 AMDGPU::OpName::glc1); 3741 if (GLCPos != -1) { 3742 // -1 is set by GLC_1 default operand. In all cases "glc" must be present 3743 // in the asm string, and the default value means it is not present. 3744 if (Inst.getOperand(GLCPos).getImm() == -1) { 3745 Error(IDLoc, "instruction must use glc"); 3746 return false; 3747 } 3748 } 3749 3750 return true; 3751 } 3752 3753 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3754 const SMLoc &IDLoc, 3755 const OperandVector &Operands) { 3756 if (!validateLdsDirect(Inst)) { 3757 Error(IDLoc, 3758 "invalid use of lds_direct"); 3759 return false; 3760 } 3761 if (!validateSOPLiteral(Inst)) { 3762 Error(IDLoc, 3763 "only one literal operand is allowed"); 3764 return false; 3765 } 3766 if (!validateVOP3Literal(Inst)) { 3767 Error(IDLoc, 3768 "invalid literal operand"); 3769 return false; 3770 } 3771 if (!validateConstantBusLimitations(Inst)) { 3772 Error(IDLoc, 3773 "invalid operand (violates constant bus restrictions)"); 3774 return false; 3775 } 3776 if (!validateEarlyClobberLimitations(Inst)) { 3777 Error(IDLoc, 3778 "destination must be different than all sources"); 3779 return false; 3780 } 3781 if (!validateIntClampSupported(Inst)) { 3782 Error(IDLoc, 3783 "integer clamping is not supported on this GPU"); 3784 return false; 3785 } 3786 if (!validateOpSel(Inst)) { 3787 Error(IDLoc, 3788 "invalid op_sel operand"); 3789 return false; 3790 } 3791 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3792 if (!validateMIMGD16(Inst)) { 3793 Error(IDLoc, 3794 "d16 modifier is not supported on this GPU"); 3795 return false; 3796 } 3797 if (!validateMIMGDim(Inst)) { 3798 Error(IDLoc, "dim modifier is required on this GPU"); 3799 return false; 3800 } 3801 if (!validateMIMGDataSize(Inst)) { 3802 Error(IDLoc, 3803 "image data size does not match dmask and tfe"); 3804 return false; 3805 } 3806 if (!validateMIMGAddrSize(Inst)) { 3807 Error(IDLoc, 3808 "image address size does not match dim and a16"); 3809 return false; 3810 } 3811 if (!validateMIMGAtomicDMask(Inst)) { 3812 Error(IDLoc, 3813 "invalid atomic image dmask"); 3814 return false; 3815 } 3816 if (!validateMIMGGatherDMask(Inst)) { 3817 Error(IDLoc, 3818 "invalid image_gather dmask: only one bit must be set"); 3819 return false; 3820 } 3821 if (!validateMovrels(Inst)) { 3822 Error(IDLoc, "source operand must be a VGPR"); 3823 return false; 3824 } 3825 if (!validateFlatOffset(Inst, Operands)) { 3826 return false; 3827 } 3828 if (!validateSMEMOffset(Inst, Operands)) { 3829 return false; 3830 } 3831 if (!validateMAIAccWrite(Inst)) { 3832 return false; 3833 } 3834 if (!validateDivScale(Inst)) { 3835 return false; 3836 } 3837 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 3838 return false; 3839 } 3840 3841 return true; 3842 } 3843 3844 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3845 const FeatureBitset &FBS, 3846 unsigned VariantID = 0); 3847 3848 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 3849 const FeatureBitset &AvailableFeatures, 3850 unsigned VariantID); 3851 3852 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3853 const FeatureBitset &FBS) { 3854 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 3855 } 3856 3857 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 
3858 const FeatureBitset &FBS, 3859 ArrayRef<unsigned> Variants) { 3860 for (auto Variant : Variants) { 3861 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 3862 return true; 3863 } 3864 3865 return false; 3866 } 3867 3868 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 3869 const SMLoc &IDLoc) { 3870 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3871 3872 // Check if requested instruction variant is supported. 3873 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 3874 return false; 3875 3876 // This instruction is not supported. 3877 // Clear any other pending errors because they are no longer relevant. 3878 getParser().clearPendingErrors(); 3879 3880 // Requested instruction variant is not supported. 3881 // Check if any other variants are supported. 3882 StringRef VariantName = getMatchedVariantName(); 3883 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 3884 return Error(IDLoc, 3885 Twine(VariantName, 3886 " variant of this instruction is not supported")); 3887 } 3888 3889 // Finally check if this instruction is supported on any other GPU. 3890 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 3891 return Error(IDLoc, "instruction not supported on this GPU"); 3892 } 3893 3894 // Instruction not supported on any GPU. Probably a typo. 3895 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 3896 return Error(IDLoc, "invalid instruction" + Suggestion); 3897 } 3898 3899 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3900 OperandVector &Operands, 3901 MCStreamer &Out, 3902 uint64_t &ErrorInfo, 3903 bool MatchingInlineAsm) { 3904 MCInst Inst; 3905 unsigned Result = Match_Success; 3906 for (auto Variant : getMatchedVariants()) { 3907 uint64_t EI; 3908 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3909 Variant); 3910 // We order match statuses from least to most specific. We use most specific 3911 // status as resulting 3912 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3913 if ((R == Match_Success) || 3914 (R == Match_PreferE32) || 3915 (R == Match_MissingFeature && Result != Match_PreferE32) || 3916 (R == Match_InvalidOperand && Result != Match_MissingFeature 3917 && Result != Match_PreferE32) || 3918 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3919 && Result != Match_MissingFeature 3920 && Result != Match_PreferE32)) { 3921 Result = R; 3922 ErrorInfo = EI; 3923 } 3924 if (R == Match_Success) 3925 break; 3926 } 3927 3928 if (Result == Match_Success) { 3929 if (!validateInstruction(Inst, IDLoc, Operands)) { 3930 return true; 3931 } 3932 Inst.setLoc(IDLoc); 3933 Out.emitInstruction(Inst, getSTI()); 3934 return false; 3935 } 3936 3937 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 3938 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 3939 return true; 3940 } 3941 3942 switch (Result) { 3943 default: break; 3944 case Match_MissingFeature: 3945 // It has been verified that the specified instruction 3946 // mnemonic is valid. A match was found but it requires 3947 // features which are not supported on this GPU. 
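    // Note: mnemonics that are unsupported everywhere were already diagnosed
    // by checkUnsupportedInstruction() above, so this path is reached when the
    // mnemonic is known but the chosen operand form only matches an encoding
    // whose required subtarget features (for example a wavefront-size mode)
    // are disabled for this target.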
3948 return Error(IDLoc, "operands are not valid for this GPU or mode"); 3949 3950 case Match_InvalidOperand: { 3951 SMLoc ErrorLoc = IDLoc; 3952 if (ErrorInfo != ~0ULL) { 3953 if (ErrorInfo >= Operands.size()) { 3954 return Error(IDLoc, "too few operands for instruction"); 3955 } 3956 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3957 if (ErrorLoc == SMLoc()) 3958 ErrorLoc = IDLoc; 3959 } 3960 return Error(ErrorLoc, "invalid operand for instruction"); 3961 } 3962 3963 case Match_PreferE32: 3964 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3965 "should be encoded as e32"); 3966 case Match_MnemonicFail: 3967 llvm_unreachable("Invalid instructions should have been handled already"); 3968 } 3969 llvm_unreachable("Implement any new match types added!"); 3970 } 3971 3972 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3973 int64_t Tmp = -1; 3974 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3975 return true; 3976 } 3977 if (getParser().parseAbsoluteExpression(Tmp)) { 3978 return true; 3979 } 3980 Ret = static_cast<uint32_t>(Tmp); 3981 return false; 3982 } 3983 3984 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3985 uint32_t &Minor) { 3986 if (ParseAsAbsoluteExpression(Major)) 3987 return TokError("invalid major version"); 3988 3989 if (getLexer().isNot(AsmToken::Comma)) 3990 return TokError("minor version number required, comma expected"); 3991 Lex(); 3992 3993 if (ParseAsAbsoluteExpression(Minor)) 3994 return TokError("invalid minor version"); 3995 3996 return false; 3997 } 3998 3999 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4000 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4001 return TokError("directive only supported for amdgcn architecture"); 4002 4003 std::string Target; 4004 4005 SMLoc TargetStart = getTok().getLoc(); 4006 if (getParser().parseEscapedString(Target)) 4007 return true; 4008 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4009 4010 std::string ExpectedTarget; 4011 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 4012 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 4013 4014 if (Target != ExpectedTargetOS.str()) 4015 return getParser().Error(TargetRange.Start, "target must match options", 4016 TargetRange); 4017 4018 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 4019 return false; 4020 } 4021 4022 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4023 return getParser().Error(Range.Start, "value out of range", Range); 4024 } 4025 4026 bool AMDGPUAsmParser::calculateGPRBlocks( 4027 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4028 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4029 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4030 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4031 // TODO(scott.linder): These calculations are duplicated from 4032 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
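  //
  // Rough sketch of the encoding (granule sizes here are illustrative; the
  // real values come from IsaInfo): the kernel descriptor stores register
  // usage as granulated block counts, so with a 4-VGPR allocation granule a
  // kernel using 17 VGPRs is encoded by getNumVGPRBlocks() as
  // ceil(17 / 4) - 1 = 4.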
4033 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4034 4035 unsigned NumVGPRs = NextFreeVGPR; 4036 unsigned NumSGPRs = NextFreeSGPR; 4037 4038 if (Version.Major >= 10) 4039 NumSGPRs = 0; 4040 else { 4041 unsigned MaxAddressableNumSGPRs = 4042 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4043 4044 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4045 NumSGPRs > MaxAddressableNumSGPRs) 4046 return OutOfRangeError(SGPRRange); 4047 4048 NumSGPRs += 4049 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4050 4051 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4052 NumSGPRs > MaxAddressableNumSGPRs) 4053 return OutOfRangeError(SGPRRange); 4054 4055 if (Features.test(FeatureSGPRInitBug)) 4056 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4057 } 4058 4059 VGPRBlocks = 4060 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4061 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4062 4063 return false; 4064 } 4065 4066 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4067 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4068 return TokError("directive only supported for amdgcn architecture"); 4069 4070 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4071 return TokError("directive only supported for amdhsa OS"); 4072 4073 StringRef KernelName; 4074 if (getParser().parseIdentifier(KernelName)) 4075 return true; 4076 4077 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4078 4079 StringSet<> Seen; 4080 4081 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4082 4083 SMRange VGPRRange; 4084 uint64_t NextFreeVGPR = 0; 4085 SMRange SGPRRange; 4086 uint64_t NextFreeSGPR = 0; 4087 unsigned UserSGPRCount = 0; 4088 bool ReserveVCC = true; 4089 bool ReserveFlatScr = true; 4090 bool ReserveXNACK = hasXNACK(); 4091 Optional<bool> EnableWavefrontSize32; 4092 4093 while (true) { 4094 while (getLexer().is(AsmToken::EndOfStatement)) 4095 Lex(); 4096 4097 if (getLexer().isNot(AsmToken::Identifier)) 4098 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 4099 4100 StringRef ID = getTok().getIdentifier(); 4101 SMRange IDRange = getTok().getLocRange(); 4102 Lex(); 4103 4104 if (ID == ".end_amdhsa_kernel") 4105 break; 4106 4107 if (Seen.find(ID) != Seen.end()) 4108 return TokError(".amdhsa_ directives cannot be repeated"); 4109 Seen.insert(ID); 4110 4111 SMLoc ValStart = getTok().getLoc(); 4112 int64_t IVal; 4113 if (getParser().parseAbsoluteExpression(IVal)) 4114 return true; 4115 SMLoc ValEnd = getTok().getLoc(); 4116 SMRange ValRange = SMRange(ValStart, ValEnd); 4117 4118 if (IVal < 0) 4119 return OutOfRangeError(ValRange); 4120 4121 uint64_t Val = IVal; 4122 4123 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4124 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4125 return OutOfRangeError(RANGE); \ 4126 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4127 4128 if (ID == ".amdhsa_group_segment_fixed_size") { 4129 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4130 return OutOfRangeError(ValRange); 4131 KD.group_segment_fixed_size = Val; 4132 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4133 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4134 return OutOfRangeError(ValRange); 4135 KD.private_segment_fixed_size = Val; 4136 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4137 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4138 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4139 Val, ValRange); 
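      // The private segment buffer is a 128-bit resource descriptor, so it
      // occupies four user SGPRs; the 64-bit pointers handled below take two
      // each, and the 32-bit private segment size takes one.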
4140 if (Val) 4141 UserSGPRCount += 4; 4142 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4143 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4144 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4145 ValRange); 4146 if (Val) 4147 UserSGPRCount += 2; 4148 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4149 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4150 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4151 ValRange); 4152 if (Val) 4153 UserSGPRCount += 2; 4154 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4155 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4156 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4157 Val, ValRange); 4158 if (Val) 4159 UserSGPRCount += 2; 4160 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4161 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4162 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4163 ValRange); 4164 if (Val) 4165 UserSGPRCount += 2; 4166 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4167 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4168 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4169 ValRange); 4170 if (Val) 4171 UserSGPRCount += 2; 4172 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4173 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4174 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4175 Val, ValRange); 4176 if (Val) 4177 UserSGPRCount += 1; 4178 } else if (ID == ".amdhsa_wavefront_size32") { 4179 if (IVersion.Major < 10) 4180 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4181 IDRange); 4182 EnableWavefrontSize32 = Val; 4183 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4184 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4185 Val, ValRange); 4186 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4187 PARSE_BITS_ENTRY( 4188 KD.compute_pgm_rsrc2, 4189 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 4190 ValRange); 4191 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4192 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4193 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4194 ValRange); 4195 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4196 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4197 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4198 ValRange); 4199 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4200 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4201 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4202 ValRange); 4203 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4204 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4205 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4206 ValRange); 4207 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4208 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4209 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4210 ValRange); 4211 } else if (ID == ".amdhsa_next_free_vgpr") { 4212 VGPRRange = ValRange; 4213 NextFreeVGPR = Val; 4214 } else if (ID == ".amdhsa_next_free_sgpr") { 4215 SGPRRange = ValRange; 4216 NextFreeSGPR = Val; 4217 } else if (ID == ".amdhsa_reserve_vcc") { 4218 if (!isUInt<1>(Val)) 4219 return OutOfRangeError(ValRange); 4220 ReserveVCC = Val; 4221 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4222 if (IVersion.Major < 7) 4223 return getParser().Error(IDRange.Start, "directive requires gfx7+", 4224 IDRange); 4225 if (!isUInt<1>(Val)) 4226 return OutOfRangeError(ValRange); 4227 ReserveFlatScr = Val; 4228 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4229 if (IVersion.Major < 8) 4230 return getParser().Error(IDRange.Start, 
"directive requires gfx8+", 4231 IDRange); 4232 if (!isUInt<1>(Val)) 4233 return OutOfRangeError(ValRange); 4234 ReserveXNACK = Val; 4235 } else if (ID == ".amdhsa_float_round_mode_32") { 4236 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4237 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4238 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4239 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4240 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4241 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4242 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4243 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4244 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4245 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4246 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4247 ValRange); 4248 } else if (ID == ".amdhsa_dx10_clamp") { 4249 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4250 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4251 } else if (ID == ".amdhsa_ieee_mode") { 4252 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4253 Val, ValRange); 4254 } else if (ID == ".amdhsa_fp16_overflow") { 4255 if (IVersion.Major < 9) 4256 return getParser().Error(IDRange.Start, "directive requires gfx9+", 4257 IDRange); 4258 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4259 ValRange); 4260 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4261 if (IVersion.Major < 10) 4262 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4263 IDRange); 4264 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4265 ValRange); 4266 } else if (ID == ".amdhsa_memory_ordered") { 4267 if (IVersion.Major < 10) 4268 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4269 IDRange); 4270 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4271 ValRange); 4272 } else if (ID == ".amdhsa_forward_progress") { 4273 if (IVersion.Major < 10) 4274 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4275 IDRange); 4276 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4277 ValRange); 4278 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4279 PARSE_BITS_ENTRY( 4280 KD.compute_pgm_rsrc2, 4281 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4282 ValRange); 4283 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4284 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4285 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4286 Val, ValRange); 4287 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4288 PARSE_BITS_ENTRY( 4289 KD.compute_pgm_rsrc2, 4290 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4291 ValRange); 4292 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4293 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4294 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4295 Val, ValRange); 4296 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4297 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4298 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4299 Val, ValRange); 4300 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4301 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4302 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4303 Val, ValRange); 4304 } else if (ID == ".amdhsa_exception_int_div_zero") { 4305 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4306 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4307 Val, ValRange); 4308 } else { 4309 return getParser().Error(IDRange.Start, 
4310 "unknown .amdhsa_kernel directive", IDRange); 4311 } 4312 4313 #undef PARSE_BITS_ENTRY 4314 } 4315 4316 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4317 return TokError(".amdhsa_next_free_vgpr directive is required"); 4318 4319 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4320 return TokError(".amdhsa_next_free_sgpr directive is required"); 4321 4322 unsigned VGPRBlocks; 4323 unsigned SGPRBlocks; 4324 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4325 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4326 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4327 SGPRBlocks)) 4328 return true; 4329 4330 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4331 VGPRBlocks)) 4332 return OutOfRangeError(VGPRRange); 4333 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4334 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4335 4336 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4337 SGPRBlocks)) 4338 return OutOfRangeError(SGPRRange); 4339 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4340 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4341 SGPRBlocks); 4342 4343 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4344 return TokError("too many user SGPRs enabled"); 4345 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4346 UserSGPRCount); 4347 4348 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4349 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4350 ReserveFlatScr, ReserveXNACK); 4351 return false; 4352 } 4353 4354 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4355 uint32_t Major; 4356 uint32_t Minor; 4357 4358 if (ParseDirectiveMajorMinor(Major, Minor)) 4359 return true; 4360 4361 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4362 return false; 4363 } 4364 4365 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4366 uint32_t Major; 4367 uint32_t Minor; 4368 uint32_t Stepping; 4369 StringRef VendorName; 4370 StringRef ArchName; 4371 4372 // If this directive has no arguments, then use the ISA version for the 4373 // targeted GPU. 
4374 if (getLexer().is(AsmToken::EndOfStatement)) { 4375 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4376 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4377 ISA.Stepping, 4378 "AMD", "AMDGPU"); 4379 return false; 4380 } 4381 4382 if (ParseDirectiveMajorMinor(Major, Minor)) 4383 return true; 4384 4385 if (getLexer().isNot(AsmToken::Comma)) 4386 return TokError("stepping version number required, comma expected"); 4387 Lex(); 4388 4389 if (ParseAsAbsoluteExpression(Stepping)) 4390 return TokError("invalid stepping version"); 4391 4392 if (getLexer().isNot(AsmToken::Comma)) 4393 return TokError("vendor name required, comma expected"); 4394 Lex(); 4395 4396 if (getLexer().isNot(AsmToken::String)) 4397 return TokError("invalid vendor name"); 4398 4399 VendorName = getLexer().getTok().getStringContents(); 4400 Lex(); 4401 4402 if (getLexer().isNot(AsmToken::Comma)) 4403 return TokError("arch name required, comma expected"); 4404 Lex(); 4405 4406 if (getLexer().isNot(AsmToken::String)) 4407 return TokError("invalid arch name"); 4408 4409 ArchName = getLexer().getTok().getStringContents(); 4410 Lex(); 4411 4412 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4413 VendorName, ArchName); 4414 return false; 4415 } 4416 4417 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4418 amd_kernel_code_t &Header) { 4419 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4420 // assembly for backwards compatibility. 4421 if (ID == "max_scratch_backing_memory_byte_size") { 4422 Parser.eatToEndOfStatement(); 4423 return false; 4424 } 4425 4426 SmallString<40> ErrStr; 4427 raw_svector_ostream Err(ErrStr); 4428 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4429 return TokError(Err.str()); 4430 } 4431 Lex(); 4432 4433 if (ID == "enable_wavefront_size32") { 4434 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4435 if (!isGFX10()) 4436 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4437 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4438 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4439 } else { 4440 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4441 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4442 } 4443 } 4444 4445 if (ID == "wavefront_size") { 4446 if (Header.wavefront_size == 5) { 4447 if (!isGFX10()) 4448 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4449 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4450 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4451 } else if (Header.wavefront_size == 6) { 4452 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4453 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4454 } 4455 } 4456 4457 if (ID == "enable_wgp_mode") { 4458 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4459 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4460 } 4461 4462 if (ID == "enable_mem_ordered") { 4463 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4464 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4465 } 4466 4467 if (ID == "enable_fwd_progress") { 4468 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4469 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4470 } 4471 4472 return false; 4473 } 4474 4475 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4476 amd_kernel_code_t Header; 4477 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4478 4479 while (true) { 4480 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4481 // will set the current token to EndOfStatement. 4482 while(getLexer().is(AsmToken::EndOfStatement)) 4483 Lex(); 4484 4485 if (getLexer().isNot(AsmToken::Identifier)) 4486 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4487 4488 StringRef ID = getLexer().getTok().getIdentifier(); 4489 Lex(); 4490 4491 if (ID == ".end_amd_kernel_code_t") 4492 break; 4493 4494 if (ParseAMDKernelCodeTValue(ID, Header)) 4495 return true; 4496 } 4497 4498 getTargetStreamer().EmitAMDKernelCodeT(Header); 4499 4500 return false; 4501 } 4502 4503 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4504 if (getLexer().isNot(AsmToken::Identifier)) 4505 return TokError("expected symbol name"); 4506 4507 StringRef KernelName = Parser.getTok().getString(); 4508 4509 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4510 ELF::STT_AMDGPU_HSA_KERNEL); 4511 Lex(); 4512 4513 KernelScope.initialize(getContext()); 4514 return false; 4515 } 4516 4517 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4518 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4519 return Error(getParser().getTok().getLoc(), 4520 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4521 "architectures"); 4522 } 4523 4524 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4525 4526 std::string ISAVersionStringFromSTI; 4527 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4528 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4529 4530 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4531 return Error(getParser().getTok().getLoc(), 4532 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4533 "arguments specified through the command line"); 4534 } 4535 4536 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4537 Lex(); 4538 4539 return false; 4540 } 4541 4542 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4543 const char *AssemblerDirectiveBegin; 4544 const char *AssemblerDirectiveEnd; 4545 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4546 isHsaAbiVersion3(&getSTI()) 4547 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4548 HSAMD::V3::AssemblerDirectiveEnd) 4549 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4550 HSAMD::AssemblerDirectiveEnd); 4551 4552 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4553 return Error(getParser().getTok().getLoc(), 4554 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4555 "not available on non-amdhsa OSes")).str()); 4556 } 4557 4558 std::string HSAMetadataString; 4559 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4560 HSAMetadataString)) 4561 return true; 4562 4563 if (isHsaAbiVersion3(&getSTI())) { 4564 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4565 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4566 } else { 4567 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4568 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4569 } 4570 4571 return false; 4572 } 4573 4574 /// Common code to parse out a block of text (typically YAML) between start and 4575 /// end directives. 
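///
/// For example (directive names shown for illustration; the actual begin/end
/// strings are supplied by the callers), an HSA metadata block such as
///   .amd_amdgpu_hsa_metadata
///     ...
///   .end_amd_amdgpu_hsa_metadata
/// is collected verbatim into CollectString, with statement separators kept.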
4576 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4577 const char *AssemblerDirectiveEnd, 4578 std::string &CollectString) { 4579 4580 raw_string_ostream CollectStream(CollectString); 4581 4582 getLexer().setSkipSpace(false); 4583 4584 bool FoundEnd = false; 4585 while (!getLexer().is(AsmToken::Eof)) { 4586 while (getLexer().is(AsmToken::Space)) { 4587 CollectStream << getLexer().getTok().getString(); 4588 Lex(); 4589 } 4590 4591 if (getLexer().is(AsmToken::Identifier)) { 4592 StringRef ID = getLexer().getTok().getIdentifier(); 4593 if (ID == AssemblerDirectiveEnd) { 4594 Lex(); 4595 FoundEnd = true; 4596 break; 4597 } 4598 } 4599 4600 CollectStream << Parser.parseStringToEndOfStatement() 4601 << getContext().getAsmInfo()->getSeparatorString(); 4602 4603 Parser.eatToEndOfStatement(); 4604 } 4605 4606 getLexer().setSkipSpace(true); 4607 4608 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4609 return TokError(Twine("expected directive ") + 4610 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4611 } 4612 4613 CollectStream.flush(); 4614 return false; 4615 } 4616 4617 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4618 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4619 std::string String; 4620 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4621 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4622 return true; 4623 4624 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4625 if (!PALMetadata->setFromString(String)) 4626 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4627 return false; 4628 } 4629 4630 /// Parse the assembler directive for old linear-format PAL metadata. 4631 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4632 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4633 return Error(getParser().getTok().getLoc(), 4634 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4635 "not available on non-amdpal OSes")).str()); 4636 } 4637 4638 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4639 PALMetadata->setLegacy(); 4640 for (;;) { 4641 uint32_t Key, Value; 4642 if (ParseAsAbsoluteExpression(Key)) { 4643 return TokError(Twine("invalid value in ") + 4644 Twine(PALMD::AssemblerDirective)); 4645 } 4646 if (getLexer().isNot(AsmToken::Comma)) { 4647 return TokError(Twine("expected an even number of values in ") + 4648 Twine(PALMD::AssemblerDirective)); 4649 } 4650 Lex(); 4651 if (ParseAsAbsoluteExpression(Value)) { 4652 return TokError(Twine("invalid value in ") + 4653 Twine(PALMD::AssemblerDirective)); 4654 } 4655 PALMetadata->setRegister(Key, Value); 4656 if (getLexer().isNot(AsmToken::Comma)) 4657 break; 4658 Lex(); 4659 } 4660 return false; 4661 } 4662 4663 /// ParseDirectiveAMDGPULDS 4664 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4665 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4666 if (getParser().checkForValidSection()) 4667 return true; 4668 4669 StringRef Name; 4670 SMLoc NameLoc = getLexer().getLoc(); 4671 if (getParser().parseIdentifier(Name)) 4672 return TokError("expected identifier in directive"); 4673 4674 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4675 if (parseToken(AsmToken::Comma, "expected ','")) 4676 return true; 4677 4678 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4679 4680 int64_t Size; 4681 SMLoc SizeLoc = getLexer().getLoc(); 4682 if (getParser().parseAbsoluteExpression(Size)) 4683 return true; 4684 if (Size < 0) 4685 return 
Error(SizeLoc, "size must be non-negative"); 4686 if (Size > LocalMemorySize) 4687 return Error(SizeLoc, "size is too large"); 4688 4689 int64_t Alignment = 4; 4690 if (getLexer().is(AsmToken::Comma)) { 4691 Lex(); 4692 SMLoc AlignLoc = getLexer().getLoc(); 4693 if (getParser().parseAbsoluteExpression(Alignment)) 4694 return true; 4695 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 4696 return Error(AlignLoc, "alignment must be a power of two"); 4697 4698 // Alignment larger than the size of LDS is possible in theory, as long 4699 // as the linker manages to place to symbol at address 0, but we do want 4700 // to make sure the alignment fits nicely into a 32-bit integer. 4701 if (Alignment >= 1u << 31) 4702 return Error(AlignLoc, "alignment is too large"); 4703 } 4704 4705 if (parseToken(AsmToken::EndOfStatement, 4706 "unexpected token in '.amdgpu_lds' directive")) 4707 return true; 4708 4709 Symbol->redefineIfPossible(); 4710 if (!Symbol->isUndefined()) 4711 return Error(NameLoc, "invalid symbol redefinition"); 4712 4713 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 4714 return false; 4715 } 4716 4717 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4718 StringRef IDVal = DirectiveID.getString(); 4719 4720 if (isHsaAbiVersion3(&getSTI())) { 4721 if (IDVal == ".amdgcn_target") 4722 return ParseDirectiveAMDGCNTarget(); 4723 4724 if (IDVal == ".amdhsa_kernel") 4725 return ParseDirectiveAMDHSAKernel(); 4726 4727 // TODO: Restructure/combine with PAL metadata directive. 4728 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4729 return ParseDirectiveHSAMetadata(); 4730 } else { 4731 if (IDVal == ".hsa_code_object_version") 4732 return ParseDirectiveHSACodeObjectVersion(); 4733 4734 if (IDVal == ".hsa_code_object_isa") 4735 return ParseDirectiveHSACodeObjectISA(); 4736 4737 if (IDVal == ".amd_kernel_code_t") 4738 return ParseDirectiveAMDKernelCodeT(); 4739 4740 if (IDVal == ".amdgpu_hsa_kernel") 4741 return ParseDirectiveAMDGPUHsaKernel(); 4742 4743 if (IDVal == ".amd_amdgpu_isa") 4744 return ParseDirectiveISAVersion(); 4745 4746 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4747 return ParseDirectiveHSAMetadata(); 4748 } 4749 4750 if (IDVal == ".amdgpu_lds") 4751 return ParseDirectiveAMDGPULDS(); 4752 4753 if (IDVal == PALMD::AssemblerDirectiveBegin) 4754 return ParseDirectivePALMetadataBegin(); 4755 4756 if (IDVal == PALMD::AssemblerDirective) 4757 return ParseDirectivePALMetadata(); 4758 4759 return true; 4760 } 4761 4762 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4763 unsigned RegNo) const { 4764 4765 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4766 R.isValid(); ++R) { 4767 if (*R == RegNo) 4768 return isGFX9Plus(); 4769 } 4770 4771 // GFX10 has 2 more SGPRs 104 and 105. 
4772 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4773 R.isValid(); ++R) { 4774 if (*R == RegNo) 4775 return hasSGPR104_SGPR105(); 4776 } 4777 4778 switch (RegNo) { 4779 case AMDGPU::SRC_SHARED_BASE: 4780 case AMDGPU::SRC_SHARED_LIMIT: 4781 case AMDGPU::SRC_PRIVATE_BASE: 4782 case AMDGPU::SRC_PRIVATE_LIMIT: 4783 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4784 return !isCI() && !isSI() && !isVI(); 4785 case AMDGPU::TBA: 4786 case AMDGPU::TBA_LO: 4787 case AMDGPU::TBA_HI: 4788 case AMDGPU::TMA: 4789 case AMDGPU::TMA_LO: 4790 case AMDGPU::TMA_HI: 4791 return !isGFX9() && !isGFX10(); 4792 case AMDGPU::XNACK_MASK: 4793 case AMDGPU::XNACK_MASK_LO: 4794 case AMDGPU::XNACK_MASK_HI: 4795 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4796 case AMDGPU::SGPR_NULL: 4797 return isGFX10(); 4798 default: 4799 break; 4800 } 4801 4802 if (isCI()) 4803 return true; 4804 4805 if (isSI() || isGFX10()) { 4806 // No flat_scr on SI. 4807 // On GFX10 flat scratch is not a valid register operand and can only be 4808 // accessed with s_setreg/s_getreg. 4809 switch (RegNo) { 4810 case AMDGPU::FLAT_SCR: 4811 case AMDGPU::FLAT_SCR_LO: 4812 case AMDGPU::FLAT_SCR_HI: 4813 return false; 4814 default: 4815 return true; 4816 } 4817 } 4818 4819 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4820 // SI/CI have. 4821 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4822 R.isValid(); ++R) { 4823 if (*R == RegNo) 4824 return hasSGPR102_SGPR103(); 4825 } 4826 4827 return true; 4828 } 4829 4830 OperandMatchResultTy 4831 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4832 OperandMode Mode) { 4833 // Try to parse with a custom parser 4834 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4835 4836 // If we successfully parsed the operand or if there as an error parsing, 4837 // we are done. 4838 // 4839 // If we are parsing after we reach EndOfStatement then this means we 4840 // are appending default values to the Operands list. This is only done 4841 // by custom parser, so we shouldn't continue on to the generic parsing. 4842 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4843 getLexer().is(AsmToken::EndOfStatement)) 4844 return ResTy; 4845 4846 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4847 unsigned Prefix = Operands.size(); 4848 SMLoc LBraceLoc = getTok().getLoc(); 4849 Parser.Lex(); // eat the '[' 4850 4851 for (;;) { 4852 ResTy = parseReg(Operands); 4853 if (ResTy != MatchOperand_Success) 4854 return ResTy; 4855 4856 if (getLexer().is(AsmToken::RBrac)) 4857 break; 4858 4859 if (getLexer().isNot(AsmToken::Comma)) 4860 return MatchOperand_ParseFail; 4861 Parser.Lex(); 4862 } 4863 4864 if (Operands.size() - Prefix > 1) { 4865 Operands.insert(Operands.begin() + Prefix, 4866 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4867 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4868 getTok().getLoc())); 4869 } 4870 4871 Parser.Lex(); // eat the ']' 4872 return MatchOperand_Success; 4873 } 4874 4875 return parseRegOrImm(Operands); 4876 } 4877 4878 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4879 // Clear any forced encodings from the previous instruction. 
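  // For illustration: "v_add_f32_e64" is parsed as the mnemonic "v_add_f32"
  // with a forced 64-bit encoding, and the "_e32", "_dpp" and "_sdwa" suffixes
  // force the corresponding encodings in the same way (see the checks below).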
4880 setForcedEncodingSize(0); 4881 setForcedDPP(false); 4882 setForcedSDWA(false); 4883 4884 if (Name.endswith("_e64")) { 4885 setForcedEncodingSize(64); 4886 return Name.substr(0, Name.size() - 4); 4887 } else if (Name.endswith("_e32")) { 4888 setForcedEncodingSize(32); 4889 return Name.substr(0, Name.size() - 4); 4890 } else if (Name.endswith("_dpp")) { 4891 setForcedDPP(true); 4892 return Name.substr(0, Name.size() - 4); 4893 } else if (Name.endswith("_sdwa")) { 4894 setForcedSDWA(true); 4895 return Name.substr(0, Name.size() - 5); 4896 } 4897 return Name; 4898 } 4899 4900 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4901 StringRef Name, 4902 SMLoc NameLoc, OperandVector &Operands) { 4903 // Add the instruction mnemonic 4904 Name = parseMnemonicSuffix(Name); 4905 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4906 4907 bool IsMIMG = Name.startswith("image_"); 4908 4909 while (!getLexer().is(AsmToken::EndOfStatement)) { 4910 OperandMode Mode = OperandMode_Default; 4911 if (IsMIMG && isGFX10() && Operands.size() == 2) 4912 Mode = OperandMode_NSA; 4913 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4914 4915 // Eat the comma or space if there is one. 4916 if (getLexer().is(AsmToken::Comma)) 4917 Parser.Lex(); 4918 4919 if (Res != MatchOperand_Success) { 4920 checkUnsupportedInstruction(Name, NameLoc); 4921 if (!Parser.hasPendingError()) { 4922 // FIXME: use real operand location rather than the current location. 4923 StringRef Msg = 4924 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 4925 "not a valid operand."; 4926 Error(getLexer().getLoc(), Msg); 4927 } 4928 while (!getLexer().is(AsmToken::EndOfStatement)) { 4929 Parser.Lex(); 4930 } 4931 return true; 4932 } 4933 } 4934 4935 return false; 4936 } 4937 4938 //===----------------------------------------------------------------------===// 4939 // Utility functions 4940 //===----------------------------------------------------------------------===// 4941 4942 OperandMatchResultTy 4943 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4944 4945 if (!trySkipId(Prefix, AsmToken::Colon)) 4946 return MatchOperand_NoMatch; 4947 4948 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 4949 } 4950 4951 OperandMatchResultTy 4952 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4953 AMDGPUOperand::ImmTy ImmTy, 4954 bool (*ConvertResult)(int64_t&)) { 4955 SMLoc S = getLoc(); 4956 int64_t Value = 0; 4957 4958 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4959 if (Res != MatchOperand_Success) 4960 return Res; 4961 4962 if (ConvertResult && !ConvertResult(Value)) { 4963 Error(S, "invalid " + StringRef(Prefix) + " value."); 4964 } 4965 4966 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4967 return MatchOperand_Success; 4968 } 4969 4970 OperandMatchResultTy 4971 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4972 OperandVector &Operands, 4973 AMDGPUOperand::ImmTy ImmTy, 4974 bool (*ConvertResult)(int64_t&)) { 4975 SMLoc S = getLoc(); 4976 if (!trySkipId(Prefix, AsmToken::Colon)) 4977 return MatchOperand_NoMatch; 4978 4979 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4980 return MatchOperand_ParseFail; 4981 4982 unsigned Val = 0; 4983 const unsigned MaxSize = 4; 4984 4985 // FIXME: How to verify the number of elements matches the number of src 4986 // operands? 
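  //
  // Purely illustrative example of what this loop accepts: an operand written
  // as "op_sel:[0,1,1]" yields Val = 0b110; each element must be 0 or 1, and
  // at most MaxSize (4) elements are allowed.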
4987 for (int I = 0; ; ++I) { 4988 int64_t Op; 4989 SMLoc Loc = getLoc(); 4990 if (!parseExpr(Op)) 4991 return MatchOperand_ParseFail; 4992 4993 if (Op != 0 && Op != 1) { 4994 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4995 return MatchOperand_ParseFail; 4996 } 4997 4998 Val |= (Op << I); 4999 5000 if (trySkipToken(AsmToken::RBrac)) 5001 break; 5002 5003 if (I + 1 == MaxSize) { 5004 Error(getLoc(), "expected a closing square bracket"); 5005 return MatchOperand_ParseFail; 5006 } 5007 5008 if (!skipToken(AsmToken::Comma, "expected a comma")) 5009 return MatchOperand_ParseFail; 5010 } 5011 5012 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5013 return MatchOperand_Success; 5014 } 5015 5016 OperandMatchResultTy 5017 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 5018 AMDGPUOperand::ImmTy ImmTy) { 5019 int64_t Bit = 0; 5020 SMLoc S = Parser.getTok().getLoc(); 5021 5022 // We are at the end of the statement, and this is a default argument, so 5023 // use a default value. 5024 if (getLexer().isNot(AsmToken::EndOfStatement)) { 5025 switch(getLexer().getKind()) { 5026 case AsmToken::Identifier: { 5027 StringRef Tok = Parser.getTok().getString(); 5028 if (Tok == Name) { 5029 if (Tok == "r128" && !hasMIMG_R128()) 5030 Error(S, "r128 modifier is not supported on this GPU"); 5031 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 5032 Error(S, "a16 modifier is not supported on this GPU"); 5033 Bit = 1; 5034 Parser.Lex(); 5035 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 5036 Bit = 0; 5037 Parser.Lex(); 5038 } else { 5039 return MatchOperand_NoMatch; 5040 } 5041 break; 5042 } 5043 default: 5044 return MatchOperand_NoMatch; 5045 } 5046 } 5047 5048 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 5049 return MatchOperand_ParseFail; 5050 5051 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5052 ImmTy = AMDGPUOperand::ImmTyR128A16; 5053 5054 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5055 return MatchOperand_Success; 5056 } 5057 5058 static void addOptionalImmOperand( 5059 MCInst& Inst, const OperandVector& Operands, 5060 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5061 AMDGPUOperand::ImmTy ImmT, 5062 int64_t Default = 0) { 5063 auto i = OptionalIdx.find(ImmT); 5064 if (i != OptionalIdx.end()) { 5065 unsigned Idx = i->second; 5066 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5067 } else { 5068 Inst.addOperand(MCOperand::createImm(Default)); 5069 } 5070 } 5071 5072 OperandMatchResultTy 5073 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 5074 if (getLexer().isNot(AsmToken::Identifier)) { 5075 return MatchOperand_NoMatch; 5076 } 5077 StringRef Tok = Parser.getTok().getString(); 5078 if (Tok != Prefix) { 5079 return MatchOperand_NoMatch; 5080 } 5081 5082 Parser.Lex(); 5083 if (getLexer().isNot(AsmToken::Colon)) { 5084 return MatchOperand_ParseFail; 5085 } 5086 5087 Parser.Lex(); 5088 if (getLexer().isNot(AsmToken::Identifier)) { 5089 return MatchOperand_ParseFail; 5090 } 5091 5092 Value = Parser.getTok().getString(); 5093 return MatchOperand_Success; 5094 } 5095 5096 //===----------------------------------------------------------------------===// 5097 // MTBUF format 5098 //===----------------------------------------------------------------------===// 5099 5100 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5101 int64_t MaxVal, 5102 int64_t &Fmt) { 5103 int64_t Val; 5104 SMLoc Loc = getLoc(); 5105 5106 auto Res = parseIntWithPrefix(Pref, Val); 5107 if (Res 
== MatchOperand_ParseFail) 5108 return false; 5109 if (Res == MatchOperand_NoMatch) 5110 return true; 5111 5112 if (Val < 0 || Val > MaxVal) { 5113 Error(Loc, Twine("out of range ", StringRef(Pref))); 5114 return false; 5115 } 5116 5117 Fmt = Val; 5118 return true; 5119 } 5120 5121 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5122 // values to live in a joint format operand in the MCInst encoding. 5123 OperandMatchResultTy 5124 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5125 using namespace llvm::AMDGPU::MTBUFFormat; 5126 5127 int64_t Dfmt = DFMT_UNDEF; 5128 int64_t Nfmt = NFMT_UNDEF; 5129 5130 // dfmt and nfmt can appear in either order, and each is optional. 5131 for (int I = 0; I < 2; ++I) { 5132 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5133 return MatchOperand_ParseFail; 5134 5135 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5136 return MatchOperand_ParseFail; 5137 } 5138 // Skip optional comma between dfmt/nfmt 5139 // but guard against 2 commas following each other. 5140 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5141 !peekToken().is(AsmToken::Comma)) { 5142 trySkipToken(AsmToken::Comma); 5143 } 5144 } 5145 5146 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5147 return MatchOperand_NoMatch; 5148 5149 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5150 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 5151 5152 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5153 return MatchOperand_Success; 5154 } 5155 5156 OperandMatchResultTy 5157 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5158 using namespace llvm::AMDGPU::MTBUFFormat; 5159 5160 int64_t Fmt = UFMT_UNDEF; 5161 5162 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5163 return MatchOperand_ParseFail; 5164 5165 if (Fmt == UFMT_UNDEF) 5166 return MatchOperand_NoMatch; 5167 5168 Format = Fmt; 5169 return MatchOperand_Success; 5170 } 5171 5172 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5173 int64_t &Nfmt, 5174 StringRef FormatStr, 5175 SMLoc Loc) { 5176 using namespace llvm::AMDGPU::MTBUFFormat; 5177 int64_t Format; 5178 5179 Format = getDfmt(FormatStr); 5180 if (Format != DFMT_UNDEF) { 5181 Dfmt = Format; 5182 return true; 5183 } 5184 5185 Format = getNfmt(FormatStr, getSTI()); 5186 if (Format != NFMT_UNDEF) { 5187 Nfmt = Format; 5188 return true; 5189 } 5190 5191 Error(Loc, "unsupported format"); 5192 return false; 5193 } 5194 5195 OperandMatchResultTy 5196 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5197 SMLoc FormatLoc, 5198 int64_t &Format) { 5199 using namespace llvm::AMDGPU::MTBUFFormat; 5200 5201 int64_t Dfmt = DFMT_UNDEF; 5202 int64_t Nfmt = NFMT_UNDEF; 5203 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5204 return MatchOperand_ParseFail; 5205 5206 if (trySkipToken(AsmToken::Comma)) { 5207 StringRef Str; 5208 SMLoc Loc = getLoc(); 5209 if (!parseId(Str, "expected a format string") || 5210 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5211 return MatchOperand_ParseFail; 5212 } 5213 if (Dfmt == DFMT_UNDEF) { 5214 Error(Loc, "duplicate numeric format"); 5215 return MatchOperand_ParseFail; 5216 } else if (Nfmt == NFMT_UNDEF) { 5217 Error(Loc, "duplicate data format"); 5218 return MatchOperand_ParseFail; 5219 } 5220 } 5221 5222 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5223 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5224 5225 if (isGFX10()) { 5226 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5227 if (Ufmt == UFMT_UNDEF) { 5228 Error(FormatLoc, "unsupported format"); 5229 return MatchOperand_ParseFail; 5230 } 5231 Format = Ufmt; 5232 } else { 5233 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5234 } 5235 5236 return MatchOperand_Success; 5237 } 5238 5239 OperandMatchResultTy 5240 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5241 SMLoc Loc, 5242 int64_t &Format) { 5243 using namespace llvm::AMDGPU::MTBUFFormat; 5244 5245 auto Id = getUnifiedFormat(FormatStr); 5246 if (Id == UFMT_UNDEF) 5247 return MatchOperand_NoMatch; 5248 5249 if (!isGFX10()) { 5250 Error(Loc, "unified format is not supported on this GPU"); 5251 return MatchOperand_ParseFail; 5252 } 5253 5254 Format = Id; 5255 return MatchOperand_Success; 5256 } 5257 5258 OperandMatchResultTy 5259 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5260 using namespace llvm::AMDGPU::MTBUFFormat; 5261 SMLoc Loc = getLoc(); 5262 5263 if (!parseExpr(Format)) 5264 return MatchOperand_ParseFail; 5265 if (!isValidFormatEncoding(Format, getSTI())) { 5266 Error(Loc, "out of range format"); 5267 return MatchOperand_ParseFail; 5268 } 5269 5270 return MatchOperand_Success; 5271 } 5272 5273 OperandMatchResultTy 5274 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5275 using namespace llvm::AMDGPU::MTBUFFormat; 5276 5277 if (!trySkipId("format", AsmToken::Colon)) 5278 return MatchOperand_NoMatch; 5279 5280 if (trySkipToken(AsmToken::LBrac)) { 5281 StringRef FormatStr; 5282 SMLoc Loc = getLoc(); 5283 if (!parseId(FormatStr, "expected a format string")) 5284 return MatchOperand_ParseFail; 5285 5286 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5287 if (Res == MatchOperand_NoMatch) 5288 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5289 if (Res != MatchOperand_Success) 5290 return Res; 5291 5292 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5293 return MatchOperand_ParseFail; 5294 5295 return MatchOperand_Success; 5296 } 5297 5298 return parseNumericFormat(Format); 5299 } 5300 5301 OperandMatchResultTy 5302 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5303 using namespace llvm::AMDGPU::MTBUFFormat; 5304 5305 int64_t Format = getDefaultFormatEncoding(getSTI()); 5306 OperandMatchResultTy Res; 5307 SMLoc Loc = getLoc(); 5308 5309 // Parse legacy format syntax. 5310 Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5311 if (Res == MatchOperand_ParseFail) 5312 return Res; 5313 5314 bool FormatFound = (Res == MatchOperand_Success); 5315 5316 Operands.push_back( 5317 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5318 5319 if (FormatFound) 5320 trySkipToken(AsmToken::Comma); 5321 5322 if (isToken(AsmToken::EndOfStatement)) { 5323 // We are expecting an soffset operand, 5324 // but let matcher handle the error. 5325 return MatchOperand_Success; 5326 } 5327 5328 // Parse soffset. 
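  // Either operand order is accepted (illustration): the format modifier may
  // appear before or after soffset; when it comes after, the placeholder
  // FORMAT immediate pushed above is patched via setImm() below.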
5329 Res = parseRegOrImm(Operands); 5330 if (Res != MatchOperand_Success) 5331 return Res; 5332 5333 trySkipToken(AsmToken::Comma); 5334 5335 if (!FormatFound) { 5336 Res = parseSymbolicOrNumericFormat(Format); 5337 if (Res == MatchOperand_ParseFail) 5338 return Res; 5339 if (Res == MatchOperand_Success) { 5340 auto Size = Operands.size(); 5341 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5342 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5343 Op.setImm(Format); 5344 } 5345 return MatchOperand_Success; 5346 } 5347 5348 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5349 Error(getLoc(), "duplicate format"); 5350 return MatchOperand_ParseFail; 5351 } 5352 return MatchOperand_Success; 5353 } 5354 5355 //===----------------------------------------------------------------------===// 5356 // ds 5357 //===----------------------------------------------------------------------===// 5358 5359 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5360 const OperandVector &Operands) { 5361 OptionalImmIndexMap OptionalIdx; 5362 5363 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5364 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5365 5366 // Add the register arguments 5367 if (Op.isReg()) { 5368 Op.addRegOperands(Inst, 1); 5369 continue; 5370 } 5371 5372 // Handle optional arguments 5373 OptionalIdx[Op.getImmTy()] = i; 5374 } 5375 5376 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5377 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5378 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5379 5380 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5381 } 5382 5383 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5384 bool IsGdsHardcoded) { 5385 OptionalImmIndexMap OptionalIdx; 5386 5387 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5388 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5389 5390 // Add the register arguments 5391 if (Op.isReg()) { 5392 Op.addRegOperands(Inst, 1); 5393 continue; 5394 } 5395 5396 if (Op.isToken() && Op.getToken() == "gds") { 5397 IsGdsHardcoded = true; 5398 continue; 5399 } 5400 5401 // Handle optional arguments 5402 OptionalIdx[Op.getImmTy()] = i; 5403 } 5404 5405 AMDGPUOperand::ImmTy OffsetType = 5406 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5407 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5408 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5409 AMDGPUOperand::ImmTyOffset; 5410 5411 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5412 5413 if (!IsGdsHardcoded) { 5414 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5415 } 5416 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5417 } 5418 5419 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5420 OptionalImmIndexMap OptionalIdx; 5421 5422 unsigned OperandIdx[4]; 5423 unsigned EnMask = 0; 5424 int SrcIdx = 0; 5425 5426 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5427 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5428 5429 // Add the register arguments 5430 if (Op.isReg()) { 5431 assert(SrcIdx < 4); 5432 OperandIdx[SrcIdx] = Inst.size(); 5433 Op.addRegOperands(Inst, 1); 5434 ++SrcIdx; 5435 continue; 5436 } 5437 5438 if (Op.isOff()) { 5439 assert(SrcIdx < 4); 5440 OperandIdx[SrcIdx] = Inst.size(); 5441 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5442 ++SrcIdx; 5443 continue; 5444 } 5445 5446 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5447 Op.addImmOperands(Inst, 1); 5448 continue; 5449 } 5450 5451 if (Op.isToken() && Op.getToken() == "done") 5452 continue; 5453 5454 // Handle optional arguments 5455 OptionalIdx[Op.getImmTy()] = i; 5456 } 5457 5458 assert(SrcIdx == 4); 5459 5460 bool Compr = false; 5461 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5462 Compr = true; 5463 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5464 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5465 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5466 } 5467 5468 for (auto i = 0; i < SrcIdx; ++i) { 5469 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5470 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5471 } 5472 } 5473 5474 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5475 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5476 5477 Inst.addOperand(MCOperand::createImm(EnMask)); 5478 } 5479 5480 //===----------------------------------------------------------------------===// 5481 // s_waitcnt 5482 //===----------------------------------------------------------------------===// 5483 5484 static bool 5485 encodeCnt( 5486 const AMDGPU::IsaVersion ISA, 5487 int64_t &IntVal, 5488 int64_t CntVal, 5489 bool Saturate, 5490 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5491 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5492 { 5493 bool Failed = false; 5494 5495 IntVal = encode(ISA, IntVal, CntVal); 5496 if (CntVal != decode(ISA, IntVal)) { 5497 if (Saturate) { 5498 IntVal = encode(ISA, IntVal, -1); 5499 } else { 5500 Failed = true; 5501 } 5502 } 5503 return Failed; 5504 } 5505 5506 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5507 5508 SMLoc CntLoc = getLoc(); 5509 StringRef CntName = getTokenStr(); 5510 5511 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5512 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5513 return false; 5514 5515 int64_t CntVal; 5516 SMLoc ValLoc = getLoc(); 5517 if (!parseExpr(CntVal)) 5518 return false; 5519 5520 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5521 5522 bool Failed = true; 5523 bool Sat = CntName.endswith("_sat"); 5524 5525 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5526 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5527 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5528 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5529 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5530 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5531 } else { 5532 Error(CntLoc, "invalid counter name " + CntName); 5533 return false; 5534 } 5535 5536 if (Failed) { 5537 Error(ValLoc, "too large value for " + CntName); 5538 return false; 5539 } 5540 5541 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5542 return false; 5543 5544 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5545 if (isToken(AsmToken::EndOfStatement)) { 5546 Error(getLoc(), "expected a counter name"); 5547 return false; 5548 } 5549 } 5550 5551 return true; 5552 } 5553 5554 OperandMatchResultTy 5555 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5556 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5557 int64_t Waitcnt = getWaitcntBitMask(ISA); 5558 SMLoc S = getLoc(); 5559 5560 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5561 while (!isToken(AsmToken::EndOfStatement)) { 5562 if (!parseCnt(Waitcnt)) 5563 return MatchOperand_ParseFail; 5564 } 5565 } else { 5566 if (!parseExpr(Waitcnt)) 5567 return MatchOperand_ParseFail; 5568 } 5569 5570 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5571 return MatchOperand_Success; 5572 } 5573 5574 bool 5575 AMDGPUOperand::isSWaitCnt() const { 5576 return isImm(); 5577 } 5578 5579 //===----------------------------------------------------------------------===// 5580 // hwreg 5581 //===----------------------------------------------------------------------===// 5582 5583 bool 5584 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5585 int64_t &Offset, 5586 int64_t 
                                &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  // The register may be specified by name or using a numeric code
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Id)) {
    return false;
  }

  if (trySkipToken(AsmToken::RParen))
    return true;

  // parse optional params
  return
    skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
    parseExpr(Offset) &&
    skipToken(AsmToken::Comma, "expected a comma") &&
    parseExpr(Width) &&
    skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
                               const int64_t Offset,
                               const int64_t Width,
                               const SMLoc Loc) {

  using namespace llvm::AMDGPU::Hwreg;

  if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
    Error(Loc, "specified hardware register is not supported on this GPU");
    return false;
  } else if (!isValidHwreg(HwReg.Id)) {
    Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
    return false;
  } else if (!isValidHwregOffset(Offset)) {
    Error(Loc, "invalid bit offset: only 5-bit values are legal");
    return false;
  } else if (!isValidHwregWidth(Width)) {
    Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
    return false;
  }
  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("hwreg", AsmToken::LParen)) {
    OperandInfoTy HwReg(ID_UNKNOWN_);
    int64_t Offset = OFFSET_DEFAULT_;
    int64_t Width = WIDTH_DEFAULT_;
    if (parseHwregBody(HwReg, Offset, Width) &&
        validateHwreg(HwReg, Offset, Width, Loc)) {
      ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal)) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}

//===----------------------------------------------------------------------===//
// sendmsg
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id)) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id)) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream,
                                 const SMLoc S) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only the possibility of encoding is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(S, "invalid message id");
    return false;
  } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    Error(S, Op.IsDefined ?
             "message does not support operations" :
             "missing message operation");
    return false;
  } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
    Error(S, "invalid operation id");
    return false;
  } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(S, "message operation does not support streams");
    return false;
  } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
    Error(S, "invalid message stream id");
    return false;
  }
  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream, Loc)) {
      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal)) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}

//===----------------------------------------------------------------------===//
// v_interp
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  SMLoc S = Parser.getTok().getLoc();
  if (Slot == -1)
    return MatchOperand_ParseFail;

  Parser.Lex();
  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}

OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  StringRef Chan = Str.take_back(2);
  int AttrChan =
StringSwitch<int>(Chan) 5806 .Case(".x", 0) 5807 .Case(".y", 1) 5808 .Case(".z", 2) 5809 .Case(".w", 3) 5810 .Default(-1); 5811 if (AttrChan == -1) 5812 return MatchOperand_ParseFail; 5813 5814 Str = Str.drop_back(2).drop_front(4); 5815 5816 uint8_t Attr; 5817 if (Str.getAsInteger(10, Attr)) 5818 return MatchOperand_ParseFail; 5819 5820 SMLoc S = Parser.getTok().getLoc(); 5821 Parser.Lex(); 5822 if (Attr > 63) { 5823 Error(S, "out of bounds attr"); 5824 return MatchOperand_ParseFail; 5825 } 5826 5827 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5828 5829 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5830 AMDGPUOperand::ImmTyInterpAttr)); 5831 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5832 AMDGPUOperand::ImmTyAttrChan)); 5833 return MatchOperand_Success; 5834 } 5835 5836 //===----------------------------------------------------------------------===// 5837 // exp 5838 //===----------------------------------------------------------------------===// 5839 5840 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5841 uint8_t &Val) { 5842 if (Str == "null") { 5843 Val = 9; 5844 return MatchOperand_Success; 5845 } 5846 5847 if (Str.startswith("mrt")) { 5848 Str = Str.drop_front(3); 5849 if (Str == "z") { // == mrtz 5850 Val = 8; 5851 return MatchOperand_Success; 5852 } 5853 5854 if (Str.getAsInteger(10, Val)) 5855 return MatchOperand_ParseFail; 5856 5857 if (Val > 7) { 5858 Error(getLoc(), "invalid exp target"); 5859 return MatchOperand_ParseFail; 5860 } 5861 5862 return MatchOperand_Success; 5863 } 5864 5865 if (Str.startswith("pos")) { 5866 Str = Str.drop_front(3); 5867 if (Str.getAsInteger(10, Val)) 5868 return MatchOperand_ParseFail; 5869 5870 if (Val > 4 || (Val == 4 && !isGFX10())) { 5871 Error(getLoc(), "invalid exp target"); 5872 return MatchOperand_ParseFail; 5873 } 5874 5875 Val += 12; 5876 return MatchOperand_Success; 5877 } 5878 5879 if (isGFX10() && Str == "prim") { 5880 Val = 20; 5881 return MatchOperand_Success; 5882 } 5883 5884 if (Str.startswith("param")) { 5885 Str = Str.drop_front(5); 5886 if (Str.getAsInteger(10, Val)) 5887 return MatchOperand_ParseFail; 5888 5889 if (Val >= 32) { 5890 Error(getLoc(), "invalid exp target"); 5891 return MatchOperand_ParseFail; 5892 } 5893 5894 Val += 32; 5895 return MatchOperand_Success; 5896 } 5897 5898 if (Str.startswith("invalid_target_")) { 5899 Str = Str.drop_front(15); 5900 if (Str.getAsInteger(10, Val)) 5901 return MatchOperand_ParseFail; 5902 5903 Error(getLoc(), "invalid exp target"); 5904 return MatchOperand_ParseFail; 5905 } 5906 5907 return MatchOperand_NoMatch; 5908 } 5909 5910 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5911 uint8_t Val; 5912 StringRef Str = Parser.getTok().getString(); 5913 5914 auto Res = parseExpTgtImpl(Str, Val); 5915 if (Res != MatchOperand_Success) 5916 return Res; 5917 5918 SMLoc S = Parser.getTok().getLoc(); 5919 Parser.Lex(); 5920 5921 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5922 AMDGPUOperand::ImmTyExpTgt)); 5923 return MatchOperand_Success; 5924 } 5925 5926 //===----------------------------------------------------------------------===// 5927 // parser helpers 5928 //===----------------------------------------------------------------------===// 5929 5930 bool 5931 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5932 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5933 } 5934 5935 bool 5936 AMDGPUAsmParser::isId(const StringRef Id) const { 5937 return 
isId(getToken(), Id); 5938 } 5939 5940 bool 5941 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5942 return getTokenKind() == Kind; 5943 } 5944 5945 bool 5946 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5947 if (isId(Id)) { 5948 lex(); 5949 return true; 5950 } 5951 return false; 5952 } 5953 5954 bool 5955 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5956 if (isId(Id) && peekToken().is(Kind)) { 5957 lex(); 5958 lex(); 5959 return true; 5960 } 5961 return false; 5962 } 5963 5964 bool 5965 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5966 if (isToken(Kind)) { 5967 lex(); 5968 return true; 5969 } 5970 return false; 5971 } 5972 5973 bool 5974 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5975 const StringRef ErrMsg) { 5976 if (!trySkipToken(Kind)) { 5977 Error(getLoc(), ErrMsg); 5978 return false; 5979 } 5980 return true; 5981 } 5982 5983 bool 5984 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5985 return !getParser().parseAbsoluteExpression(Imm); 5986 } 5987 5988 bool 5989 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5990 SMLoc S = getLoc(); 5991 5992 const MCExpr *Expr; 5993 if (Parser.parseExpression(Expr)) 5994 return false; 5995 5996 int64_t IntVal; 5997 if (Expr->evaluateAsAbsolute(IntVal)) { 5998 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5999 } else { 6000 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6001 } 6002 return true; 6003 } 6004 6005 bool 6006 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6007 if (isToken(AsmToken::String)) { 6008 Val = getToken().getStringContents(); 6009 lex(); 6010 return true; 6011 } else { 6012 Error(getLoc(), ErrMsg); 6013 return false; 6014 } 6015 } 6016 6017 bool 6018 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6019 if (isToken(AsmToken::Identifier)) { 6020 Val = getTokenStr(); 6021 lex(); 6022 return true; 6023 } else { 6024 Error(getLoc(), ErrMsg); 6025 return false; 6026 } 6027 } 6028 6029 AsmToken 6030 AMDGPUAsmParser::getToken() const { 6031 return Parser.getTok(); 6032 } 6033 6034 AsmToken 6035 AMDGPUAsmParser::peekToken() { 6036 return isToken(AsmToken::EndOfStatement) ? 
getToken() : getLexer().peekTok(); 6037 } 6038 6039 void 6040 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6041 auto TokCount = getLexer().peekTokens(Tokens); 6042 6043 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6044 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6045 } 6046 6047 AsmToken::TokenKind 6048 AMDGPUAsmParser::getTokenKind() const { 6049 return getLexer().getKind(); 6050 } 6051 6052 SMLoc 6053 AMDGPUAsmParser::getLoc() const { 6054 return getToken().getLoc(); 6055 } 6056 6057 StringRef 6058 AMDGPUAsmParser::getTokenStr() const { 6059 return getToken().getString(); 6060 } 6061 6062 void 6063 AMDGPUAsmParser::lex() { 6064 Parser.Lex(); 6065 } 6066 6067 //===----------------------------------------------------------------------===// 6068 // swizzle 6069 //===----------------------------------------------------------------------===// 6070 6071 LLVM_READNONE 6072 static unsigned 6073 encodeBitmaskPerm(const unsigned AndMask, 6074 const unsigned OrMask, 6075 const unsigned XorMask) { 6076 using namespace llvm::AMDGPU::Swizzle; 6077 6078 return BITMASK_PERM_ENC | 6079 (AndMask << BITMASK_AND_SHIFT) | 6080 (OrMask << BITMASK_OR_SHIFT) | 6081 (XorMask << BITMASK_XOR_SHIFT); 6082 } 6083 6084 bool 6085 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6086 const unsigned MinVal, 6087 const unsigned MaxVal, 6088 const StringRef ErrMsg) { 6089 for (unsigned i = 0; i < OpNum; ++i) { 6090 if (!skipToken(AsmToken::Comma, "expected a comma")){ 6091 return false; 6092 } 6093 SMLoc ExprLoc = Parser.getTok().getLoc(); 6094 if (!parseExpr(Op[i])) { 6095 return false; 6096 } 6097 if (Op[i] < MinVal || Op[i] > MaxVal) { 6098 Error(ExprLoc, ErrMsg); 6099 return false; 6100 } 6101 } 6102 6103 return true; 6104 } 6105 6106 bool 6107 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 6108 using namespace llvm::AMDGPU::Swizzle; 6109 6110 int64_t Lane[LANE_NUM]; 6111 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6112 "expected a 2-bit lane id")) { 6113 Imm = QUAD_PERM_ENC; 6114 for (unsigned I = 0; I < LANE_NUM; ++I) { 6115 Imm |= Lane[I] << (LANE_SHIFT * I); 6116 } 6117 return true; 6118 } 6119 return false; 6120 } 6121 6122 bool 6123 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6124 using namespace llvm::AMDGPU::Swizzle; 6125 6126 SMLoc S = Parser.getTok().getLoc(); 6127 int64_t GroupSize; 6128 int64_t LaneIdx; 6129 6130 if (!parseSwizzleOperands(1, &GroupSize, 6131 2, 32, 6132 "group size must be in the interval [2,32]")) { 6133 return false; 6134 } 6135 if (!isPowerOf2_64(GroupSize)) { 6136 Error(S, "group size must be a power of two"); 6137 return false; 6138 } 6139 if (parseSwizzleOperands(1, &LaneIdx, 6140 0, GroupSize - 1, 6141 "lane id must be in the interval [0,group size - 1]")) { 6142 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6143 return true; 6144 } 6145 return false; 6146 } 6147 6148 bool 6149 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6150 using namespace llvm::AMDGPU::Swizzle; 6151 6152 SMLoc S = Parser.getTok().getLoc(); 6153 int64_t GroupSize; 6154 6155 if (!parseSwizzleOperands(1, &GroupSize, 6156 2, 32, "group size must be in the interval [2,32]")) { 6157 return false; 6158 } 6159 if (!isPowerOf2_64(GroupSize)) { 6160 Error(S, "group size must be a power of two"); 6161 return false; 6162 } 6163 6164 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6165 return true; 6166 } 6167 6168 bool 6169 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6170 using namespace 
llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
                            1, 16, "group size must be in the interval [1,16]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = Parser.getTok().getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = Parser.getTok().getLoc();

  if (!parseExpr(Imm)) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {

    SMLoc ModeLoc = Parser.getTok().getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
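    // For reference, when the "offset" operand is present it takes one of
    // these shapes (illustrative examples; the mode names are the entries
    // of Swizzle::IdSymbolic used above):
    //
    //   offset:0xabcd                            - raw 16-bit offset
    //   offset:swizzle(QUAD_PERM, 0, 1, 2, 3)    - quad permute, 2-bit lane ids
    //   offset:swizzle(BITMASK_PERM, "01pi0")    - 5-character bitmask control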
6301 return parseOptionalOpr(Operands); 6302 } 6303 } 6304 6305 bool 6306 AMDGPUOperand::isSwizzle() const { 6307 return isImmTy(ImmTySwizzle); 6308 } 6309 6310 //===----------------------------------------------------------------------===// 6311 // VGPR Index Mode 6312 //===----------------------------------------------------------------------===// 6313 6314 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6315 6316 using namespace llvm::AMDGPU::VGPRIndexMode; 6317 6318 if (trySkipToken(AsmToken::RParen)) { 6319 return OFF; 6320 } 6321 6322 int64_t Imm = 0; 6323 6324 while (true) { 6325 unsigned Mode = 0; 6326 SMLoc S = Parser.getTok().getLoc(); 6327 6328 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6329 if (trySkipId(IdSymbolic[ModeId])) { 6330 Mode = 1 << ModeId; 6331 break; 6332 } 6333 } 6334 6335 if (Mode == 0) { 6336 Error(S, (Imm == 0)? 6337 "expected a VGPR index mode or a closing parenthesis" : 6338 "expected a VGPR index mode"); 6339 return UNDEF; 6340 } 6341 6342 if (Imm & Mode) { 6343 Error(S, "duplicate VGPR index mode"); 6344 return UNDEF; 6345 } 6346 Imm |= Mode; 6347 6348 if (trySkipToken(AsmToken::RParen)) 6349 break; 6350 if (!skipToken(AsmToken::Comma, 6351 "expected a comma or a closing parenthesis")) 6352 return UNDEF; 6353 } 6354 6355 return Imm; 6356 } 6357 6358 OperandMatchResultTy 6359 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6360 6361 using namespace llvm::AMDGPU::VGPRIndexMode; 6362 6363 int64_t Imm = 0; 6364 SMLoc S = Parser.getTok().getLoc(); 6365 6366 if (getLexer().getKind() == AsmToken::Identifier && 6367 Parser.getTok().getString() == "gpr_idx" && 6368 getLexer().peekTok().is(AsmToken::LParen)) { 6369 6370 Parser.Lex(); 6371 Parser.Lex(); 6372 6373 Imm = parseGPRIdxMacro(); 6374 if (Imm == UNDEF) 6375 return MatchOperand_ParseFail; 6376 6377 } else { 6378 if (getParser().parseAbsoluteExpression(Imm)) 6379 return MatchOperand_ParseFail; 6380 if (Imm < 0 || !isUInt<4>(Imm)) { 6381 Error(S, "invalid immediate: only 4-bit values are legal"); 6382 return MatchOperand_ParseFail; 6383 } 6384 } 6385 6386 Operands.push_back( 6387 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6388 return MatchOperand_Success; 6389 } 6390 6391 bool AMDGPUOperand::isGPRIdxMode() const { 6392 return isImmTy(ImmTyGprIdxMode); 6393 } 6394 6395 //===----------------------------------------------------------------------===// 6396 // sopp branch targets 6397 //===----------------------------------------------------------------------===// 6398 6399 OperandMatchResultTy 6400 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6401 6402 // Make sure we are not parsing something 6403 // that looks like a label or an expression but is not. 6404 // This will improve error messages. 6405 if (isRegister() || isModifier()) 6406 return MatchOperand_NoMatch; 6407 6408 if (!parseExpr(Operands)) 6409 return MatchOperand_ParseFail; 6410 6411 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6412 assert(Opr.isImm() || Opr.isExpr()); 6413 SMLoc Loc = Opr.getStartLoc(); 6414 6415 // Currently we do not support arbitrary expressions as branch targets. 6416 // Only labels and absolute expressions are accepted. 
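  // For example (illustrative): "s_branch loop_end" and "s_branch 4" are
  // accepted, while a composite expression such as "s_branch loop_end+4"
  // is rejected below because it is neither a plain symbol reference nor
  // an expression that folded to an absolute value.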
6417 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6418 Error(Loc, "expected an absolute expression or a label"); 6419 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6420 Error(Loc, "expected a 16-bit signed jump offset"); 6421 } 6422 6423 return MatchOperand_Success; 6424 } 6425 6426 //===----------------------------------------------------------------------===// 6427 // Boolean holding registers 6428 //===----------------------------------------------------------------------===// 6429 6430 OperandMatchResultTy 6431 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6432 return parseReg(Operands); 6433 } 6434 6435 //===----------------------------------------------------------------------===// 6436 // mubuf 6437 //===----------------------------------------------------------------------===// 6438 6439 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6440 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6441 } 6442 6443 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6444 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6445 } 6446 6447 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const { 6448 return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC); 6449 } 6450 6451 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6452 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6453 } 6454 6455 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6456 const OperandVector &Operands, 6457 bool IsAtomic, 6458 bool IsAtomicReturn, 6459 bool IsLds) { 6460 bool IsLdsOpcode = IsLds; 6461 bool HasLdsModifier = false; 6462 OptionalImmIndexMap OptionalIdx; 6463 assert(IsAtomicReturn ? IsAtomic : true); 6464 unsigned FirstOperandIdx = 1; 6465 6466 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6467 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6468 6469 // Add the register arguments 6470 if (Op.isReg()) { 6471 Op.addRegOperands(Inst, 1); 6472 // Insert a tied src for atomic return dst. 6473 // This cannot be postponed as subsequent calls to 6474 // addImmOperands rely on correct number of MC operands. 6475 if (IsAtomicReturn && i == FirstOperandIdx) 6476 Op.addRegOperands(Inst, 1); 6477 continue; 6478 } 6479 6480 // Handle the case where soffset is an immediate 6481 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6482 Op.addImmOperands(Inst, 1); 6483 continue; 6484 } 6485 6486 HasLdsModifier |= Op.isLDS(); 6487 6488 // Handle tokens like 'offen' which are sometimes hard-coded into the 6489 // asm string. There are no MCInst operands for these. 6490 if (Op.isToken()) { 6491 continue; 6492 } 6493 assert(Op.isImm()); 6494 6495 // Handle optional arguments 6496 OptionalIdx[Op.getImmTy()] = i; 6497 } 6498 6499 // This is a workaround for an llvm quirk which may result in an 6500 // incorrect instruction selection. Lds and non-lds versions of 6501 // MUBUF instructions are identical except that lds versions 6502 // have mandatory 'lds' modifier. However this modifier follows 6503 // optional modifiers and llvm asm matcher regards this 'lds' 6504 // modifier as an optional one. As a result, an lds version 6505 // of opcode may be selected even if it has no 'lds' modifier. 6506 if (IsLdsOpcode && !HasLdsModifier) { 6507 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6508 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
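      // Swap in the plain (non-lds) opcode; per the note above the two
      // flavours take identical operands, so nothing else needs rewriting.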
6509 Inst.setOpcode(NoLdsOpcode); 6510 IsLdsOpcode = false; 6511 } 6512 } 6513 6514 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6515 if (!IsAtomic || IsAtomicReturn) { 6516 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6517 } 6518 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6519 6520 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6521 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6522 } 6523 6524 if (isGFX10()) 6525 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6526 } 6527 6528 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6529 OptionalImmIndexMap OptionalIdx; 6530 6531 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6532 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6533 6534 // Add the register arguments 6535 if (Op.isReg()) { 6536 Op.addRegOperands(Inst, 1); 6537 continue; 6538 } 6539 6540 // Handle the case where soffset is an immediate 6541 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6542 Op.addImmOperands(Inst, 1); 6543 continue; 6544 } 6545 6546 // Handle tokens like 'offen' which are sometimes hard-coded into the 6547 // asm string. There are no MCInst operands for these. 6548 if (Op.isToken()) { 6549 continue; 6550 } 6551 assert(Op.isImm()); 6552 6553 // Handle optional arguments 6554 OptionalIdx[Op.getImmTy()] = i; 6555 } 6556 6557 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6558 AMDGPUOperand::ImmTyOffset); 6559 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6560 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6561 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6562 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6563 6564 if (isGFX10()) 6565 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6566 } 6567 6568 //===----------------------------------------------------------------------===// 6569 // mimg 6570 //===----------------------------------------------------------------------===// 6571 6572 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6573 bool IsAtomic) { 6574 unsigned I = 1; 6575 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6576 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6577 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6578 } 6579 6580 if (IsAtomic) { 6581 // Add src, same as dst 6582 assert(Desc.getNumDefs() == 1); 6583 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6584 } 6585 6586 OptionalImmIndexMap OptionalIdx; 6587 6588 for (unsigned E = Operands.size(); I != E; ++I) { 6589 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6590 6591 // Add the register arguments 6592 if (Op.isReg()) { 6593 Op.addRegOperands(Inst, 1); 6594 } else if (Op.isImmModifier()) { 6595 OptionalIdx[Op.getImmTy()] = I; 6596 } else if (!Op.isToken()) { 6597 llvm_unreachable("unexpected operand type"); 6598 } 6599 } 6600 6601 bool IsGFX10 = isGFX10(); 6602 6603 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6604 if (IsGFX10) 6605 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6606 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6607 if (IsGFX10) 6608 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6609 
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6610 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6611 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6612 if (IsGFX10) 6613 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6614 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6615 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6616 if (!IsGFX10) 6617 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6618 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6619 } 6620 6621 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6622 cvtMIMG(Inst, Operands, true); 6623 } 6624 6625 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 6626 const OperandVector &Operands) { 6627 for (unsigned I = 1; I < Operands.size(); ++I) { 6628 auto &Operand = (AMDGPUOperand &)*Operands[I]; 6629 if (Operand.isReg()) 6630 Operand.addRegOperands(Inst, 1); 6631 } 6632 6633 Inst.addOperand(MCOperand::createImm(1)); // a16 6634 } 6635 6636 //===----------------------------------------------------------------------===// 6637 // smrd 6638 //===----------------------------------------------------------------------===// 6639 6640 bool AMDGPUOperand::isSMRDOffset8() const { 6641 return isImm() && isUInt<8>(getImm()); 6642 } 6643 6644 bool AMDGPUOperand::isSMEMOffset() const { 6645 return isImm(); // Offset range is checked later by validator. 6646 } 6647 6648 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6649 // 32-bit literals are only supported on CI and we only want to use them 6650 // when the offset is > 8-bits. 6651 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6652 } 6653 6654 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6655 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6656 } 6657 6658 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6659 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6660 } 6661 6662 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6663 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6664 } 6665 6666 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6667 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6668 } 6669 6670 //===----------------------------------------------------------------------===// 6671 // vop3 6672 //===----------------------------------------------------------------------===// 6673 6674 static bool ConvertOmodMul(int64_t &Mul) { 6675 if (Mul != 1 && Mul != 2 && Mul != 4) 6676 return false; 6677 6678 Mul >>= 1; 6679 return true; 6680 } 6681 6682 static bool ConvertOmodDiv(int64_t &Div) { 6683 if (Div == 1) { 6684 Div = 0; 6685 return true; 6686 } 6687 6688 if (Div == 2) { 6689 Div = 3; 6690 return true; 6691 } 6692 6693 return false; 6694 } 6695 6696 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6697 if (BoundCtrl == 0) { 6698 BoundCtrl = 1; 6699 return true; 6700 } 6701 6702 if (BoundCtrl == -1) { 6703 BoundCtrl = 0; 6704 return true; 6705 } 6706 6707 return false; 6708 } 6709 6710 // Note: the order in this table matches the order of operands in AsmString. 
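// Each entry below is {Name, ImmTy, IsBit, ConvertResult}. Bit operands are
// written as a bare token (e.g. "glc"), non-bit operands as "name:value",
// and a non-null ConvertResult hook rewrites the parsed value into its
// encoded form (see ConvertOmodMul / ConvertOmodDiv / ConvertBoundCtrl).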
6711 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6712 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6713 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6714 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6715 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6716 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6717 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6718 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6719 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6720 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6721 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6722 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6723 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6724 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6725 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6726 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6727 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6728 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6729 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6730 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6731 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6732 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6733 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6734 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6735 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6736 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6737 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6738 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6739 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6740 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6741 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6742 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6743 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6744 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6745 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6746 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6747 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6748 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6749 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6750 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6751 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6752 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6753 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6754 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6755 }; 6756 6757 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6758 6759 OperandMatchResultTy res = parseOptionalOpr(Operands); 6760 6761 // This is a hack to enable hardcoded mandatory operands which follow 6762 // optional operands. 6763 // 6764 // Current design assumes that all operands after the first optional operand 6765 // are also optional. However implementation of some instructions violates 6766 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6767 // 6768 // To alleviate this problem, we have to (implicitly) parse extra operands 6769 // to make sure autogenerated parser of custom operands never hit hardcoded 6770 // mandatory operands. 
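  // Concretely, the loop below keeps consuming comma-separated optional
  // operands for up to MAX_OPR_LOOKAHEAD iterations after a successful
  // parse, so a hard-coded trailing token (such as the 'glc' mentioned
  // above) is still reached by the matcher instead of being left unparsed.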
6771 6772 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6773 if (res != MatchOperand_Success || 6774 isToken(AsmToken::EndOfStatement)) 6775 break; 6776 6777 trySkipToken(AsmToken::Comma); 6778 res = parseOptionalOpr(Operands); 6779 } 6780 6781 return res; 6782 } 6783 6784 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6785 OperandMatchResultTy res; 6786 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6787 // try to parse any optional operand here 6788 if (Op.IsBit) { 6789 res = parseNamedBit(Op.Name, Operands, Op.Type); 6790 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6791 res = parseOModOperand(Operands); 6792 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6793 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6794 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6795 res = parseSDWASel(Operands, Op.Name, Op.Type); 6796 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6797 res = parseSDWADstUnused(Operands); 6798 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6799 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6800 Op.Type == AMDGPUOperand::ImmTyNegLo || 6801 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6802 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6803 Op.ConvertResult); 6804 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6805 res = parseDim(Operands); 6806 } else { 6807 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6808 } 6809 if (res != MatchOperand_NoMatch) { 6810 return res; 6811 } 6812 } 6813 return MatchOperand_NoMatch; 6814 } 6815 6816 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6817 StringRef Name = Parser.getTok().getString(); 6818 if (Name == "mul") { 6819 return parseIntWithPrefix("mul", Operands, 6820 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6821 } 6822 6823 if (Name == "div") { 6824 return parseIntWithPrefix("div", Operands, 6825 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6826 } 6827 6828 return MatchOperand_NoMatch; 6829 } 6830 6831 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6832 cvtVOP3P(Inst, Operands); 6833 6834 int Opc = Inst.getOpcode(); 6835 6836 int SrcNum; 6837 const int Ops[] = { AMDGPU::OpName::src0, 6838 AMDGPU::OpName::src1, 6839 AMDGPU::OpName::src2 }; 6840 for (SrcNum = 0; 6841 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6842 ++SrcNum); 6843 assert(SrcNum > 0); 6844 6845 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6846 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6847 6848 if ((OpSel & (1 << SrcNum)) != 0) { 6849 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6850 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6851 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6852 } 6853 } 6854 6855 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6856 // 1. This operand is input modifiers 6857 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6858 // 2. This is not last operand 6859 && Desc.NumOperands > (OpNum + 1) 6860 // 3. Next operand is register class 6861 && Desc.OpInfo[OpNum + 1].RegClass != -1 6862 // 4. 
Next register is not tied to any other operand 6863 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6864 } 6865 6866 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6867 { 6868 OptionalImmIndexMap OptionalIdx; 6869 unsigned Opc = Inst.getOpcode(); 6870 6871 unsigned I = 1; 6872 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6873 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6874 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6875 } 6876 6877 for (unsigned E = Operands.size(); I != E; ++I) { 6878 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6879 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6880 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6881 } else if (Op.isInterpSlot() || 6882 Op.isInterpAttr() || 6883 Op.isAttrChan()) { 6884 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6885 } else if (Op.isImmModifier()) { 6886 OptionalIdx[Op.getImmTy()] = I; 6887 } else { 6888 llvm_unreachable("unhandled operand type"); 6889 } 6890 } 6891 6892 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6893 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6894 } 6895 6896 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6897 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6898 } 6899 6900 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6901 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6902 } 6903 } 6904 6905 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6906 OptionalImmIndexMap &OptionalIdx) { 6907 unsigned Opc = Inst.getOpcode(); 6908 6909 unsigned I = 1; 6910 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6911 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6912 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6913 } 6914 6915 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6916 // This instruction has src modifiers 6917 for (unsigned E = Operands.size(); I != E; ++I) { 6918 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6919 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6920 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6921 } else if (Op.isImmModifier()) { 6922 OptionalIdx[Op.getImmTy()] = I; 6923 } else if (Op.isRegOrImm()) { 6924 Op.addRegOrImmOperands(Inst, 1); 6925 } else { 6926 llvm_unreachable("unhandled operand type"); 6927 } 6928 } 6929 } else { 6930 // No src modifiers 6931 for (unsigned E = Operands.size(); I != E; ++I) { 6932 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6933 if (Op.isMod()) { 6934 OptionalIdx[Op.getImmTy()] = I; 6935 } else { 6936 Op.addRegOrImmOperands(Inst, 1); 6937 } 6938 } 6939 } 6940 6941 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6942 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6943 } 6944 6945 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6946 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6947 } 6948 6949 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6950 // it has src2 register operand that is tied to dst operand 6951 // we don't allow modifiers for this operand in assembler so src2_modifiers 6952 // should be 0. 
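  // The fixup below therefore inserts an explicit all-zero src2_modifiers
  // immediate and then re-uses the dst register operand as the tied src2.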
6953 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6954 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6955 Opc == AMDGPU::V_MAC_F32_e64_vi || 6956 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 6957 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 6958 Opc == AMDGPU::V_MAC_F16_e64_vi || 6959 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6960 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6961 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 6962 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6963 auto it = Inst.begin(); 6964 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6965 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6966 ++it; 6967 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6968 } 6969 } 6970 6971 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6972 OptionalImmIndexMap OptionalIdx; 6973 cvtVOP3(Inst, Operands, OptionalIdx); 6974 } 6975 6976 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6977 const OperandVector &Operands) { 6978 OptionalImmIndexMap OptIdx; 6979 const int Opc = Inst.getOpcode(); 6980 const MCInstrDesc &Desc = MII.get(Opc); 6981 6982 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6983 6984 cvtVOP3(Inst, Operands, OptIdx); 6985 6986 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6987 assert(!IsPacked); 6988 Inst.addOperand(Inst.getOperand(0)); 6989 } 6990 6991 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6992 // instruction, and then figure out where to actually put the modifiers 6993 6994 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6995 6996 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6997 if (OpSelHiIdx != -1) { 6998 int DefaultVal = IsPacked ? 
-1 : 0; 6999 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7000 DefaultVal); 7001 } 7002 7003 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7004 if (NegLoIdx != -1) { 7005 assert(IsPacked); 7006 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7007 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7008 } 7009 7010 const int Ops[] = { AMDGPU::OpName::src0, 7011 AMDGPU::OpName::src1, 7012 AMDGPU::OpName::src2 }; 7013 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7014 AMDGPU::OpName::src1_modifiers, 7015 AMDGPU::OpName::src2_modifiers }; 7016 7017 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7018 7019 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7020 unsigned OpSelHi = 0; 7021 unsigned NegLo = 0; 7022 unsigned NegHi = 0; 7023 7024 if (OpSelHiIdx != -1) { 7025 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7026 } 7027 7028 if (NegLoIdx != -1) { 7029 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7030 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7031 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7032 } 7033 7034 for (int J = 0; J < 3; ++J) { 7035 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7036 if (OpIdx == -1) 7037 break; 7038 7039 uint32_t ModVal = 0; 7040 7041 if ((OpSel & (1 << J)) != 0) 7042 ModVal |= SISrcMods::OP_SEL_0; 7043 7044 if ((OpSelHi & (1 << J)) != 0) 7045 ModVal |= SISrcMods::OP_SEL_1; 7046 7047 if ((NegLo & (1 << J)) != 0) 7048 ModVal |= SISrcMods::NEG; 7049 7050 if ((NegHi & (1 << J)) != 0) 7051 ModVal |= SISrcMods::NEG_HI; 7052 7053 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7054 7055 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7056 } 7057 } 7058 7059 //===----------------------------------------------------------------------===// 7060 // dpp 7061 //===----------------------------------------------------------------------===// 7062 7063 bool AMDGPUOperand::isDPP8() const { 7064 return isImmTy(ImmTyDPP8); 7065 } 7066 7067 bool AMDGPUOperand::isDPPCtrl() const { 7068 using namespace AMDGPU::DPP; 7069 7070 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7071 if (result) { 7072 int64_t Imm = getImm(); 7073 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7074 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7075 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7076 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7077 (Imm == DppCtrl::WAVE_SHL1) || 7078 (Imm == DppCtrl::WAVE_ROL1) || 7079 (Imm == DppCtrl::WAVE_SHR1) || 7080 (Imm == DppCtrl::WAVE_ROR1) || 7081 (Imm == DppCtrl::ROW_MIRROR) || 7082 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7083 (Imm == DppCtrl::BCAST15) || 7084 (Imm == DppCtrl::BCAST31) || 7085 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7086 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7087 } 7088 return false; 7089 } 7090 7091 //===----------------------------------------------------------------------===// 7092 // mAI 7093 //===----------------------------------------------------------------------===// 7094 7095 bool AMDGPUOperand::isBLGP() const { 7096 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7097 } 7098 7099 bool AMDGPUOperand::isCBSZ() const { 7100 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7101 } 7102 7103 bool AMDGPUOperand::isABID() 
const { 7104 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7105 } 7106 7107 bool AMDGPUOperand::isS16Imm() const { 7108 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7109 } 7110 7111 bool AMDGPUOperand::isU16Imm() const { 7112 return isImm() && isUInt<16>(getImm()); 7113 } 7114 7115 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7116 if (!isGFX10()) 7117 return MatchOperand_NoMatch; 7118 7119 SMLoc S = Parser.getTok().getLoc(); 7120 7121 if (getLexer().isNot(AsmToken::Identifier)) 7122 return MatchOperand_NoMatch; 7123 if (getLexer().getTok().getString() != "dim") 7124 return MatchOperand_NoMatch; 7125 7126 Parser.Lex(); 7127 if (getLexer().isNot(AsmToken::Colon)) 7128 return MatchOperand_ParseFail; 7129 7130 Parser.Lex(); 7131 7132 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 7133 // integer. 7134 std::string Token; 7135 if (getLexer().is(AsmToken::Integer)) { 7136 SMLoc Loc = getLexer().getTok().getEndLoc(); 7137 Token = std::string(getLexer().getTok().getString()); 7138 Parser.Lex(); 7139 if (getLexer().getTok().getLoc() != Loc) 7140 return MatchOperand_ParseFail; 7141 } 7142 if (getLexer().isNot(AsmToken::Identifier)) 7143 return MatchOperand_ParseFail; 7144 Token += getLexer().getTok().getString(); 7145 7146 StringRef DimId = Token; 7147 if (DimId.startswith("SQ_RSRC_IMG_")) 7148 DimId = DimId.substr(12); 7149 7150 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7151 if (!DimInfo) 7152 return MatchOperand_ParseFail; 7153 7154 Parser.Lex(); 7155 7156 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 7157 AMDGPUOperand::ImmTyDim)); 7158 return MatchOperand_Success; 7159 } 7160 7161 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7162 SMLoc S = Parser.getTok().getLoc(); 7163 StringRef Prefix; 7164 7165 if (getLexer().getKind() == AsmToken::Identifier) { 7166 Prefix = Parser.getTok().getString(); 7167 } else { 7168 return MatchOperand_NoMatch; 7169 } 7170 7171 if (Prefix != "dpp8") 7172 return parseDPPCtrl(Operands); 7173 if (!isGFX10()) 7174 return MatchOperand_NoMatch; 7175 7176 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7177 7178 int64_t Sels[8]; 7179 7180 Parser.Lex(); 7181 if (getLexer().isNot(AsmToken::Colon)) 7182 return MatchOperand_ParseFail; 7183 7184 Parser.Lex(); 7185 if (getLexer().isNot(AsmToken::LBrac)) 7186 return MatchOperand_ParseFail; 7187 7188 Parser.Lex(); 7189 if (getParser().parseAbsoluteExpression(Sels[0])) 7190 return MatchOperand_ParseFail; 7191 if (0 > Sels[0] || 7 < Sels[0]) 7192 return MatchOperand_ParseFail; 7193 7194 for (size_t i = 1; i < 8; ++i) { 7195 if (getLexer().isNot(AsmToken::Comma)) 7196 return MatchOperand_ParseFail; 7197 7198 Parser.Lex(); 7199 if (getParser().parseAbsoluteExpression(Sels[i])) 7200 return MatchOperand_ParseFail; 7201 if (0 > Sels[i] || 7 < Sels[i]) 7202 return MatchOperand_ParseFail; 7203 } 7204 7205 if (getLexer().isNot(AsmToken::RBrac)) 7206 return MatchOperand_ParseFail; 7207 Parser.Lex(); 7208 7209 unsigned DPP8 = 0; 7210 for (size_t i = 0; i < 8; ++i) 7211 DPP8 |= (Sels[i] << (i * 3)); 7212 7213 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7214 return MatchOperand_Success; 7215 } 7216 7217 OperandMatchResultTy 7218 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7219 using namespace AMDGPU::DPP; 7220 7221 SMLoc S = Parser.getTok().getLoc(); 7222 StringRef Prefix; 7223 int64_t Int; 7224 7225 if 
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrl from eating invalid tokens.
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast"
        && Prefix != "row_share"
        && Prefix != "row_xmask") {
      return MatchOperand_NoMatch;
    }

    if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
      return MatchOperand_NoMatch;

    if (!isVI() && !isGFX9() &&
        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
         Prefix == "wave_rol" || Prefix == "wave_ror" ||
         Prefix == "row_bcast"))
      return MatchOperand_NoMatch;

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i * 2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHARE_FIRST;
      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_XMASK_FIRST;
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

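  // Add the trailing operands: fi for dpp8; row_mask, bank_mask, bound_ctrl
  // (and fi where the opcode has it) for regular dpp.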
  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
            .Case("BYTE_0", SdwaSel::BYTE_0)
            .Case("BYTE_1", SdwaSel::BYTE_1)
            .Case("BYTE_2", SdwaSel::BYTE_2)
            .Case("BYTE_3", SdwaSel::BYTE_3)
            .Case("WORD_0", SdwaSel::WORD_0)
            .Case("WORD_1", SdwaSel::WORD_1)
            .Case("DWORD", SdwaSel::DWORD)
            .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
            .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
            .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
            .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
            .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

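// Convert parsed SDWA operands into an MCInst, skipping the vcc operand that
// VOP2b/VOPC forms carry implicitly and appending defaults for any omitted
// optional operands (clamp, omod, dst_sel, dst_unused, src0_sel, src1_sel).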
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}: it has a src2 register operand that is
  // tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // where the matcher expects the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }