//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
  // value of the GLC operand.
  bool isGLC_1() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
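      // "southern-islands" selects the base GFX6 feature set when no explicit
      // -mcpu/-mattr was provided.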
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  bool validateMAIAccWrite(const MCInst &Inst);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg);

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultGLC_1() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
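// Size is the operand size in bytes (2, 4, or 8).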
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 32 bits of the literal to zeroes, but we accept
    // such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
MVT::i16 : type; 1699 1700 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1701 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1702 } 1703 1704 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1705 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1706 } 1707 1708 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1709 if (AsmParser->isVI()) 1710 return isVReg32(); 1711 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1712 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1713 else 1714 return false; 1715 } 1716 1717 bool AMDGPUOperand::isSDWAFP16Operand() const { 1718 return isSDWAOperand(MVT::f16); 1719 } 1720 1721 bool AMDGPUOperand::isSDWAFP32Operand() const { 1722 return isSDWAOperand(MVT::f32); 1723 } 1724 1725 bool AMDGPUOperand::isSDWAInt16Operand() const { 1726 return isSDWAOperand(MVT::i16); 1727 } 1728 1729 bool AMDGPUOperand::isSDWAInt32Operand() const { 1730 return isSDWAOperand(MVT::i32); 1731 } 1732 1733 bool AMDGPUOperand::isBoolReg() const { 1734 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1735 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1736 } 1737 1738 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1739 { 1740 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1741 assert(Size == 2 || Size == 4 || Size == 8); 1742 1743 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1744 1745 if (Imm.Mods.Abs) { 1746 Val &= ~FpSignMask; 1747 } 1748 if (Imm.Mods.Neg) { 1749 Val ^= FpSignMask; 1750 } 1751 1752 return Val; 1753 } 1754 1755 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1756 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1757 Inst.getNumOperands())) { 1758 addLiteralImmOperand(Inst, Imm.Val, 1759 ApplyModifiers & 1760 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1761 } else { 1762 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1763 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1764 } 1765 } 1766 1767 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1768 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1769 auto OpNum = Inst.getNumOperands(); 1770 // Check that this operand accepts literals 1771 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1772 1773 if (ApplyModifiers) { 1774 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1775 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1776 Val = applyInputFPModifiers(Val, Size); 1777 } 1778 1779 APInt Literal(64, Val); 1780 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1781 1782 if (Imm.IsFPImm) { // We got fp literal token 1783 switch (OpTy) { 1784 case AMDGPU::OPERAND_REG_IMM_INT64: 1785 case AMDGPU::OPERAND_REG_IMM_FP64: 1786 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1787 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1788 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1789 AsmParser->hasInv2PiInlineImm())) { 1790 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1791 return; 1792 } 1793 1794 // Non-inlineable 1795 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1796 // For fp operands we check if low 32 bits are zeros 1797 if (Literal.getLoBits(32) != 0) { 1798 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1799 "Can't encode literal as exact 64-bit floating-point operand. " 1800 "Low 32-bits will be set to zero"); 1801 } 1802 1803 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1804 return; 1805 } 1806 1807 // We don't allow fp literals in 64-bit integer instructions. It is 1808 // unclear how we should encode them. This case should be checked earlier 1809 // in predicate methods (isLiteralImm()) 1810 llvm_unreachable("fp literal in 64-bit integer instruction."); 1811 1812 case AMDGPU::OPERAND_REG_IMM_INT32: 1813 case AMDGPU::OPERAND_REG_IMM_FP32: 1814 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1815 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1816 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1817 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1818 case AMDGPU::OPERAND_REG_IMM_INT16: 1819 case AMDGPU::OPERAND_REG_IMM_FP16: 1820 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1821 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1822 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1823 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1824 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1825 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1826 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1827 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1828 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1829 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1830 bool lost; 1831 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1832 // Convert literal to single precision 1833 FPLiteral.convert(*getOpFltSemantics(OpTy), 1834 APFloat::rmNearestTiesToEven, &lost); 1835 // We allow precision lost but not overflow or underflow. This should be 1836 // checked earlier in isLiteralImm() 1837 1838 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1839 Inst.addOperand(MCOperand::createImm(ImmVal)); 1840 return; 1841 } 1842 default: 1843 llvm_unreachable("invalid operand size"); 1844 } 1845 1846 return; 1847 } 1848 1849 // We got int literal token. 1850 // Only sign extend inline immediates. 
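  // Illustrative examples of the handling below: with a 32-bit operand an
  // integer token such as -1 is an inline constant and is emitted unchanged,
  // while a token such as 0x12345678 is not inlinable and only its low 32
  // bits are emitted as a literal. The exact cut-offs are whatever
  // isInlinableLiteral32/16/64 accept.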
1851 switch (OpTy) { 1852 case AMDGPU::OPERAND_REG_IMM_INT32: 1853 case AMDGPU::OPERAND_REG_IMM_FP32: 1854 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1855 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1856 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1857 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1858 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1859 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1860 if (isSafeTruncation(Val, 32) && 1861 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1862 AsmParser->hasInv2PiInlineImm())) { 1863 Inst.addOperand(MCOperand::createImm(Val)); 1864 return; 1865 } 1866 1867 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1868 return; 1869 1870 case AMDGPU::OPERAND_REG_IMM_INT64: 1871 case AMDGPU::OPERAND_REG_IMM_FP64: 1872 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1873 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1874 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1875 Inst.addOperand(MCOperand::createImm(Val)); 1876 return; 1877 } 1878 1879 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1880 return; 1881 1882 case AMDGPU::OPERAND_REG_IMM_INT16: 1883 case AMDGPU::OPERAND_REG_IMM_FP16: 1884 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1885 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1886 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1887 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1888 if (isSafeTruncation(Val, 16) && 1889 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1890 AsmParser->hasInv2PiInlineImm())) { 1891 Inst.addOperand(MCOperand::createImm(Val)); 1892 return; 1893 } 1894 1895 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1896 return; 1897 1898 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1899 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1900 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1901 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1902 assert(isSafeTruncation(Val, 16)); 1903 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1904 AsmParser->hasInv2PiInlineImm())); 1905 1906 Inst.addOperand(MCOperand::createImm(Val)); 1907 return; 1908 } 1909 default: 1910 llvm_unreachable("invalid operand size"); 1911 } 1912 } 1913 1914 template <unsigned Bitwidth> 1915 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1916 APInt Literal(64, Imm.Val); 1917 1918 if (!Imm.IsFPImm) { 1919 // We got int literal token. 
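  // Note: KImm operands are the extra "K" literals carried by opcodes such
  // as v_madmk_f32 / v_madak_f32 (and their f16 forms). An integer token is
  // simply truncated to Bitwidth bits here; an fp token is first rounded to
  // the operand's semantics below.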
1920 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1921 return; 1922 } 1923 1924 bool Lost; 1925 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1926 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1927 APFloat::rmNearestTiesToEven, &Lost); 1928 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1929 } 1930 1931 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1932 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1933 } 1934 1935 static bool isInlineValue(unsigned Reg) { 1936 switch (Reg) { 1937 case AMDGPU::SRC_SHARED_BASE: 1938 case AMDGPU::SRC_SHARED_LIMIT: 1939 case AMDGPU::SRC_PRIVATE_BASE: 1940 case AMDGPU::SRC_PRIVATE_LIMIT: 1941 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1942 return true; 1943 case AMDGPU::SRC_VCCZ: 1944 case AMDGPU::SRC_EXECZ: 1945 case AMDGPU::SRC_SCC: 1946 return true; 1947 case AMDGPU::SGPR_NULL: 1948 return true; 1949 default: 1950 return false; 1951 } 1952 } 1953 1954 bool AMDGPUOperand::isInlineValue() const { 1955 return isRegKind() && ::isInlineValue(getReg()); 1956 } 1957 1958 //===----------------------------------------------------------------------===// 1959 // AsmParser 1960 //===----------------------------------------------------------------------===// 1961 1962 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1963 if (Is == IS_VGPR) { 1964 switch (RegWidth) { 1965 default: return -1; 1966 case 1: return AMDGPU::VGPR_32RegClassID; 1967 case 2: return AMDGPU::VReg_64RegClassID; 1968 case 3: return AMDGPU::VReg_96RegClassID; 1969 case 4: return AMDGPU::VReg_128RegClassID; 1970 case 5: return AMDGPU::VReg_160RegClassID; 1971 case 6: return AMDGPU::VReg_192RegClassID; 1972 case 8: return AMDGPU::VReg_256RegClassID; 1973 case 16: return AMDGPU::VReg_512RegClassID; 1974 case 32: return AMDGPU::VReg_1024RegClassID; 1975 } 1976 } else if (Is == IS_TTMP) { 1977 switch (RegWidth) { 1978 default: return -1; 1979 case 1: return AMDGPU::TTMP_32RegClassID; 1980 case 2: return AMDGPU::TTMP_64RegClassID; 1981 case 4: return AMDGPU::TTMP_128RegClassID; 1982 case 8: return AMDGPU::TTMP_256RegClassID; 1983 case 16: return AMDGPU::TTMP_512RegClassID; 1984 } 1985 } else if (Is == IS_SGPR) { 1986 switch (RegWidth) { 1987 default: return -1; 1988 case 1: return AMDGPU::SGPR_32RegClassID; 1989 case 2: return AMDGPU::SGPR_64RegClassID; 1990 case 3: return AMDGPU::SGPR_96RegClassID; 1991 case 4: return AMDGPU::SGPR_128RegClassID; 1992 case 5: return AMDGPU::SGPR_160RegClassID; 1993 case 6: return AMDGPU::SGPR_192RegClassID; 1994 case 8: return AMDGPU::SGPR_256RegClassID; 1995 case 16: return AMDGPU::SGPR_512RegClassID; 1996 } 1997 } else if (Is == IS_AGPR) { 1998 switch (RegWidth) { 1999 default: return -1; 2000 case 1: return AMDGPU::AGPR_32RegClassID; 2001 case 2: return AMDGPU::AReg_64RegClassID; 2002 case 3: return AMDGPU::AReg_96RegClassID; 2003 case 4: return AMDGPU::AReg_128RegClassID; 2004 case 5: return AMDGPU::AReg_160RegClassID; 2005 case 6: return AMDGPU::AReg_192RegClassID; 2006 case 8: return AMDGPU::AReg_256RegClassID; 2007 case 16: return AMDGPU::AReg_512RegClassID; 2008 case 32: return AMDGPU::AReg_1024RegClassID; 2009 } 2010 } 2011 return -1; 2012 } 2013 2014 static unsigned getSpecialRegForName(StringRef RegName) { 2015 return StringSwitch<unsigned>(RegName) 2016 .Case("exec", AMDGPU::EXEC) 2017 .Case("vcc", AMDGPU::VCC) 2018 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2019 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2020 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2021 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2022 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2023 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2024 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2025 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2026 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2027 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2028 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2029 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2030 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2031 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2032 .Case("m0", AMDGPU::M0) 2033 .Case("vccz", AMDGPU::SRC_VCCZ) 2034 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2035 .Case("execz", AMDGPU::SRC_EXECZ) 2036 .Case("src_execz", AMDGPU::SRC_EXECZ) 2037 .Case("scc", AMDGPU::SRC_SCC) 2038 .Case("src_scc", AMDGPU::SRC_SCC) 2039 .Case("tba", AMDGPU::TBA) 2040 .Case("tma", AMDGPU::TMA) 2041 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2042 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2043 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2044 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2045 .Case("vcc_lo", AMDGPU::VCC_LO) 2046 .Case("vcc_hi", AMDGPU::VCC_HI) 2047 .Case("exec_lo", AMDGPU::EXEC_LO) 2048 .Case("exec_hi", AMDGPU::EXEC_HI) 2049 .Case("tma_lo", AMDGPU::TMA_LO) 2050 .Case("tma_hi", AMDGPU::TMA_HI) 2051 .Case("tba_lo", AMDGPU::TBA_LO) 2052 .Case("tba_hi", AMDGPU::TBA_HI) 2053 .Case("pc", AMDGPU::PC_REG) 2054 .Case("null", AMDGPU::SGPR_NULL) 2055 .Default(AMDGPU::NoRegister); 2056 } 2057 2058 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2059 SMLoc &EndLoc, bool RestoreOnFailure) { 2060 auto R = parseRegister(); 2061 if (!R) return true; 2062 assert(R->isReg()); 2063 RegNo = R->getReg(); 2064 StartLoc = R->getStartLoc(); 2065 EndLoc = R->getEndLoc(); 2066 return false; 2067 } 2068 2069 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2070 SMLoc &EndLoc) { 2071 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2072 } 2073 2074 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2075 SMLoc &StartLoc, 2076 SMLoc &EndLoc) { 2077 bool Result = 2078 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2079 bool PendingErrors = getParser().hasPendingError(); 2080 getParser().clearPendingErrors(); 2081 if (PendingErrors) 2082 return MatchOperand_ParseFail; 2083 if (Result) 2084 return MatchOperand_NoMatch; 2085 return MatchOperand_Success; 2086 } 2087 2088 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2089 RegisterKind RegKind, unsigned Reg1, 2090 SMLoc Loc) { 2091 switch (RegKind) { 2092 case IS_SPECIAL: 2093 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2094 Reg = AMDGPU::EXEC; 2095 RegWidth = 2; 2096 return true; 2097 } 2098 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2099 Reg = AMDGPU::FLAT_SCR; 2100 RegWidth = 2; 2101 return true; 2102 } 2103 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2104 Reg = AMDGPU::XNACK_MASK; 2105 RegWidth = 2; 2106 return true; 2107 } 2108 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2109 Reg = AMDGPU::VCC; 2110 RegWidth = 2; 2111 return true; 2112 } 2113 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2114 Reg = AMDGPU::TBA; 2115 RegWidth = 2; 2116 return true; 2117 } 2118 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2119 Reg = AMDGPU::TMA; 2120 
RegWidth = 2; 2121 return true; 2122 } 2123 Error(Loc, "register does not fit in the list"); 2124 return false; 2125 case IS_VGPR: 2126 case IS_SGPR: 2127 case IS_AGPR: 2128 case IS_TTMP: 2129 if (Reg1 != Reg + RegWidth) { 2130 Error(Loc, "registers in a list must have consecutive indices"); 2131 return false; 2132 } 2133 RegWidth++; 2134 return true; 2135 default: 2136 llvm_unreachable("unexpected register kind"); 2137 } 2138 } 2139 2140 struct RegInfo { 2141 StringLiteral Name; 2142 RegisterKind Kind; 2143 }; 2144 2145 static constexpr RegInfo RegularRegisters[] = { 2146 {{"v"}, IS_VGPR}, 2147 {{"s"}, IS_SGPR}, 2148 {{"ttmp"}, IS_TTMP}, 2149 {{"acc"}, IS_AGPR}, 2150 {{"a"}, IS_AGPR}, 2151 }; 2152 2153 static bool isRegularReg(RegisterKind Kind) { 2154 return Kind == IS_VGPR || 2155 Kind == IS_SGPR || 2156 Kind == IS_TTMP || 2157 Kind == IS_AGPR; 2158 } 2159 2160 static const RegInfo* getRegularRegInfo(StringRef Str) { 2161 for (const RegInfo &Reg : RegularRegisters) 2162 if (Str.startswith(Reg.Name)) 2163 return &Reg; 2164 return nullptr; 2165 } 2166 2167 static bool getRegNum(StringRef Str, unsigned& Num) { 2168 return !Str.getAsInteger(10, Num); 2169 } 2170 2171 bool 2172 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2173 const AsmToken &NextToken) const { 2174 2175 // A list of consecutive registers: [s0,s1,s2,s3] 2176 if (Token.is(AsmToken::LBrac)) 2177 return true; 2178 2179 if (!Token.is(AsmToken::Identifier)) 2180 return false; 2181 2182 // A single register like s0 or a range of registers like s[0:1] 2183 2184 StringRef Str = Token.getString(); 2185 const RegInfo *Reg = getRegularRegInfo(Str); 2186 if (Reg) { 2187 StringRef RegName = Reg->Name; 2188 StringRef RegSuffix = Str.substr(RegName.size()); 2189 if (!RegSuffix.empty()) { 2190 unsigned Num; 2191 // A single register with an index: rXX 2192 if (getRegNum(RegSuffix, Num)) 2193 return true; 2194 } else { 2195 // A range of registers: r[XX:YY]. 2196 if (NextToken.is(AsmToken::LBrac)) 2197 return true; 2198 } 2199 } 2200 2201 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2202 } 2203 2204 bool 2205 AMDGPUAsmParser::isRegister() 2206 { 2207 return isRegister(getToken(), peekToken()); 2208 } 2209 2210 unsigned 2211 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2212 unsigned RegNum, 2213 unsigned RegWidth, 2214 SMLoc Loc) { 2215 2216 assert(isRegularReg(RegKind)); 2217 2218 unsigned AlignSize = 1; 2219 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2220 // SGPR and TTMP registers must be aligned. 2221 // Max required alignment is 4 dwords. 
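    // For example, s[2:3] is accepted (a 64-bit pair starting at an even
    // index), whereas s[1:2] fails the alignment check below.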
2222 AlignSize = std::min(RegWidth, 4u); 2223 } 2224 2225 if (RegNum % AlignSize != 0) { 2226 Error(Loc, "invalid register alignment"); 2227 return AMDGPU::NoRegister; 2228 } 2229 2230 unsigned RegIdx = RegNum / AlignSize; 2231 int RCID = getRegClass(RegKind, RegWidth); 2232 if (RCID == -1) { 2233 Error(Loc, "invalid or unsupported register size"); 2234 return AMDGPU::NoRegister; 2235 } 2236 2237 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2238 const MCRegisterClass RC = TRI->getRegClass(RCID); 2239 if (RegIdx >= RC.getNumRegs()) { 2240 Error(Loc, "register index is out of range"); 2241 return AMDGPU::NoRegister; 2242 } 2243 2244 return RC.getRegister(RegIdx); 2245 } 2246 2247 bool 2248 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2249 int64_t RegLo, RegHi; 2250 if (!skipToken(AsmToken::LBrac, "missing register index")) 2251 return false; 2252 2253 SMLoc FirstIdxLoc = getLoc(); 2254 SMLoc SecondIdxLoc; 2255 2256 if (!parseExpr(RegLo)) 2257 return false; 2258 2259 if (trySkipToken(AsmToken::Colon)) { 2260 SecondIdxLoc = getLoc(); 2261 if (!parseExpr(RegHi)) 2262 return false; 2263 } else { 2264 RegHi = RegLo; 2265 } 2266 2267 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2268 return false; 2269 2270 if (!isUInt<32>(RegLo)) { 2271 Error(FirstIdxLoc, "invalid register index"); 2272 return false; 2273 } 2274 2275 if (!isUInt<32>(RegHi)) { 2276 Error(SecondIdxLoc, "invalid register index"); 2277 return false; 2278 } 2279 2280 if (RegLo > RegHi) { 2281 Error(FirstIdxLoc, "first register index should not exceed second index"); 2282 return false; 2283 } 2284 2285 Num = static_cast<unsigned>(RegLo); 2286 Width = (RegHi - RegLo) + 1; 2287 return true; 2288 } 2289 2290 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2291 unsigned &RegNum, unsigned &RegWidth, 2292 SmallVectorImpl<AsmToken> &Tokens) { 2293 assert(isToken(AsmToken::Identifier)); 2294 unsigned Reg = getSpecialRegForName(getTokenStr()); 2295 if (Reg) { 2296 RegNum = 0; 2297 RegWidth = 1; 2298 RegKind = IS_SPECIAL; 2299 Tokens.push_back(getToken()); 2300 lex(); // skip register name 2301 } 2302 return Reg; 2303 } 2304 2305 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2306 unsigned &RegNum, unsigned &RegWidth, 2307 SmallVectorImpl<AsmToken> &Tokens) { 2308 assert(isToken(AsmToken::Identifier)); 2309 StringRef RegName = getTokenStr(); 2310 auto Loc = getLoc(); 2311 2312 const RegInfo *RI = getRegularRegInfo(RegName); 2313 if (!RI) { 2314 Error(Loc, "invalid register name"); 2315 return AMDGPU::NoRegister; 2316 } 2317 2318 Tokens.push_back(getToken()); 2319 lex(); // skip register name 2320 2321 RegKind = RI->Kind; 2322 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2323 if (!RegSuffix.empty()) { 2324 // Single 32-bit register: vXX. 2325 if (!getRegNum(RegSuffix, RegNum)) { 2326 Error(Loc, "invalid register index"); 2327 return AMDGPU::NoRegister; 2328 } 2329 RegWidth = 1; 2330 } else { 2331 // Range of registers: v[XX:YY]. ":YY" is optional. 
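    // For example, v[0:3] yields RegNum = 0 and RegWidth = 4, while v[5]
    // (no ":YY" part) yields a single register with RegWidth = 1.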
2332 if (!ParseRegRange(RegNum, RegWidth)) 2333 return AMDGPU::NoRegister; 2334 } 2335 2336 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2337 } 2338 2339 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2340 unsigned &RegWidth, 2341 SmallVectorImpl<AsmToken> &Tokens) { 2342 unsigned Reg = AMDGPU::NoRegister; 2343 auto ListLoc = getLoc(); 2344 2345 if (!skipToken(AsmToken::LBrac, 2346 "expected a register or a list of registers")) { 2347 return AMDGPU::NoRegister; 2348 } 2349 2350 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2351 2352 auto Loc = getLoc(); 2353 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2354 return AMDGPU::NoRegister; 2355 if (RegWidth != 1) { 2356 Error(Loc, "expected a single 32-bit register"); 2357 return AMDGPU::NoRegister; 2358 } 2359 2360 for (; trySkipToken(AsmToken::Comma); ) { 2361 RegisterKind NextRegKind; 2362 unsigned NextReg, NextRegNum, NextRegWidth; 2363 Loc = getLoc(); 2364 2365 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2366 NextRegNum, NextRegWidth, 2367 Tokens)) { 2368 return AMDGPU::NoRegister; 2369 } 2370 if (NextRegWidth != 1) { 2371 Error(Loc, "expected a single 32-bit register"); 2372 return AMDGPU::NoRegister; 2373 } 2374 if (NextRegKind != RegKind) { 2375 Error(Loc, "registers in a list must be of the same kind"); 2376 return AMDGPU::NoRegister; 2377 } 2378 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2379 return AMDGPU::NoRegister; 2380 } 2381 2382 if (!skipToken(AsmToken::RBrac, 2383 "expected a comma or a closing square bracket")) { 2384 return AMDGPU::NoRegister; 2385 } 2386 2387 if (isRegularReg(RegKind)) 2388 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2389 2390 return Reg; 2391 } 2392 2393 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2394 unsigned &RegNum, unsigned &RegWidth, 2395 SmallVectorImpl<AsmToken> &Tokens) { 2396 auto Loc = getLoc(); 2397 Reg = AMDGPU::NoRegister; 2398 2399 if (isToken(AsmToken::Identifier)) { 2400 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2401 if (Reg == AMDGPU::NoRegister) 2402 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2403 } else { 2404 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2405 } 2406 2407 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2408 if (Reg == AMDGPU::NoRegister) { 2409 assert(Parser.hasPendingError()); 2410 return false; 2411 } 2412 2413 if (!subtargetHasRegister(*TRI, Reg)) { 2414 if (Reg == AMDGPU::SGPR_NULL) { 2415 Error(Loc, "'null' operand is not supported on this GPU"); 2416 } else { 2417 Error(Loc, "register not available on this GPU"); 2418 } 2419 return false; 2420 } 2421 2422 return true; 2423 } 2424 2425 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2426 unsigned &RegNum, unsigned &RegWidth, 2427 bool RestoreOnFailure /*=false*/) { 2428 Reg = AMDGPU::NoRegister; 2429 2430 SmallVector<AsmToken, 1> Tokens; 2431 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2432 if (RestoreOnFailure) { 2433 while (!Tokens.empty()) { 2434 getLexer().UnLex(Tokens.pop_back_val()); 2435 } 2436 } 2437 return true; 2438 } 2439 return false; 2440 } 2441 2442 Optional<StringRef> 2443 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2444 switch (RegKind) { 2445 case IS_VGPR: 2446 return StringRef(".amdgcn.next_free_vgpr"); 2447 case IS_SGPR: 2448 return StringRef(".amdgcn.next_free_sgpr"); 2449 default: 2450 return None; 2451 } 2452 } 2453 2454 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2455 auto SymbolName = getGprCountSymbolName(RegKind); 2456 assert(SymbolName && "initializing invalid register kind"); 2457 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2458 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2459 } 2460 2461 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2462 unsigned DwordRegIndex, 2463 unsigned RegWidth) { 2464 // Symbols are only defined for GCN targets 2465 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2466 return true; 2467 2468 auto SymbolName = getGprCountSymbolName(RegKind); 2469 if (!SymbolName) 2470 return true; 2471 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2472 2473 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2474 int64_t OldCount; 2475 2476 if (!Sym->isVariable()) 2477 return !Error(getParser().getTok().getLoc(), 2478 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2479 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2480 return !Error( 2481 getParser().getTok().getLoc(), 2482 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2483 2484 if (OldCount <= NewMax) 2485 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2486 2487 return true; 2488 } 2489 2490 std::unique_ptr<AMDGPUOperand> 2491 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2492 const auto &Tok = Parser.getTok(); 2493 SMLoc StartLoc = Tok.getLoc(); 2494 SMLoc EndLoc = Tok.getEndLoc(); 2495 RegisterKind RegKind; 2496 unsigned Reg, RegNum, RegWidth; 2497 2498 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2499 return nullptr; 2500 } 2501 if (isHsaAbiVersion3(&getSTI())) { 2502 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2503 return nullptr; 2504 } else 2505 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2506 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2507 } 2508 2509 OperandMatchResultTy 2510 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2511 // TODO: add syntactic sugar for 1/(2*PI) 2512 2513 assert(!isRegister()); 2514 assert(!isModifier()); 2515 2516 const auto& Tok = getToken(); 2517 const auto& NextTok = peekToken(); 2518 bool IsReal = Tok.is(AsmToken::Real); 2519 SMLoc S = getLoc(); 2520 bool Negate = false; 2521 2522 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2523 lex(); 2524 IsReal = true; 2525 Negate = true; 2526 } 2527 2528 if (IsReal) { 2529 // Floating-point expressions are not supported. 2530 // Can only allow floating-point literals with an 2531 // optional sign. 2532 2533 StringRef Num = getTokenStr(); 2534 lex(); 2535 2536 APFloat RealVal(APFloat::IEEEdouble()); 2537 auto roundMode = APFloat::rmNearestTiesToEven; 2538 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2539 return MatchOperand_ParseFail; 2540 } 2541 if (Negate) 2542 RealVal.changeSign(); 2543 2544 Operands.push_back( 2545 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2546 AMDGPUOperand::ImmTyNone, true)); 2547 2548 return MatchOperand_Success; 2549 2550 } else { 2551 int64_t IntVal; 2552 const MCExpr *Expr; 2553 SMLoc S = getLoc(); 2554 2555 if (HasSP3AbsModifier) { 2556 // This is a workaround for handling expressions 2557 // as arguments of SP3 'abs' modifier, for example: 2558 // |1.0| 2559 // |-1| 2560 // |1+x| 2561 // This syntax is not compatible with syntax of standard 2562 // MC expressions (due to the trailing '|'). 
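      // parsePrimaryExpr is used here because it stops before the trailing
      // '|'; a full parseExpression would try to consume '|' as a binary
      // operator.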
2563 SMLoc EndLoc; 2564 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2565 return MatchOperand_ParseFail; 2566 } else { 2567 if (Parser.parseExpression(Expr)) 2568 return MatchOperand_ParseFail; 2569 } 2570 2571 if (Expr->evaluateAsAbsolute(IntVal)) { 2572 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2573 } else { 2574 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2575 } 2576 2577 return MatchOperand_Success; 2578 } 2579 2580 return MatchOperand_NoMatch; 2581 } 2582 2583 OperandMatchResultTy 2584 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2585 if (!isRegister()) 2586 return MatchOperand_NoMatch; 2587 2588 if (auto R = parseRegister()) { 2589 assert(R->isReg()); 2590 Operands.push_back(std::move(R)); 2591 return MatchOperand_Success; 2592 } 2593 return MatchOperand_ParseFail; 2594 } 2595 2596 OperandMatchResultTy 2597 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2598 auto res = parseReg(Operands); 2599 if (res != MatchOperand_NoMatch) { 2600 return res; 2601 } else if (isModifier()) { 2602 return MatchOperand_NoMatch; 2603 } else { 2604 return parseImm(Operands, HasSP3AbsMod); 2605 } 2606 } 2607 2608 bool 2609 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2610 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2611 const auto &str = Token.getString(); 2612 return str == "abs" || str == "neg" || str == "sext"; 2613 } 2614 return false; 2615 } 2616 2617 bool 2618 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2619 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2620 } 2621 2622 bool 2623 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2624 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2625 } 2626 2627 bool 2628 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2629 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2630 } 2631 2632 // Check if this is an operand modifier or an opcode modifier 2633 // which may look like an expression but it is not. We should 2634 // avoid parsing these modifiers as expressions. Currently 2635 // recognized sequences are: 2636 // |...| 2637 // abs(...) 2638 // neg(...) 2639 // sext(...) 2640 // -reg 2641 // -|...| 2642 // -abs(...) 2643 // name:... 2644 // Note that simple opcode modifiers like 'gds' may be parsed as 2645 // expressions; this is a special case. See getExpressionAsToken. 2646 // 2647 bool 2648 AMDGPUAsmParser::isModifier() { 2649 2650 AsmToken Tok = getToken(); 2651 AsmToken NextToken[2]; 2652 peekTokens(NextToken); 2653 2654 return isOperandModifier(Tok, NextToken[0]) || 2655 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2656 isOpcodeModifierWithVal(Tok, NextToken[0]); 2657 } 2658 2659 // Check if the current token is an SP3 'neg' modifier. 2660 // Currently this modifier is allowed in the following context: 2661 // 2662 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2663 // 2. Before an 'abs' modifier: -abs(...) 2664 // 3. Before an SP3 'abs' modifier: -|...| 2665 // 2666 // In all other cases "-" is handled as a part 2667 // of an expression that follows the sign. 
2668 //
2669 // Note: When "-" is followed by an integer literal N,
2670 // this is interpreted as integer negation rather
2671 // than a floating-point NEG modifier applied to N.
2672 // Besides being counter-intuitive, such use of a floating-point
2673 // NEG modifier would have resulted in different meaning
2674 // of integer literals used with VOP1/2/C and VOP3,
2675 // for example:
2676 //     v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2677 //     v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2678 // Negative fp literals with preceding "-" are
2679 // handled likewise for uniformity.
2680 //
2681 bool
2682 AMDGPUAsmParser::parseSP3NegModifier() {
2683 
2684   AsmToken NextToken[2];
2685   peekTokens(NextToken);
2686 
2687   if (isToken(AsmToken::Minus) &&
2688       (isRegister(NextToken[0], NextToken[1]) ||
2689        NextToken[0].is(AsmToken::Pipe) ||
2690        isId(NextToken[0], "abs"))) {
2691     lex();
2692     return true;
2693   }
2694 
2695   return false;
2696 }
2697 
2698 OperandMatchResultTy
2699 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2700                                               bool AllowImm) {
2701   bool Neg, SP3Neg;
2702   bool Abs, SP3Abs;
2703   SMLoc Loc;
2704 
2705   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2706   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2707     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2708     return MatchOperand_ParseFail;
2709   }
2710 
2711   SP3Neg = parseSP3NegModifier();
2712 
2713   Loc = getLoc();
2714   Neg = trySkipId("neg");
2715   if (Neg && SP3Neg) {
2716     Error(Loc, "expected register or immediate");
2717     return MatchOperand_ParseFail;
2718   }
2719   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2720     return MatchOperand_ParseFail;
2721 
2722   Abs = trySkipId("abs");
2723   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2724     return MatchOperand_ParseFail;
2725 
2726   Loc = getLoc();
2727   SP3Abs = trySkipToken(AsmToken::Pipe);
2728   if (Abs && SP3Abs) {
2729     Error(Loc, "expected register or immediate");
2730     return MatchOperand_ParseFail;
2731   }
2732 
2733   OperandMatchResultTy Res;
2734   if (AllowImm) {
2735     Res = parseRegOrImm(Operands, SP3Abs);
2736   } else {
2737     Res = parseReg(Operands);
2738   }
2739   if (Res != MatchOperand_Success) {
2740     return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2741 } 2742 2743 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2744 return MatchOperand_ParseFail; 2745 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2746 return MatchOperand_ParseFail; 2747 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2748 return MatchOperand_ParseFail; 2749 2750 AMDGPUOperand::Modifiers Mods; 2751 Mods.Abs = Abs || SP3Abs; 2752 Mods.Neg = Neg || SP3Neg; 2753 2754 if (Mods.hasFPModifiers()) { 2755 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2756 if (Op.isExpr()) { 2757 Error(Op.getStartLoc(), "expected an absolute expression"); 2758 return MatchOperand_ParseFail; 2759 } 2760 Op.setModifiers(Mods); 2761 } 2762 return MatchOperand_Success; 2763 } 2764 2765 OperandMatchResultTy 2766 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2767 bool AllowImm) { 2768 bool Sext = trySkipId("sext"); 2769 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2770 return MatchOperand_ParseFail; 2771 2772 OperandMatchResultTy Res; 2773 if (AllowImm) { 2774 Res = parseRegOrImm(Operands); 2775 } else { 2776 Res = parseReg(Operands); 2777 } 2778 if (Res != MatchOperand_Success) { 2779 return Sext? MatchOperand_ParseFail : Res; 2780 } 2781 2782 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2783 return MatchOperand_ParseFail; 2784 2785 AMDGPUOperand::Modifiers Mods; 2786 Mods.Sext = Sext; 2787 2788 if (Mods.hasIntModifiers()) { 2789 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2790 if (Op.isExpr()) { 2791 Error(Op.getStartLoc(), "expected an absolute expression"); 2792 return MatchOperand_ParseFail; 2793 } 2794 Op.setModifiers(Mods); 2795 } 2796 2797 return MatchOperand_Success; 2798 } 2799 2800 OperandMatchResultTy 2801 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2802 return parseRegOrImmWithFPInputMods(Operands, false); 2803 } 2804 2805 OperandMatchResultTy 2806 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2807 return parseRegOrImmWithIntInputMods(Operands, false); 2808 } 2809 2810 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2811 auto Loc = getLoc(); 2812 if (trySkipId("off")) { 2813 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2814 AMDGPUOperand::ImmTyOff, false)); 2815 return MatchOperand_Success; 2816 } 2817 2818 if (!isRegister()) 2819 return MatchOperand_NoMatch; 2820 2821 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2822 if (Reg) { 2823 Operands.push_back(std::move(Reg)); 2824 return MatchOperand_Success; 2825 } 2826 2827 return MatchOperand_ParseFail; 2828 2829 } 2830 2831 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2832 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2833 2834 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2835 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2836 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2837 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2838 return Match_InvalidOperand; 2839 2840 if ((TSFlags & SIInstrFlags::VOP3) && 2841 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2842 getForcedEncodingSize() != 64) 2843 return Match_PreferE32; 2844 2845 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2846 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2847 // v_mac_f32/16 allow only dst_sel == DWORD; 2848 auto OpNum = 2849 
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2850     const auto &Op = Inst.getOperand(OpNum);
2851     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2852       return Match_InvalidOperand;
2853     }
2854   }
2855 
2856   return Match_Success;
2857 }
2858 
2859 static ArrayRef<unsigned> getAllVariants() {
2860   static const unsigned Variants[] = {
2861     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2862     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2863   };
2864 
2865   return makeArrayRef(Variants);
2866 }
2867 
2868 // Which asm variants we should check.
2869 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2870   if (getForcedEncodingSize() == 32) {
2871     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2872     return makeArrayRef(Variants);
2873   }
2874 
2875   if (isForcedVOP3()) {
2876     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2877     return makeArrayRef(Variants);
2878   }
2879 
2880   if (isForcedSDWA()) {
2881     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2882                                         AMDGPUAsmVariants::SDWA9};
2883     return makeArrayRef(Variants);
2884   }
2885 
2886   if (isForcedDPP()) {
2887     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2888     return makeArrayRef(Variants);
2889   }
2890 
2891   return getAllVariants();
2892 }
2893 
2894 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
2895   if (getForcedEncodingSize() == 32)
2896     return "e32";
2897 
2898   if (isForcedVOP3())
2899     return "e64";
2900 
2901   if (isForcedSDWA())
2902     return "sdwa";
2903 
2904   if (isForcedDPP())
2905     return "dpp";
2906 
2907   return "";
2908 }
2909 
2910 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2911   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2912   const unsigned Num = Desc.getNumImplicitUses();
2913   for (unsigned i = 0; i < Num; ++i) {
2914     unsigned Reg = Desc.ImplicitUses[i];
2915     switch (Reg) {
2916     case AMDGPU::FLAT_SCR:
2917     case AMDGPU::VCC:
2918     case AMDGPU::VCC_LO:
2919     case AMDGPU::VCC_HI:
2920     case AMDGPU::M0:
2921       return Reg;
2922     default:
2923       break;
2924     }
2925   }
2926   return AMDGPU::NoRegister;
2927 }
2928 
2929 // NB: This code is correct only when used to check constant
2930 // bus limitations because GFX7 does not support f16 inline constants.
2931 // Note that there are no cases when a GFX7 opcode violates
2932 // constant bus limitations due to the use of an f16 constant.
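// For reference, the 32-bit inline constants are the integers -16..64,
// +-0.5, +-1.0, +-2.0, +-4.0 and, when the target supports it, 1/(2*pi);
// any other immediate has to be encoded as a literal and therefore counts
// against the constant bus in the checks below.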
2933 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2934 unsigned OpIdx) const { 2935 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2936 2937 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2938 return false; 2939 } 2940 2941 const MCOperand &MO = Inst.getOperand(OpIdx); 2942 2943 int64_t Val = MO.getImm(); 2944 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2945 2946 switch (OpSize) { // expected operand size 2947 case 8: 2948 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2949 case 4: 2950 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2951 case 2: { 2952 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2953 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 2954 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 2955 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 2956 return AMDGPU::isInlinableIntLiteral(Val); 2957 2958 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2959 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2960 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 2961 return AMDGPU::isInlinableIntLiteralV216(Val); 2962 2963 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2964 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2965 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 2966 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2967 2968 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2969 } 2970 default: 2971 llvm_unreachable("invalid operand size"); 2972 } 2973 } 2974 2975 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2976 if (!isGFX10()) 2977 return 1; 2978 2979 switch (Opcode) { 2980 // 64-bit shift instructions can use only one scalar value input 2981 case AMDGPU::V_LSHLREV_B64: 2982 case AMDGPU::V_LSHLREV_B64_gfx10: 2983 case AMDGPU::V_LSHL_B64: 2984 case AMDGPU::V_LSHRREV_B64: 2985 case AMDGPU::V_LSHRREV_B64_gfx10: 2986 case AMDGPU::V_LSHR_B64: 2987 case AMDGPU::V_ASHRREV_I64: 2988 case AMDGPU::V_ASHRREV_I64_gfx10: 2989 case AMDGPU::V_ASHR_I64: 2990 return 1; 2991 default: 2992 return 2; 2993 } 2994 } 2995 2996 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2997 const MCOperand &MO = Inst.getOperand(OpIdx); 2998 if (MO.isImm()) { 2999 return !isInlineConstant(Inst, OpIdx); 3000 } else if (MO.isReg()) { 3001 auto Reg = MO.getReg(); 3002 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3003 auto PReg = mc2PseudoReg(Reg); 3004 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3005 } else { 3006 return true; 3007 } 3008 } 3009 3010 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 3011 const unsigned Opcode = Inst.getOpcode(); 3012 const MCInstrDesc &Desc = MII.get(Opcode); 3013 unsigned ConstantBusUseCount = 0; 3014 unsigned NumLiterals = 0; 3015 unsigned LiteralSize; 3016 3017 if (Desc.TSFlags & 3018 (SIInstrFlags::VOPC | 3019 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3020 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3021 SIInstrFlags::SDWA)) { 3022 // Check special imm operands (used by madmk, etc) 3023 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3024 ++ConstantBusUseCount; 3025 } 3026 3027 SmallDenseSet<unsigned> SGPRsUsed; 3028 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3029 if (SGPRUsed != AMDGPU::NoRegister) { 3030 SGPRsUsed.insert(SGPRUsed); 3031 ++ConstantBusUseCount; 3032 } 3033 3034 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3035 const int Src1Idx = 
AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3036 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3037 3038 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3039 3040 for (int OpIdx : OpIndices) { 3041 if (OpIdx == -1) break; 3042 3043 const MCOperand &MO = Inst.getOperand(OpIdx); 3044 if (usesConstantBus(Inst, OpIdx)) { 3045 if (MO.isReg()) { 3046 const unsigned Reg = mc2PseudoReg(MO.getReg()); 3047 // Pairs of registers with a partial intersections like these 3048 // s0, s[0:1] 3049 // flat_scratch_lo, flat_scratch 3050 // flat_scratch_lo, flat_scratch_hi 3051 // are theoretically valid but they are disabled anyway. 3052 // Note that this code mimics SIInstrInfo::verifyInstruction 3053 if (!SGPRsUsed.count(Reg)) { 3054 SGPRsUsed.insert(Reg); 3055 ++ConstantBusUseCount; 3056 } 3057 } else { // Expression or a literal 3058 3059 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3060 continue; // special operand like VINTERP attr_chan 3061 3062 // An instruction may use only one literal. 3063 // This has been validated on the previous step. 3064 // See validateVOP3Literal. 3065 // This literal may be used as more than one operand. 3066 // If all these operands are of the same size, 3067 // this literal counts as one scalar value. 3068 // Otherwise it counts as 2 scalar values. 3069 // See "GFX10 Shader Programming", section 3.6.2.3. 3070 3071 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3072 if (Size < 4) Size = 4; 3073 3074 if (NumLiterals == 0) { 3075 NumLiterals = 1; 3076 LiteralSize = Size; 3077 } else if (LiteralSize != Size) { 3078 NumLiterals = 2; 3079 } 3080 } 3081 } 3082 } 3083 } 3084 ConstantBusUseCount += NumLiterals; 3085 3086 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 3087 } 3088 3089 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 3090 const unsigned Opcode = Inst.getOpcode(); 3091 const MCInstrDesc &Desc = MII.get(Opcode); 3092 3093 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3094 if (DstIdx == -1 || 3095 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3096 return true; 3097 } 3098 3099 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3100 3101 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3102 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3103 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3104 3105 assert(DstIdx != -1); 3106 const MCOperand &Dst = Inst.getOperand(DstIdx); 3107 assert(Dst.isReg()); 3108 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3109 3110 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3111 3112 for (int SrcIdx : SrcIndices) { 3113 if (SrcIdx == -1) break; 3114 const MCOperand &Src = Inst.getOperand(SrcIdx); 3115 if (Src.isReg()) { 3116 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3117 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3118 return false; 3119 } 3120 } 3121 } 3122 3123 return true; 3124 } 3125 3126 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3127 3128 const unsigned Opc = Inst.getOpcode(); 3129 const MCInstrDesc &Desc = MII.get(Opc); 3130 3131 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3132 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3133 assert(ClampIdx != -1); 3134 return Inst.getOperand(ClampIdx).getImm() == 0; 3135 } 3136 3137 return true; 3138 } 3139 3140 bool 
AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3141 3142 const unsigned Opc = Inst.getOpcode(); 3143 const MCInstrDesc &Desc = MII.get(Opc); 3144 3145 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3146 return true; 3147 3148 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3149 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3150 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3151 3152 assert(VDataIdx != -1); 3153 3154 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3155 return true; 3156 3157 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3158 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3159 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3160 if (DMask == 0) 3161 DMask = 1; 3162 3163 unsigned DataSize = 3164 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3165 if (hasPackedD16()) { 3166 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3167 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3168 DataSize = (DataSize + 1) / 2; 3169 } 3170 3171 return (VDataSize / 4) == DataSize + TFESize; 3172 } 3173 3174 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3175 const unsigned Opc = Inst.getOpcode(); 3176 const MCInstrDesc &Desc = MII.get(Opc); 3177 3178 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3179 return true; 3180 3181 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3182 3183 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3184 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3185 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3186 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3187 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3188 3189 assert(VAddr0Idx != -1); 3190 assert(SrsrcIdx != -1); 3191 assert(SrsrcIdx > VAddr0Idx); 3192 3193 if (DimIdx == -1) 3194 return true; // intersect_ray 3195 3196 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3197 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3198 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3199 unsigned VAddrSize = 3200 IsNSA ? SrsrcIdx - VAddr0Idx 3201 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3202 3203 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3204 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3205 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3206 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3207 if (!IsNSA) { 3208 if (AddrSize > 8) 3209 AddrSize = 16; 3210 else if (AddrSize > 4) 3211 AddrSize = 8; 3212 } 3213 3214 return VAddrSize == AddrSize; 3215 } 3216 3217 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3218 3219 const unsigned Opc = Inst.getOpcode(); 3220 const MCInstrDesc &Desc = MII.get(Opc); 3221 3222 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3223 return true; 3224 if (!Desc.mayLoad() || !Desc.mayStore()) 3225 return true; // Not atomic 3226 3227 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3228 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3229 3230 // This is an incomplete check because image_atomic_cmpswap 3231 // may only use 0x3 and 0xf while other atomic operations 3232 // may use 0x1 and 0x3. However these limitations are 3233 // verified when we check that dmask matches dst size. 
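  // (image_atomic_cmpswap carries both a compare and a swap value, hence the
  // 0x3/0xf pairs; single-value atomics use 0x1 or 0x3.)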
3234 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3235 } 3236 3237 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3238 3239 const unsigned Opc = Inst.getOpcode(); 3240 const MCInstrDesc &Desc = MII.get(Opc); 3241 3242 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3243 return true; 3244 3245 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3246 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3247 3248 // GATHER4 instructions use dmask in a different fashion compared to 3249 // other MIMG instructions. The only useful DMASK values are 3250 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3251 // (red,red,red,red) etc.) The ISA document doesn't mention 3252 // this. 3253 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3254 } 3255 3256 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3257 { 3258 switch (Opcode) { 3259 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3260 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3261 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3262 return true; 3263 default: 3264 return false; 3265 } 3266 } 3267 3268 // movrels* opcodes should only allow VGPRS as src0. 3269 // This is specified in .td description for vop1/vop3, 3270 // but sdwa is handled differently. See isSDWAOperand. 3271 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3272 3273 const unsigned Opc = Inst.getOpcode(); 3274 const MCInstrDesc &Desc = MII.get(Opc); 3275 3276 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3277 return true; 3278 3279 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3280 assert(Src0Idx != -1); 3281 3282 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3283 if (!Src0.isReg()) 3284 return false; 3285 3286 auto Reg = Src0.getReg(); 3287 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3288 return !isSGPR(mc2PseudoReg(Reg), TRI); 3289 } 3290 3291 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) { 3292 3293 const unsigned Opc = Inst.getOpcode(); 3294 3295 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3296 return true; 3297 3298 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3299 assert(Src0Idx != -1); 3300 3301 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3302 if (!Src0.isReg()) 3303 return true; 3304 3305 auto Reg = Src0.getReg(); 3306 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3307 if (isSGPR(mc2PseudoReg(Reg), TRI)) { 3308 Error(getLoc(), "source operand must be either a VGPR or an inline constant"); 3309 return false; 3310 } 3311 3312 return true; 3313 } 3314 3315 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3316 switch (Inst.getOpcode()) { 3317 default: 3318 return true; 3319 case V_DIV_SCALE_F32_gfx6_gfx7: 3320 case V_DIV_SCALE_F32_vi: 3321 case V_DIV_SCALE_F32_gfx10: 3322 case V_DIV_SCALE_F64_gfx6_gfx7: 3323 case V_DIV_SCALE_F64_vi: 3324 case V_DIV_SCALE_F64_gfx10: 3325 break; 3326 } 3327 3328 // TODO: Check that src0 = src1 or src2. 
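  // An example of input the loop below is meant to reject:
  //   v_div_scale_f32 v0, vcc, |v1|, v2, v3
  // since the VOP3B encoding has no bits to express the ABS source modifier.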
3329 
3330   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3331                     AMDGPU::OpName::src1_modifiers,
3332                     AMDGPU::OpName::src2_modifiers}) {
3333     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3334             .getImm() &
3335         SISrcMods::ABS) {
3336       Error(getLoc(), "ABS not allowed in VOP3B instructions");
3337       return false;
3338     }
3339   }
3340 
3341   return true;
3342 }
3343 
3344 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3345 
3346   const unsigned Opc = Inst.getOpcode();
3347   const MCInstrDesc &Desc = MII.get(Opc);
3348 
3349   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3350     return true;
3351 
3352   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3353   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3354     if (isCI() || isSI())
3355       return false;
3356   }
3357 
3358   return true;
3359 }
3360 
3361 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3362   const unsigned Opc = Inst.getOpcode();
3363   const MCInstrDesc &Desc = MII.get(Opc);
3364 
3365   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3366     return true;
3367 
3368   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3369   if (DimIdx < 0)
3370     return true;
3371 
3372   long Imm = Inst.getOperand(DimIdx).getImm();
3373   if (Imm < 0 || Imm >= 8)
3374     return false;
3375 
3376   return true;
3377 }
3378 
3379 static bool IsRevOpcode(const unsigned Opcode)
3380 {
3381   switch (Opcode) {
3382   case AMDGPU::V_SUBREV_F32_e32:
3383   case AMDGPU::V_SUBREV_F32_e64:
3384   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3385   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3386   case AMDGPU::V_SUBREV_F32_e32_vi:
3387   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3388   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3389   case AMDGPU::V_SUBREV_F32_e64_vi:
3390 
3391   case AMDGPU::V_SUBREV_CO_U32_e32:
3392   case AMDGPU::V_SUBREV_CO_U32_e64:
3393   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3394   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3395 
3396   case AMDGPU::V_SUBBREV_U32_e32:
3397   case AMDGPU::V_SUBBREV_U32_e64:
3398   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3399   case AMDGPU::V_SUBBREV_U32_e32_vi:
3400   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3401   case AMDGPU::V_SUBBREV_U32_e64_vi:
3402 
3403   case AMDGPU::V_SUBREV_U32_e32:
3404   case AMDGPU::V_SUBREV_U32_e64:
3405   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3406   case AMDGPU::V_SUBREV_U32_e32_vi:
3407   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3408   case AMDGPU::V_SUBREV_U32_e64_vi:
3409 
3410   case AMDGPU::V_SUBREV_F16_e32:
3411   case AMDGPU::V_SUBREV_F16_e64:
3412   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3413   case AMDGPU::V_SUBREV_F16_e32_vi:
3414   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3415   case AMDGPU::V_SUBREV_F16_e64_vi:
3416 
3417   case AMDGPU::V_SUBREV_U16_e32:
3418   case AMDGPU::V_SUBREV_U16_e64:
3419   case AMDGPU::V_SUBREV_U16_e32_vi:
3420   case AMDGPU::V_SUBREV_U16_e64_vi:
3421 
3422   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3423   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3424   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3425 
3426   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3427   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3428 
3429   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3430   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3431 
3432   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3433   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3434 
3435   case AMDGPU::V_LSHRREV_B32_e32:
3436   case AMDGPU::V_LSHRREV_B32_e64:
3437   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3438   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3439   case AMDGPU::V_LSHRREV_B32_e32_vi:
3440   case AMDGPU::V_LSHRREV_B32_e64_vi:
3441   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3442   case
AMDGPU::V_LSHRREV_B32_e64_gfx10: 3443 3444 case AMDGPU::V_ASHRREV_I32_e32: 3445 case AMDGPU::V_ASHRREV_I32_e64: 3446 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3447 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3448 case AMDGPU::V_ASHRREV_I32_e32_vi: 3449 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3450 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3451 case AMDGPU::V_ASHRREV_I32_e64_vi: 3452 3453 case AMDGPU::V_LSHLREV_B32_e32: 3454 case AMDGPU::V_LSHLREV_B32_e64: 3455 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3456 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3457 case AMDGPU::V_LSHLREV_B32_e32_vi: 3458 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3459 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3460 case AMDGPU::V_LSHLREV_B32_e64_vi: 3461 3462 case AMDGPU::V_LSHLREV_B16_e32: 3463 case AMDGPU::V_LSHLREV_B16_e64: 3464 case AMDGPU::V_LSHLREV_B16_e32_vi: 3465 case AMDGPU::V_LSHLREV_B16_e64_vi: 3466 case AMDGPU::V_LSHLREV_B16_gfx10: 3467 3468 case AMDGPU::V_LSHRREV_B16_e32: 3469 case AMDGPU::V_LSHRREV_B16_e64: 3470 case AMDGPU::V_LSHRREV_B16_e32_vi: 3471 case AMDGPU::V_LSHRREV_B16_e64_vi: 3472 case AMDGPU::V_LSHRREV_B16_gfx10: 3473 3474 case AMDGPU::V_ASHRREV_I16_e32: 3475 case AMDGPU::V_ASHRREV_I16_e64: 3476 case AMDGPU::V_ASHRREV_I16_e32_vi: 3477 case AMDGPU::V_ASHRREV_I16_e64_vi: 3478 case AMDGPU::V_ASHRREV_I16_gfx10: 3479 3480 case AMDGPU::V_LSHLREV_B64: 3481 case AMDGPU::V_LSHLREV_B64_gfx10: 3482 case AMDGPU::V_LSHLREV_B64_vi: 3483 3484 case AMDGPU::V_LSHRREV_B64: 3485 case AMDGPU::V_LSHRREV_B64_gfx10: 3486 case AMDGPU::V_LSHRREV_B64_vi: 3487 3488 case AMDGPU::V_ASHRREV_I64: 3489 case AMDGPU::V_ASHRREV_I64_gfx10: 3490 case AMDGPU::V_ASHRREV_I64_vi: 3491 3492 case AMDGPU::V_PK_LSHLREV_B16: 3493 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3494 case AMDGPU::V_PK_LSHLREV_B16_vi: 3495 3496 case AMDGPU::V_PK_LSHRREV_B16: 3497 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3498 case AMDGPU::V_PK_LSHRREV_B16_vi: 3499 case AMDGPU::V_PK_ASHRREV_I16: 3500 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3501 case AMDGPU::V_PK_ASHRREV_I16_vi: 3502 return true; 3503 default: 3504 return false; 3505 } 3506 } 3507 3508 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3509 3510 using namespace SIInstrFlags; 3511 const unsigned Opcode = Inst.getOpcode(); 3512 const MCInstrDesc &Desc = MII.get(Opcode); 3513 3514 // lds_direct register is defined so that it can be used 3515 // with 9-bit operands only. Ignore encodings which do not accept these. 3516 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3517 return true; 3518 3519 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3520 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3521 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3522 3523 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3524 3525 // lds_direct cannot be specified as either src1 or src2. 3526 for (int SrcIdx : SrcIndices) { 3527 if (SrcIdx == -1) break; 3528 const MCOperand &Src = Inst.getOperand(SrcIdx); 3529 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3530 return false; 3531 } 3532 } 3533 3534 if (Src0Idx == -1) 3535 return true; 3536 3537 const MCOperand &Src = Inst.getOperand(Src0Idx); 3538 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3539 return true; 3540 3541 // lds_direct is specified as src0. Check additional limitations. 
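  // For example, "v_mov_b32 v0, lds_direct" passes this check, while an SDWA
  // form or a *rev opcode such as "v_lshlrev_b32 v0, lds_direct, v1" is
  // rejected below.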
3542 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3543 } 3544 3545 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3546 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3547 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3548 if (Op.isFlatOffset()) 3549 return Op.getStartLoc(); 3550 } 3551 return getLoc(); 3552 } 3553 3554 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3555 const OperandVector &Operands) { 3556 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3557 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3558 return true; 3559 3560 auto Opcode = Inst.getOpcode(); 3561 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3562 assert(OpNum != -1); 3563 3564 const auto &Op = Inst.getOperand(OpNum); 3565 if (!hasFlatOffsets() && Op.getImm() != 0) { 3566 Error(getFlatOffsetLoc(Operands), 3567 "flat offset modifier is not supported on this GPU"); 3568 return false; 3569 } 3570 3571 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3572 // For FLAT segment the offset must be positive; 3573 // MSB is ignored and forced to zero. 3574 unsigned OffsetSize = isGFX9() ? 13 : 12; 3575 if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) { 3576 if (!isIntN(OffsetSize, Op.getImm())) { 3577 Error(getFlatOffsetLoc(Operands), 3578 isGFX9() ? "expected a 13-bit signed offset" : 3579 "expected a 12-bit signed offset"); 3580 return false; 3581 } 3582 } else { 3583 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3584 Error(getFlatOffsetLoc(Operands), 3585 isGFX9() ? "expected a 12-bit unsigned offset" : 3586 "expected an 11-bit unsigned offset"); 3587 return false; 3588 } 3589 } 3590 3591 return true; 3592 } 3593 3594 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3595 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3596 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3597 if (Op.isSMEMOffset()) 3598 return Op.getStartLoc(); 3599 } 3600 return getLoc(); 3601 } 3602 3603 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3604 const OperandVector &Operands) { 3605 if (isCI() || isSI()) 3606 return true; 3607 3608 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3609 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3610 return true; 3611 3612 auto Opcode = Inst.getOpcode(); 3613 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3614 if (OpNum == -1) 3615 return true; 3616 3617 const auto &Op = Inst.getOperand(OpNum); 3618 if (!Op.isImm()) 3619 return true; 3620 3621 uint64_t Offset = Op.getImm(); 3622 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3623 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3624 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3625 return true; 3626 3627 Error(getSMEMOffsetLoc(Operands), 3628 (isVI() || IsBuffer) ? 
"expected a 20-bit unsigned offset" : 3629 "expected a 21-bit signed offset"); 3630 3631 return false; 3632 } 3633 3634 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3635 unsigned Opcode = Inst.getOpcode(); 3636 const MCInstrDesc &Desc = MII.get(Opcode); 3637 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3638 return true; 3639 3640 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3641 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3642 3643 const int OpIndices[] = { Src0Idx, Src1Idx }; 3644 3645 unsigned NumExprs = 0; 3646 unsigned NumLiterals = 0; 3647 uint32_t LiteralValue; 3648 3649 for (int OpIdx : OpIndices) { 3650 if (OpIdx == -1) break; 3651 3652 const MCOperand &MO = Inst.getOperand(OpIdx); 3653 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3654 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3655 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3656 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3657 if (NumLiterals == 0 || LiteralValue != Value) { 3658 LiteralValue = Value; 3659 ++NumLiterals; 3660 } 3661 } else if (MO.isExpr()) { 3662 ++NumExprs; 3663 } 3664 } 3665 } 3666 3667 return NumLiterals + NumExprs <= 1; 3668 } 3669 3670 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3671 const unsigned Opc = Inst.getOpcode(); 3672 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3673 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3674 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3675 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3676 3677 if (OpSel & ~3) 3678 return false; 3679 } 3680 return true; 3681 } 3682 3683 // Check if VCC register matches wavefront size 3684 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3685 auto FB = getFeatureBits(); 3686 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3687 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3688 } 3689 3690 // VOP3 literal is only allowed in GFX10+ and only one can be used 3691 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3692 unsigned Opcode = Inst.getOpcode(); 3693 const MCInstrDesc &Desc = MII.get(Opcode); 3694 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3695 return true; 3696 3697 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3698 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3699 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3700 3701 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3702 3703 unsigned NumExprs = 0; 3704 unsigned NumLiterals = 0; 3705 uint32_t LiteralValue; 3706 3707 for (int OpIdx : OpIndices) { 3708 if (OpIdx == -1) break; 3709 3710 const MCOperand &MO = Inst.getOperand(OpIdx); 3711 if (!MO.isImm() && !MO.isExpr()) 3712 continue; 3713 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3714 continue; 3715 3716 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3717 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3718 return false; 3719 3720 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3721 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3722 if (NumLiterals == 0 || LiteralValue != Value) { 3723 LiteralValue = Value; 3724 ++NumLiterals; 3725 } 3726 } else if (MO.isExpr()) { 3727 ++NumExprs; 3728 } 3729 } 3730 NumLiterals += NumExprs; 3731 3732 return !NumLiterals || 3733 (NumLiterals == 1 && 
getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3734 } 3735 3736 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 3737 const OperandVector &Operands, 3738 const SMLoc &IDLoc) { 3739 int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 3740 AMDGPU::OpName::glc1); 3741 if (GLCPos != -1) { 3742 // -1 is set by GLC_1 default operand. In all cases "glc" must be present 3743 // in the asm string, and the default value means it is not present. 3744 if (Inst.getOperand(GLCPos).getImm() == -1) { 3745 Error(IDLoc, "instruction must use glc"); 3746 return false; 3747 } 3748 } 3749 3750 return true; 3751 } 3752 3753 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3754 const SMLoc &IDLoc, 3755 const OperandVector &Operands) { 3756 if (!validateLdsDirect(Inst)) { 3757 Error(IDLoc, 3758 "invalid use of lds_direct"); 3759 return false; 3760 } 3761 if (!validateSOPLiteral(Inst)) { 3762 Error(IDLoc, 3763 "only one literal operand is allowed"); 3764 return false; 3765 } 3766 if (!validateVOP3Literal(Inst)) { 3767 Error(IDLoc, 3768 "invalid literal operand"); 3769 return false; 3770 } 3771 if (!validateConstantBusLimitations(Inst)) { 3772 Error(IDLoc, 3773 "invalid operand (violates constant bus restrictions)"); 3774 return false; 3775 } 3776 if (!validateEarlyClobberLimitations(Inst)) { 3777 Error(IDLoc, 3778 "destination must be different than all sources"); 3779 return false; 3780 } 3781 if (!validateIntClampSupported(Inst)) { 3782 Error(IDLoc, 3783 "integer clamping is not supported on this GPU"); 3784 return false; 3785 } 3786 if (!validateOpSel(Inst)) { 3787 Error(IDLoc, 3788 "invalid op_sel operand"); 3789 return false; 3790 } 3791 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3792 if (!validateMIMGD16(Inst)) { 3793 Error(IDLoc, 3794 "d16 modifier is not supported on this GPU"); 3795 return false; 3796 } 3797 if (!validateMIMGDim(Inst)) { 3798 Error(IDLoc, "dim modifier is required on this GPU"); 3799 return false; 3800 } 3801 if (!validateMIMGDataSize(Inst)) { 3802 Error(IDLoc, 3803 "image data size does not match dmask and tfe"); 3804 return false; 3805 } 3806 if (!validateMIMGAddrSize(Inst)) { 3807 Error(IDLoc, 3808 "image address size does not match dim and a16"); 3809 return false; 3810 } 3811 if (!validateMIMGAtomicDMask(Inst)) { 3812 Error(IDLoc, 3813 "invalid atomic image dmask"); 3814 return false; 3815 } 3816 if (!validateMIMGGatherDMask(Inst)) { 3817 Error(IDLoc, 3818 "invalid image_gather dmask: only one bit must be set"); 3819 return false; 3820 } 3821 if (!validateMovrels(Inst)) { 3822 Error(IDLoc, "source operand must be a VGPR"); 3823 return false; 3824 } 3825 if (!validateFlatOffset(Inst, Operands)) { 3826 return false; 3827 } 3828 if (!validateSMEMOffset(Inst, Operands)) { 3829 return false; 3830 } 3831 if (!validateMAIAccWrite(Inst)) { 3832 return false; 3833 } 3834 if (!validateDivScale(Inst)) { 3835 return false; 3836 } 3837 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 3838 return false; 3839 } 3840 3841 return true; 3842 } 3843 3844 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3845 const FeatureBitset &FBS, 3846 unsigned VariantID = 0); 3847 3848 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 3849 const FeatureBitset &AvailableFeatures, 3850 unsigned VariantID); 3851 3852 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3853 const FeatureBitset &FBS) { 3854 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 3855 } 3856 3857 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 
3858 const FeatureBitset &FBS, 3859 ArrayRef<unsigned> Variants) { 3860 for (auto Variant : Variants) { 3861 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 3862 return true; 3863 } 3864 3865 return false; 3866 } 3867 3868 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 3869 const SMLoc &IDLoc) { 3870 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3871 3872 // Check if requested instruction variant is supported. 3873 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 3874 return false; 3875 3876 // This instruction is not supported. 3877 // Clear any other pending errors because they are no longer relevant. 3878 getParser().clearPendingErrors(); 3879 3880 // Requested instruction variant is not supported. 3881 // Check if any other variants are supported. 3882 StringRef VariantName = getMatchedVariantName(); 3883 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 3884 return Error(IDLoc, 3885 Twine(VariantName, 3886 " variant of this instruction is not supported")); 3887 } 3888 3889 // Finally check if this instruction is supported on any other GPU. 3890 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 3891 return Error(IDLoc, "instruction not supported on this GPU"); 3892 } 3893 3894 // Instruction not supported on any GPU. Probably a typo. 3895 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 3896 return Error(IDLoc, "invalid instruction" + Suggestion); 3897 } 3898 3899 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3900 OperandVector &Operands, 3901 MCStreamer &Out, 3902 uint64_t &ErrorInfo, 3903 bool MatchingInlineAsm) { 3904 MCInst Inst; 3905 unsigned Result = Match_Success; 3906 for (auto Variant : getMatchedVariants()) { 3907 uint64_t EI; 3908 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3909 Variant); 3910 // We order match statuses from least to most specific. We use most specific 3911 // status as resulting 3912 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3913 if ((R == Match_Success) || 3914 (R == Match_PreferE32) || 3915 (R == Match_MissingFeature && Result != Match_PreferE32) || 3916 (R == Match_InvalidOperand && Result != Match_MissingFeature 3917 && Result != Match_PreferE32) || 3918 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3919 && Result != Match_MissingFeature 3920 && Result != Match_PreferE32)) { 3921 Result = R; 3922 ErrorInfo = EI; 3923 } 3924 if (R == Match_Success) 3925 break; 3926 } 3927 3928 if (Result == Match_Success) { 3929 if (!validateInstruction(Inst, IDLoc, Operands)) { 3930 return true; 3931 } 3932 Inst.setLoc(IDLoc); 3933 Out.emitInstruction(Inst, getSTI()); 3934 return false; 3935 } 3936 3937 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 3938 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 3939 return true; 3940 } 3941 3942 switch (Result) { 3943 default: break; 3944 case Match_MissingFeature: 3945 // It has been verified that the specified instruction 3946 // mnemonic is valid. A match was found but it requires 3947 // features which are not supported on this GPU. 
3948 return Error(IDLoc, "operands are not valid for this GPU or mode"); 3949 3950 case Match_InvalidOperand: { 3951 SMLoc ErrorLoc = IDLoc; 3952 if (ErrorInfo != ~0ULL) { 3953 if (ErrorInfo >= Operands.size()) { 3954 return Error(IDLoc, "too few operands for instruction"); 3955 } 3956 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3957 if (ErrorLoc == SMLoc()) 3958 ErrorLoc = IDLoc; 3959 } 3960 return Error(ErrorLoc, "invalid operand for instruction"); 3961 } 3962 3963 case Match_PreferE32: 3964 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3965 "should be encoded as e32"); 3966 case Match_MnemonicFail: 3967 llvm_unreachable("Invalid instructions should have been handled already"); 3968 } 3969 llvm_unreachable("Implement any new match types added!"); 3970 } 3971 3972 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3973 int64_t Tmp = -1; 3974 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3975 return true; 3976 } 3977 if (getParser().parseAbsoluteExpression(Tmp)) { 3978 return true; 3979 } 3980 Ret = static_cast<uint32_t>(Tmp); 3981 return false; 3982 } 3983 3984 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3985 uint32_t &Minor) { 3986 if (ParseAsAbsoluteExpression(Major)) 3987 return TokError("invalid major version"); 3988 3989 if (getLexer().isNot(AsmToken::Comma)) 3990 return TokError("minor version number required, comma expected"); 3991 Lex(); 3992 3993 if (ParseAsAbsoluteExpression(Minor)) 3994 return TokError("invalid minor version"); 3995 3996 return false; 3997 } 3998 3999 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4000 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4001 return TokError("directive only supported for amdgcn architecture"); 4002 4003 std::string Target; 4004 4005 SMLoc TargetStart = getTok().getLoc(); 4006 if (getParser().parseEscapedString(Target)) 4007 return true; 4008 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4009 4010 std::string ExpectedTarget; 4011 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 4012 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 4013 4014 if (Target != ExpectedTargetOS.str()) 4015 return getParser().Error(TargetRange.Start, "target must match options", 4016 TargetRange); 4017 4018 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 4019 return false; 4020 } 4021 4022 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4023 return getParser().Error(Range.Start, "value out of range", Range); 4024 } 4025 4026 bool AMDGPUAsmParser::calculateGPRBlocks( 4027 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4028 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4029 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4030 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4031 // TODO(scott.linder): These calculations are duplicated from 4032 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
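// Rough sketch of the flow below: on gfx10+ the SGPR total used for the
// descriptor is forced to 0; on older targets the extra SGPRs implied by VCC,
// FLAT_SCRATCH and XNACK usage are added to NextFreeSGPR and range-checked
// against the addressable SGPR count, and both register totals are then
// converted to granulated block counts.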
4033 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4034 4035 unsigned NumVGPRs = NextFreeVGPR; 4036 unsigned NumSGPRs = NextFreeSGPR; 4037 4038 if (Version.Major >= 10) 4039 NumSGPRs = 0; 4040 else { 4041 unsigned MaxAddressableNumSGPRs = 4042 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4043 4044 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4045 NumSGPRs > MaxAddressableNumSGPRs) 4046 return OutOfRangeError(SGPRRange); 4047 4048 NumSGPRs += 4049 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4050 4051 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4052 NumSGPRs > MaxAddressableNumSGPRs) 4053 return OutOfRangeError(SGPRRange); 4054 4055 if (Features.test(FeatureSGPRInitBug)) 4056 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4057 } 4058 4059 VGPRBlocks = 4060 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4061 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4062 4063 return false; 4064 } 4065 4066 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4067 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4068 return TokError("directive only supported for amdgcn architecture"); 4069 4070 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4071 return TokError("directive only supported for amdhsa OS"); 4072 4073 StringRef KernelName; 4074 if (getParser().parseIdentifier(KernelName)) 4075 return true; 4076 4077 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4078 4079 StringSet<> Seen; 4080 4081 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4082 4083 SMRange VGPRRange; 4084 uint64_t NextFreeVGPR = 0; 4085 SMRange SGPRRange; 4086 uint64_t NextFreeSGPR = 0; 4087 unsigned UserSGPRCount = 0; 4088 bool ReserveVCC = true; 4089 bool ReserveFlatScr = true; 4090 bool ReserveXNACK = hasXNACK(); 4091 Optional<bool> EnableWavefrontSize32; 4092 4093 while (true) { 4094 while (getLexer().is(AsmToken::EndOfStatement)) 4095 Lex(); 4096 4097 if (getLexer().isNot(AsmToken::Identifier)) 4098 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 4099 4100 StringRef ID = getTok().getIdentifier(); 4101 SMRange IDRange = getTok().getLocRange(); 4102 Lex(); 4103 4104 if (ID == ".end_amdhsa_kernel") 4105 break; 4106 4107 if (Seen.find(ID) != Seen.end()) 4108 return TokError(".amdhsa_ directives cannot be repeated"); 4109 Seen.insert(ID); 4110 4111 SMLoc ValStart = getTok().getLoc(); 4112 int64_t IVal; 4113 if (getParser().parseAbsoluteExpression(IVal)) 4114 return true; 4115 SMLoc ValEnd = getTok().getLoc(); 4116 SMRange ValRange = SMRange(ValStart, ValEnd); 4117 4118 if (IVal < 0) 4119 return OutOfRangeError(ValRange); 4120 4121 uint64_t Val = IVal; 4122 4123 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4124 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4125 return OutOfRangeError(RANGE); \ 4126 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4127 4128 if (ID == ".amdhsa_group_segment_fixed_size") { 4129 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4130 return OutOfRangeError(ValRange); 4131 KD.group_segment_fixed_size = Val; 4132 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4133 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4134 return OutOfRangeError(ValRange); 4135 KD.private_segment_fixed_size = Val; 4136 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4137 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4138 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4139 Val, ValRange); 
4140 if (Val) 4141 UserSGPRCount += 4; 4142 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4143 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4144 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4145 ValRange); 4146 if (Val) 4147 UserSGPRCount += 2; 4148 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4149 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4150 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4151 ValRange); 4152 if (Val) 4153 UserSGPRCount += 2; 4154 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4155 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4156 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4157 Val, ValRange); 4158 if (Val) 4159 UserSGPRCount += 2; 4160 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4161 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4162 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4163 ValRange); 4164 if (Val) 4165 UserSGPRCount += 2; 4166 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4167 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4168 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4169 ValRange); 4170 if (Val) 4171 UserSGPRCount += 2; 4172 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4173 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4174 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4175 Val, ValRange); 4176 if (Val) 4177 UserSGPRCount += 1; 4178 } else if (ID == ".amdhsa_wavefront_size32") { 4179 if (IVersion.Major < 10) 4180 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4181 IDRange); 4182 EnableWavefrontSize32 = Val; 4183 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4184 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4185 Val, ValRange); 4186 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4187 PARSE_BITS_ENTRY( 4188 KD.compute_pgm_rsrc2, 4189 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 4190 ValRange); 4191 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4192 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4193 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4194 ValRange); 4195 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4196 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4197 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4198 ValRange); 4199 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4200 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4201 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4202 ValRange); 4203 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4204 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4205 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4206 ValRange); 4207 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4208 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4209 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4210 ValRange); 4211 } else if (ID == ".amdhsa_next_free_vgpr") { 4212 VGPRRange = ValRange; 4213 NextFreeVGPR = Val; 4214 } else if (ID == ".amdhsa_next_free_sgpr") { 4215 SGPRRange = ValRange; 4216 NextFreeSGPR = Val; 4217 } else if (ID == ".amdhsa_reserve_vcc") { 4218 if (!isUInt<1>(Val)) 4219 return OutOfRangeError(ValRange); 4220 ReserveVCC = Val; 4221 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4222 if (IVersion.Major < 7) 4223 return getParser().Error(IDRange.Start, "directive requires gfx7+", 4224 IDRange); 4225 if (!isUInt<1>(Val)) 4226 return OutOfRangeError(ValRange); 4227 ReserveFlatScr = Val; 4228 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4229 if (IVersion.Major < 8) 4230 return getParser().Error(IDRange.Start, 
"directive requires gfx8+", 4231 IDRange); 4232 if (!isUInt<1>(Val)) 4233 return OutOfRangeError(ValRange); 4234 ReserveXNACK = Val; 4235 } else if (ID == ".amdhsa_float_round_mode_32") { 4236 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4237 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4238 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4239 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4240 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4241 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4242 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4243 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4244 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4245 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4246 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4247 ValRange); 4248 } else if (ID == ".amdhsa_dx10_clamp") { 4249 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4250 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4251 } else if (ID == ".amdhsa_ieee_mode") { 4252 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4253 Val, ValRange); 4254 } else if (ID == ".amdhsa_fp16_overflow") { 4255 if (IVersion.Major < 9) 4256 return getParser().Error(IDRange.Start, "directive requires gfx9+", 4257 IDRange); 4258 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4259 ValRange); 4260 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4261 if (IVersion.Major < 10) 4262 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4263 IDRange); 4264 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4265 ValRange); 4266 } else if (ID == ".amdhsa_memory_ordered") { 4267 if (IVersion.Major < 10) 4268 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4269 IDRange); 4270 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4271 ValRange); 4272 } else if (ID == ".amdhsa_forward_progress") { 4273 if (IVersion.Major < 10) 4274 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4275 IDRange); 4276 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4277 ValRange); 4278 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4279 PARSE_BITS_ENTRY( 4280 KD.compute_pgm_rsrc2, 4281 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4282 ValRange); 4283 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4284 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4285 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4286 Val, ValRange); 4287 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4288 PARSE_BITS_ENTRY( 4289 KD.compute_pgm_rsrc2, 4290 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4291 ValRange); 4292 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4293 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4294 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4295 Val, ValRange); 4296 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4297 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4298 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4299 Val, ValRange); 4300 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4301 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4302 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4303 Val, ValRange); 4304 } else if (ID == ".amdhsa_exception_int_div_zero") { 4305 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4306 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4307 Val, ValRange); 4308 } else { 4309 return getParser().Error(IDRange.Start, 
4310 "unknown .amdhsa_kernel directive", IDRange); 4311 } 4312 4313 #undef PARSE_BITS_ENTRY 4314 } 4315 4316 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4317 return TokError(".amdhsa_next_free_vgpr directive is required"); 4318 4319 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4320 return TokError(".amdhsa_next_free_sgpr directive is required"); 4321 4322 unsigned VGPRBlocks; 4323 unsigned SGPRBlocks; 4324 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4325 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4326 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4327 SGPRBlocks)) 4328 return true; 4329 4330 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4331 VGPRBlocks)) 4332 return OutOfRangeError(VGPRRange); 4333 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4334 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4335 4336 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4337 SGPRBlocks)) 4338 return OutOfRangeError(SGPRRange); 4339 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4340 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4341 SGPRBlocks); 4342 4343 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4344 return TokError("too many user SGPRs enabled"); 4345 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4346 UserSGPRCount); 4347 4348 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4349 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4350 ReserveFlatScr, ReserveXNACK); 4351 return false; 4352 } 4353 4354 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4355 uint32_t Major; 4356 uint32_t Minor; 4357 4358 if (ParseDirectiveMajorMinor(Major, Minor)) 4359 return true; 4360 4361 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4362 return false; 4363 } 4364 4365 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4366 uint32_t Major; 4367 uint32_t Minor; 4368 uint32_t Stepping; 4369 StringRef VendorName; 4370 StringRef ArchName; 4371 4372 // If this directive has no arguments, then use the ISA version for the 4373 // targeted GPU. 
4374 if (getLexer().is(AsmToken::EndOfStatement)) { 4375 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4376 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4377 ISA.Stepping, 4378 "AMD", "AMDGPU"); 4379 return false; 4380 } 4381 4382 if (ParseDirectiveMajorMinor(Major, Minor)) 4383 return true; 4384 4385 if (getLexer().isNot(AsmToken::Comma)) 4386 return TokError("stepping version number required, comma expected"); 4387 Lex(); 4388 4389 if (ParseAsAbsoluteExpression(Stepping)) 4390 return TokError("invalid stepping version"); 4391 4392 if (getLexer().isNot(AsmToken::Comma)) 4393 return TokError("vendor name required, comma expected"); 4394 Lex(); 4395 4396 if (getLexer().isNot(AsmToken::String)) 4397 return TokError("invalid vendor name"); 4398 4399 VendorName = getLexer().getTok().getStringContents(); 4400 Lex(); 4401 4402 if (getLexer().isNot(AsmToken::Comma)) 4403 return TokError("arch name required, comma expected"); 4404 Lex(); 4405 4406 if (getLexer().isNot(AsmToken::String)) 4407 return TokError("invalid arch name"); 4408 4409 ArchName = getLexer().getTok().getStringContents(); 4410 Lex(); 4411 4412 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4413 VendorName, ArchName); 4414 return false; 4415 } 4416 4417 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4418 amd_kernel_code_t &Header) { 4419 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4420 // assembly for backwards compatibility. 4421 if (ID == "max_scratch_backing_memory_byte_size") { 4422 Parser.eatToEndOfStatement(); 4423 return false; 4424 } 4425 4426 SmallString<40> ErrStr; 4427 raw_svector_ostream Err(ErrStr); 4428 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4429 return TokError(Err.str()); 4430 } 4431 Lex(); 4432 4433 if (ID == "enable_wavefront_size32") { 4434 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4435 if (!isGFX10()) 4436 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4437 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4438 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4439 } else { 4440 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4441 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4442 } 4443 } 4444 4445 if (ID == "wavefront_size") { 4446 if (Header.wavefront_size == 5) { 4447 if (!isGFX10()) 4448 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4449 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4450 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4451 } else if (Header.wavefront_size == 6) { 4452 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4453 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4454 } 4455 } 4456 4457 if (ID == "enable_wgp_mode") { 4458 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4459 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4460 } 4461 4462 if (ID == "enable_mem_ordered") { 4463 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4464 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4465 } 4466 4467 if (ID == "enable_fwd_progress") { 4468 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4469 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4470 } 4471 4472 return false; 4473 } 4474 4475 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4476 amd_kernel_code_t Header; 4477 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4478 4479 while (true) { 4480 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4481 // will set the current token to EndOfStatement. 4482 while(getLexer().is(AsmToken::EndOfStatement)) 4483 Lex(); 4484 4485 if (getLexer().isNot(AsmToken::Identifier)) 4486 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4487 4488 StringRef ID = getLexer().getTok().getIdentifier(); 4489 Lex(); 4490 4491 if (ID == ".end_amd_kernel_code_t") 4492 break; 4493 4494 if (ParseAMDKernelCodeTValue(ID, Header)) 4495 return true; 4496 } 4497 4498 getTargetStreamer().EmitAMDKernelCodeT(Header); 4499 4500 return false; 4501 } 4502 4503 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4504 if (getLexer().isNot(AsmToken::Identifier)) 4505 return TokError("expected symbol name"); 4506 4507 StringRef KernelName = Parser.getTok().getString(); 4508 4509 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4510 ELF::STT_AMDGPU_HSA_KERNEL); 4511 Lex(); 4512 4513 KernelScope.initialize(getContext()); 4514 return false; 4515 } 4516 4517 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4518 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4519 return Error(getParser().getTok().getLoc(), 4520 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4521 "architectures"); 4522 } 4523 4524 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4525 4526 std::string ISAVersionStringFromSTI; 4527 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4528 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4529 4530 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4531 return Error(getParser().getTok().getLoc(), 4532 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4533 "arguments specified through the command line"); 4534 } 4535 4536 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4537 Lex(); 4538 4539 return false; 4540 } 4541 4542 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4543 const char *AssemblerDirectiveBegin; 4544 const char *AssemblerDirectiveEnd; 4545 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4546 isHsaAbiVersion3(&getSTI()) 4547 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4548 HSAMD::V3::AssemblerDirectiveEnd) 4549 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4550 HSAMD::AssemblerDirectiveEnd); 4551 4552 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4553 return Error(getParser().getTok().getLoc(), 4554 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4555 "not available on non-amdhsa OSes")).str()); 4556 } 4557 4558 std::string HSAMetadataString; 4559 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4560 HSAMetadataString)) 4561 return true; 4562 4563 if (isHsaAbiVersion3(&getSTI())) { 4564 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4565 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4566 } else { 4567 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4568 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4569 } 4570 4571 return false; 4572 } 4573 4574 /// Common code to parse out a block of text (typically YAML) between start and 4575 /// end directives. 
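/// For example (a sketch; the exact directive names depend on the metadata
/// kind and ABI version handled by the callers):
///   .amdgpu_metadata
///   <YAML text collected verbatim>
///   .end_amdgpu_metadata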
4576 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4577 const char *AssemblerDirectiveEnd, 4578 std::string &CollectString) { 4579 4580 raw_string_ostream CollectStream(CollectString); 4581 4582 getLexer().setSkipSpace(false); 4583 4584 bool FoundEnd = false; 4585 while (!getLexer().is(AsmToken::Eof)) { 4586 while (getLexer().is(AsmToken::Space)) { 4587 CollectStream << getLexer().getTok().getString(); 4588 Lex(); 4589 } 4590 4591 if (getLexer().is(AsmToken::Identifier)) { 4592 StringRef ID = getLexer().getTok().getIdentifier(); 4593 if (ID == AssemblerDirectiveEnd) { 4594 Lex(); 4595 FoundEnd = true; 4596 break; 4597 } 4598 } 4599 4600 CollectStream << Parser.parseStringToEndOfStatement() 4601 << getContext().getAsmInfo()->getSeparatorString(); 4602 4603 Parser.eatToEndOfStatement(); 4604 } 4605 4606 getLexer().setSkipSpace(true); 4607 4608 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4609 return TokError(Twine("expected directive ") + 4610 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4611 } 4612 4613 CollectStream.flush(); 4614 return false; 4615 } 4616 4617 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4618 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4619 std::string String; 4620 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4621 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4622 return true; 4623 4624 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4625 if (!PALMetadata->setFromString(String)) 4626 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4627 return false; 4628 } 4629 4630 /// Parse the assembler directive for old linear-format PAL metadata. 4631 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4632 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4633 return Error(getParser().getTok().getLoc(), 4634 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4635 "not available on non-amdpal OSes")).str()); 4636 } 4637 4638 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4639 PALMetadata->setLegacy(); 4640 for (;;) { 4641 uint32_t Key, Value; 4642 if (ParseAsAbsoluteExpression(Key)) { 4643 return TokError(Twine("invalid value in ") + 4644 Twine(PALMD::AssemblerDirective)); 4645 } 4646 if (getLexer().isNot(AsmToken::Comma)) { 4647 return TokError(Twine("expected an even number of values in ") + 4648 Twine(PALMD::AssemblerDirective)); 4649 } 4650 Lex(); 4651 if (ParseAsAbsoluteExpression(Value)) { 4652 return TokError(Twine("invalid value in ") + 4653 Twine(PALMD::AssemblerDirective)); 4654 } 4655 PALMetadata->setRegister(Key, Value); 4656 if (getLexer().isNot(AsmToken::Comma)) 4657 break; 4658 Lex(); 4659 } 4660 return false; 4661 } 4662 4663 /// ParseDirectiveAMDGPULDS 4664 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4665 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4666 if (getParser().checkForValidSection()) 4667 return true; 4668 4669 StringRef Name; 4670 SMLoc NameLoc = getLexer().getLoc(); 4671 if (getParser().parseIdentifier(Name)) 4672 return TokError("expected identifier in directive"); 4673 4674 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4675 if (parseToken(AsmToken::Comma, "expected ','")) 4676 return true; 4677 4678 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4679 4680 int64_t Size; 4681 SMLoc SizeLoc = getLexer().getLoc(); 4682 if (getParser().parseAbsoluteExpression(Size)) 4683 return true; 4684 if (Size < 0) 4685 return 
Error(SizeLoc, "size must be non-negative"); 4686 if (Size > LocalMemorySize) 4687 return Error(SizeLoc, "size is too large"); 4688 4689 int64_t Alignment = 4; 4690 if (getLexer().is(AsmToken::Comma)) { 4691 Lex(); 4692 SMLoc AlignLoc = getLexer().getLoc(); 4693 if (getParser().parseAbsoluteExpression(Alignment)) 4694 return true; 4695 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 4696 return Error(AlignLoc, "alignment must be a power of two"); 4697 4698 // Alignment larger than the size of LDS is possible in theory, as long 4699 // as the linker manages to place to symbol at address 0, but we do want 4700 // to make sure the alignment fits nicely into a 32-bit integer. 4701 if (Alignment >= 1u << 31) 4702 return Error(AlignLoc, "alignment is too large"); 4703 } 4704 4705 if (parseToken(AsmToken::EndOfStatement, 4706 "unexpected token in '.amdgpu_lds' directive")) 4707 return true; 4708 4709 Symbol->redefineIfPossible(); 4710 if (!Symbol->isUndefined()) 4711 return Error(NameLoc, "invalid symbol redefinition"); 4712 4713 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 4714 return false; 4715 } 4716 4717 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4718 StringRef IDVal = DirectiveID.getString(); 4719 4720 if (isHsaAbiVersion3(&getSTI())) { 4721 if (IDVal == ".amdgcn_target") 4722 return ParseDirectiveAMDGCNTarget(); 4723 4724 if (IDVal == ".amdhsa_kernel") 4725 return ParseDirectiveAMDHSAKernel(); 4726 4727 // TODO: Restructure/combine with PAL metadata directive. 4728 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4729 return ParseDirectiveHSAMetadata(); 4730 } else { 4731 if (IDVal == ".hsa_code_object_version") 4732 return ParseDirectiveHSACodeObjectVersion(); 4733 4734 if (IDVal == ".hsa_code_object_isa") 4735 return ParseDirectiveHSACodeObjectISA(); 4736 4737 if (IDVal == ".amd_kernel_code_t") 4738 return ParseDirectiveAMDKernelCodeT(); 4739 4740 if (IDVal == ".amdgpu_hsa_kernel") 4741 return ParseDirectiveAMDGPUHsaKernel(); 4742 4743 if (IDVal == ".amd_amdgpu_isa") 4744 return ParseDirectiveISAVersion(); 4745 4746 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4747 return ParseDirectiveHSAMetadata(); 4748 } 4749 4750 if (IDVal == ".amdgpu_lds") 4751 return ParseDirectiveAMDGPULDS(); 4752 4753 if (IDVal == PALMD::AssemblerDirectiveBegin) 4754 return ParseDirectivePALMetadataBegin(); 4755 4756 if (IDVal == PALMD::AssemblerDirective) 4757 return ParseDirectivePALMetadata(); 4758 4759 return true; 4760 } 4761 4762 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4763 unsigned RegNo) const { 4764 4765 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4766 R.isValid(); ++R) { 4767 if (*R == RegNo) 4768 return isGFX9Plus(); 4769 } 4770 4771 // GFX10 has 2 more SGPRs 104 and 105. 
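// Sketch of the check below: a reference to s104/s105 (or any alias
// overlapping SGPR104_SGPR105) is accepted only when the subtarget reports
// hasSGPR104_SGPR105().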
4772 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4773 R.isValid(); ++R) { 4774 if (*R == RegNo) 4775 return hasSGPR104_SGPR105(); 4776 } 4777 4778 switch (RegNo) { 4779 case AMDGPU::SRC_SHARED_BASE: 4780 case AMDGPU::SRC_SHARED_LIMIT: 4781 case AMDGPU::SRC_PRIVATE_BASE: 4782 case AMDGPU::SRC_PRIVATE_LIMIT: 4783 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4784 return !isCI() && !isSI() && !isVI(); 4785 case AMDGPU::TBA: 4786 case AMDGPU::TBA_LO: 4787 case AMDGPU::TBA_HI: 4788 case AMDGPU::TMA: 4789 case AMDGPU::TMA_LO: 4790 case AMDGPU::TMA_HI: 4791 return !isGFX9() && !isGFX10(); 4792 case AMDGPU::XNACK_MASK: 4793 case AMDGPU::XNACK_MASK_LO: 4794 case AMDGPU::XNACK_MASK_HI: 4795 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4796 case AMDGPU::SGPR_NULL: 4797 return isGFX10(); 4798 default: 4799 break; 4800 } 4801 4802 if (isCI()) 4803 return true; 4804 4805 if (isSI() || isGFX10()) { 4806 // No flat_scr on SI. 4807 // On GFX10 flat scratch is not a valid register operand and can only be 4808 // accessed with s_setreg/s_getreg. 4809 switch (RegNo) { 4810 case AMDGPU::FLAT_SCR: 4811 case AMDGPU::FLAT_SCR_LO: 4812 case AMDGPU::FLAT_SCR_HI: 4813 return false; 4814 default: 4815 return true; 4816 } 4817 } 4818 4819 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4820 // SI/CI have. 4821 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4822 R.isValid(); ++R) { 4823 if (*R == RegNo) 4824 return hasSGPR102_SGPR103(); 4825 } 4826 4827 return true; 4828 } 4829 4830 OperandMatchResultTy 4831 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4832 OperandMode Mode) { 4833 // Try to parse with a custom parser 4834 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4835 4836 // If we successfully parsed the operand or if there as an error parsing, 4837 // we are done. 4838 // 4839 // If we are parsing after we reach EndOfStatement then this means we 4840 // are appending default values to the Operands list. This is only done 4841 // by custom parser, so we shouldn't continue on to the generic parsing. 4842 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4843 getLexer().is(AsmToken::EndOfStatement)) 4844 return ResTy; 4845 4846 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4847 unsigned Prefix = Operands.size(); 4848 SMLoc LBraceLoc = getTok().getLoc(); 4849 Parser.Lex(); // eat the '[' 4850 4851 for (;;) { 4852 ResTy = parseReg(Operands); 4853 if (ResTy != MatchOperand_Success) 4854 return ResTy; 4855 4856 if (getLexer().is(AsmToken::RBrac)) 4857 break; 4858 4859 if (getLexer().isNot(AsmToken::Comma)) 4860 return MatchOperand_ParseFail; 4861 Parser.Lex(); 4862 } 4863 4864 if (Operands.size() - Prefix > 1) { 4865 Operands.insert(Operands.begin() + Prefix, 4866 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4867 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4868 getTok().getLoc())); 4869 } 4870 4871 Parser.Lex(); // eat the ']' 4872 return MatchOperand_Success; 4873 } 4874 4875 return parseRegOrImm(Operands); 4876 } 4877 4878 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4879 // Clear any forced encodings from the previous instruction. 
4880 setForcedEncodingSize(0); 4881 setForcedDPP(false); 4882 setForcedSDWA(false); 4883 4884 if (Name.endswith("_e64")) { 4885 setForcedEncodingSize(64); 4886 return Name.substr(0, Name.size() - 4); 4887 } else if (Name.endswith("_e32")) { 4888 setForcedEncodingSize(32); 4889 return Name.substr(0, Name.size() - 4); 4890 } else if (Name.endswith("_dpp")) { 4891 setForcedDPP(true); 4892 return Name.substr(0, Name.size() - 4); 4893 } else if (Name.endswith("_sdwa")) { 4894 setForcedSDWA(true); 4895 return Name.substr(0, Name.size() - 5); 4896 } 4897 return Name; 4898 } 4899 4900 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4901 StringRef Name, 4902 SMLoc NameLoc, OperandVector &Operands) { 4903 // Add the instruction mnemonic 4904 Name = parseMnemonicSuffix(Name); 4905 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4906 4907 bool IsMIMG = Name.startswith("image_"); 4908 4909 while (!getLexer().is(AsmToken::EndOfStatement)) { 4910 OperandMode Mode = OperandMode_Default; 4911 if (IsMIMG && isGFX10() && Operands.size() == 2) 4912 Mode = OperandMode_NSA; 4913 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4914 4915 // Eat the comma or space if there is one. 4916 if (getLexer().is(AsmToken::Comma)) 4917 Parser.Lex(); 4918 4919 if (Res != MatchOperand_Success) { 4920 checkUnsupportedInstruction(Name, NameLoc); 4921 if (!Parser.hasPendingError()) { 4922 // FIXME: use real operand location rather than the current location. 4923 StringRef Msg = 4924 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 4925 "not a valid operand."; 4926 Error(getLexer().getLoc(), Msg); 4927 } 4928 while (!getLexer().is(AsmToken::EndOfStatement)) { 4929 Parser.Lex(); 4930 } 4931 return true; 4932 } 4933 } 4934 4935 return false; 4936 } 4937 4938 //===----------------------------------------------------------------------===// 4939 // Utility functions 4940 //===----------------------------------------------------------------------===// 4941 4942 OperandMatchResultTy 4943 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4944 4945 if (!trySkipId(Prefix, AsmToken::Colon)) 4946 return MatchOperand_NoMatch; 4947 4948 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 4949 } 4950 4951 OperandMatchResultTy 4952 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4953 AMDGPUOperand::ImmTy ImmTy, 4954 bool (*ConvertResult)(int64_t&)) { 4955 SMLoc S = getLoc(); 4956 int64_t Value = 0; 4957 4958 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4959 if (Res != MatchOperand_Success) 4960 return Res; 4961 4962 if (ConvertResult && !ConvertResult(Value)) { 4963 Error(S, "invalid " + StringRef(Prefix) + " value."); 4964 } 4965 4966 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4967 return MatchOperand_Success; 4968 } 4969 4970 OperandMatchResultTy 4971 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4972 OperandVector &Operands, 4973 AMDGPUOperand::ImmTy ImmTy, 4974 bool (*ConvertResult)(int64_t&)) { 4975 SMLoc S = getLoc(); 4976 if (!trySkipId(Prefix, AsmToken::Colon)) 4977 return MatchOperand_NoMatch; 4978 4979 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4980 return MatchOperand_ParseFail; 4981 4982 unsigned Val = 0; 4983 const unsigned MaxSize = 4; 4984 4985 // FIXME: How to verify the number of elements matches the number of src 4986 // operands? 
4987 for (int I = 0; ; ++I) { 4988 int64_t Op; 4989 SMLoc Loc = getLoc(); 4990 if (!parseExpr(Op)) 4991 return MatchOperand_ParseFail; 4992 4993 if (Op != 0 && Op != 1) { 4994 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4995 return MatchOperand_ParseFail; 4996 } 4997 4998 Val |= (Op << I); 4999 5000 if (trySkipToken(AsmToken::RBrac)) 5001 break; 5002 5003 if (I + 1 == MaxSize) { 5004 Error(getLoc(), "expected a closing square bracket"); 5005 return MatchOperand_ParseFail; 5006 } 5007 5008 if (!skipToken(AsmToken::Comma, "expected a comma")) 5009 return MatchOperand_ParseFail; 5010 } 5011 5012 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5013 return MatchOperand_Success; 5014 } 5015 5016 OperandMatchResultTy 5017 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 5018 AMDGPUOperand::ImmTy ImmTy) { 5019 int64_t Bit = 0; 5020 SMLoc S = Parser.getTok().getLoc(); 5021 5022 // We are at the end of the statement, and this is a default argument, so 5023 // use a default value. 5024 if (getLexer().isNot(AsmToken::EndOfStatement)) { 5025 switch(getLexer().getKind()) { 5026 case AsmToken::Identifier: { 5027 StringRef Tok = Parser.getTok().getString(); 5028 if (Tok == Name) { 5029 if (Tok == "r128" && !hasMIMG_R128()) 5030 Error(S, "r128 modifier is not supported on this GPU"); 5031 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 5032 Error(S, "a16 modifier is not supported on this GPU"); 5033 Bit = 1; 5034 Parser.Lex(); 5035 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 5036 Bit = 0; 5037 Parser.Lex(); 5038 } else { 5039 return MatchOperand_NoMatch; 5040 } 5041 break; 5042 } 5043 default: 5044 return MatchOperand_NoMatch; 5045 } 5046 } 5047 5048 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 5049 return MatchOperand_ParseFail; 5050 5051 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5052 ImmTy = AMDGPUOperand::ImmTyR128A16; 5053 5054 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5055 return MatchOperand_Success; 5056 } 5057 5058 static void addOptionalImmOperand( 5059 MCInst& Inst, const OperandVector& Operands, 5060 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5061 AMDGPUOperand::ImmTy ImmT, 5062 int64_t Default = 0) { 5063 auto i = OptionalIdx.find(ImmT); 5064 if (i != OptionalIdx.end()) { 5065 unsigned Idx = i->second; 5066 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5067 } else { 5068 Inst.addOperand(MCOperand::createImm(Default)); 5069 } 5070 } 5071 5072 OperandMatchResultTy 5073 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 5074 if (getLexer().isNot(AsmToken::Identifier)) { 5075 return MatchOperand_NoMatch; 5076 } 5077 StringRef Tok = Parser.getTok().getString(); 5078 if (Tok != Prefix) { 5079 return MatchOperand_NoMatch; 5080 } 5081 5082 Parser.Lex(); 5083 if (getLexer().isNot(AsmToken::Colon)) { 5084 return MatchOperand_ParseFail; 5085 } 5086 5087 Parser.Lex(); 5088 if (getLexer().isNot(AsmToken::Identifier)) { 5089 return MatchOperand_ParseFail; 5090 } 5091 5092 Value = Parser.getTok().getString(); 5093 return MatchOperand_Success; 5094 } 5095 5096 //===----------------------------------------------------------------------===// 5097 // MTBUF format 5098 //===----------------------------------------------------------------------===// 5099 5100 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5101 int64_t MaxVal, 5102 int64_t &Fmt) { 5103 int64_t Val; 5104 SMLoc Loc = getLoc(); 5105 5106 auto Res = parseIntWithPrefix(Pref, Val); 5107 if (Res 
== MatchOperand_ParseFail) 5108 return false; 5109 if (Res == MatchOperand_NoMatch) 5110 return true; 5111 5112 if (Val < 0 || Val > MaxVal) { 5113 Error(Loc, Twine("out of range ", StringRef(Pref))); 5114 return false; 5115 } 5116 5117 Fmt = Val; 5118 return true; 5119 } 5120 5121 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5122 // values to live in a joint format operand in the MCInst encoding. 5123 OperandMatchResultTy 5124 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5125 using namespace llvm::AMDGPU::MTBUFFormat; 5126 5127 int64_t Dfmt = DFMT_UNDEF; 5128 int64_t Nfmt = NFMT_UNDEF; 5129 5130 // dfmt and nfmt can appear in either order, and each is optional. 5131 for (int I = 0; I < 2; ++I) { 5132 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5133 return MatchOperand_ParseFail; 5134 5135 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5136 return MatchOperand_ParseFail; 5137 } 5138 // Skip optional comma between dfmt/nfmt 5139 // but guard against 2 commas following each other. 5140 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5141 !peekToken().is(AsmToken::Comma)) { 5142 trySkipToken(AsmToken::Comma); 5143 } 5144 } 5145 5146 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5147 return MatchOperand_NoMatch; 5148 5149 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5150 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 5151 5152 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5153 return MatchOperand_Success; 5154 } 5155 5156 OperandMatchResultTy 5157 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5158 using namespace llvm::AMDGPU::MTBUFFormat; 5159 5160 int64_t Fmt = UFMT_UNDEF; 5161 5162 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5163 return MatchOperand_ParseFail; 5164 5165 if (Fmt == UFMT_UNDEF) 5166 return MatchOperand_NoMatch; 5167 5168 Format = Fmt; 5169 return MatchOperand_Success; 5170 } 5171 5172 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5173 int64_t &Nfmt, 5174 StringRef FormatStr, 5175 SMLoc Loc) { 5176 using namespace llvm::AMDGPU::MTBUFFormat; 5177 int64_t Format; 5178 5179 Format = getDfmt(FormatStr); 5180 if (Format != DFMT_UNDEF) { 5181 Dfmt = Format; 5182 return true; 5183 } 5184 5185 Format = getNfmt(FormatStr, getSTI()); 5186 if (Format != NFMT_UNDEF) { 5187 Nfmt = Format; 5188 return true; 5189 } 5190 5191 Error(Loc, "unsupported format"); 5192 return false; 5193 } 5194 5195 OperandMatchResultTy 5196 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5197 SMLoc FormatLoc, 5198 int64_t &Format) { 5199 using namespace llvm::AMDGPU::MTBUFFormat; 5200 5201 int64_t Dfmt = DFMT_UNDEF; 5202 int64_t Nfmt = NFMT_UNDEF; 5203 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5204 return MatchOperand_ParseFail; 5205 5206 if (trySkipToken(AsmToken::Comma)) { 5207 StringRef Str; 5208 SMLoc Loc = getLoc(); 5209 if (!parseId(Str, "expected a format string") || 5210 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5211 return MatchOperand_ParseFail; 5212 } 5213 if (Dfmt == DFMT_UNDEF) { 5214 Error(Loc, "duplicate numeric format"); 5215 return MatchOperand_ParseFail; 5216 } else if (Nfmt == NFMT_UNDEF) { 5217 Error(Loc, "duplicate data format"); 5218 return MatchOperand_ParseFail; 5219 } 5220 } 5221 5222 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5223 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5224 5225 if (isGFX10()) { 5226 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5227 if (Ufmt == UFMT_UNDEF) { 5228 Error(FormatLoc, "unsupported format"); 5229 return MatchOperand_ParseFail; 5230 } 5231 Format = Ufmt; 5232 } else { 5233 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5234 } 5235 5236 return MatchOperand_Success; 5237 } 5238 5239 OperandMatchResultTy 5240 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5241 SMLoc Loc, 5242 int64_t &Format) { 5243 using namespace llvm::AMDGPU::MTBUFFormat; 5244 5245 auto Id = getUnifiedFormat(FormatStr); 5246 if (Id == UFMT_UNDEF) 5247 return MatchOperand_NoMatch; 5248 5249 if (!isGFX10()) { 5250 Error(Loc, "unified format is not supported on this GPU"); 5251 return MatchOperand_ParseFail; 5252 } 5253 5254 Format = Id; 5255 return MatchOperand_Success; 5256 } 5257 5258 OperandMatchResultTy 5259 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5260 using namespace llvm::AMDGPU::MTBUFFormat; 5261 SMLoc Loc = getLoc(); 5262 5263 if (!parseExpr(Format)) 5264 return MatchOperand_ParseFail; 5265 if (!isValidFormatEncoding(Format, getSTI())) { 5266 Error(Loc, "out of range format"); 5267 return MatchOperand_ParseFail; 5268 } 5269 5270 return MatchOperand_Success; 5271 } 5272 5273 OperandMatchResultTy 5274 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5275 using namespace llvm::AMDGPU::MTBUFFormat; 5276 5277 if (!trySkipId("format", AsmToken::Colon)) 5278 return MatchOperand_NoMatch; 5279 5280 if (trySkipToken(AsmToken::LBrac)) { 5281 StringRef FormatStr; 5282 SMLoc Loc = getLoc(); 5283 if (!parseId(FormatStr, "expected a format string")) 5284 return MatchOperand_ParseFail; 5285 5286 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5287 if (Res == MatchOperand_NoMatch) 5288 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5289 if (Res != MatchOperand_Success) 5290 return Res; 5291 5292 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5293 return MatchOperand_ParseFail; 5294 5295 return MatchOperand_Success; 5296 } 5297 5298 return parseNumericFormat(Format); 5299 } 5300 5301 OperandMatchResultTy 5302 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5303 using namespace llvm::AMDGPU::MTBUFFormat; 5304 5305 int64_t Format = getDefaultFormatEncoding(getSTI()); 5306 OperandMatchResultTy Res; 5307 SMLoc Loc = getLoc(); 5308 5309 // Parse legacy format syntax. 5310 Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5311 if (Res == MatchOperand_ParseFail) 5312 return Res; 5313 5314 bool FormatFound = (Res == MatchOperand_Success); 5315 5316 Operands.push_back( 5317 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5318 5319 if (FormatFound) 5320 trySkipToken(AsmToken::Comma); 5321 5322 if (isToken(AsmToken::EndOfStatement)) { 5323 // We are expecting an soffset operand, 5324 // but let matcher handle the error. 5325 return MatchOperand_Success; 5326 } 5327 5328 // Parse soffset. 
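// Sketch of the two orderings accepted here (assumed MTBUF syntax): either
// the legacy form parsed above, e.g. '..., dfmt:1, nfmt:2, s0', or a
// 'format:...' specifier following soffset, in which case the placeholder
// format immediate pushed above is overwritten once it is parsed below.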
5329 Res = parseRegOrImm(Operands); 5330 if (Res != MatchOperand_Success) 5331 return Res; 5332 5333 trySkipToken(AsmToken::Comma); 5334 5335 if (!FormatFound) { 5336 Res = parseSymbolicOrNumericFormat(Format); 5337 if (Res == MatchOperand_ParseFail) 5338 return Res; 5339 if (Res == MatchOperand_Success) { 5340 auto Size = Operands.size(); 5341 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5342 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5343 Op.setImm(Format); 5344 } 5345 return MatchOperand_Success; 5346 } 5347 5348 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5349 Error(getLoc(), "duplicate format"); 5350 return MatchOperand_ParseFail; 5351 } 5352 return MatchOperand_Success; 5353 } 5354 5355 //===----------------------------------------------------------------------===// 5356 // ds 5357 //===----------------------------------------------------------------------===// 5358 5359 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5360 const OperandVector &Operands) { 5361 OptionalImmIndexMap OptionalIdx; 5362 5363 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5364 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5365 5366 // Add the register arguments 5367 if (Op.isReg()) { 5368 Op.addRegOperands(Inst, 1); 5369 continue; 5370 } 5371 5372 // Handle optional arguments 5373 OptionalIdx[Op.getImmTy()] = i; 5374 } 5375 5376 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5377 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5378 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5379 5380 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5381 } 5382 5383 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5384 bool IsGdsHardcoded) { 5385 OptionalImmIndexMap OptionalIdx; 5386 5387 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5388 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5389 5390 // Add the register arguments 5391 if (Op.isReg()) { 5392 Op.addRegOperands(Inst, 1); 5393 continue; 5394 } 5395 5396 if (Op.isToken() && Op.getToken() == "gds") { 5397 IsGdsHardcoded = true; 5398 continue; 5399 } 5400 5401 // Handle optional arguments 5402 OptionalIdx[Op.getImmTy()] = i; 5403 } 5404 5405 AMDGPUOperand::ImmTy OffsetType = 5406 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5407 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5408 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5409 AMDGPUOperand::ImmTyOffset; 5410 5411 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5412 5413 if (!IsGdsHardcoded) { 5414 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5415 } 5416 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5417 } 5418 5419 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5420 OptionalImmIndexMap OptionalIdx; 5421 5422 unsigned OperandIdx[4]; 5423 unsigned EnMask = 0; 5424 int SrcIdx = 0; 5425 5426 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5427 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5428 5429 // Add the register arguments 5430 if (Op.isReg()) { 5431 assert(SrcIdx < 4); 5432 OperandIdx[SrcIdx] = Inst.size(); 5433 Op.addRegOperands(Inst, 1); 5434 ++SrcIdx; 5435 continue; 5436 } 5437 5438 if (Op.isOff()) { 5439 assert(SrcIdx < 4); 5440 OperandIdx[SrcIdx] = Inst.size(); 5441 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5442 ++SrcIdx; 5443 continue; 5444 } 5445 5446 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5447 Op.addImmOperands(Inst, 1); 5448 continue; 5449 } 5450 5451 if (Op.isToken() && Op.getToken() == "done") 5452 continue; 5453 5454 // Handle optional arguments 5455 OptionalIdx[Op.getImmTy()] = i; 5456 } 5457 5458 assert(SrcIdx == 4); 5459 5460 bool Compr = false; 5461 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5462 Compr = true; 5463 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5464 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5465 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5466 } 5467 5468 for (auto i = 0; i < SrcIdx; ++i) { 5469 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5470 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5471 } 5472 } 5473 5474 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5475 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5476 5477 Inst.addOperand(MCOperand::createImm(EnMask)); 5478 } 5479 5480 //===----------------------------------------------------------------------===// 5481 // s_waitcnt 5482 //===----------------------------------------------------------------------===// 5483 5484 static bool 5485 encodeCnt( 5486 const AMDGPU::IsaVersion ISA, 5487 int64_t &IntVal, 5488 int64_t CntVal, 5489 bool Saturate, 5490 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5491 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5492 { 5493 bool Failed = false; 5494 5495 IntVal = encode(ISA, IntVal, CntVal); 5496 if (CntVal != decode(ISA, IntVal)) { 5497 if (Saturate) { 5498 IntVal = encode(ISA, IntVal, -1); 5499 } else { 5500 Failed = true; 5501 } 5502 } 5503 return Failed; 5504 } 5505 5506 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5507 5508 SMLoc CntLoc = getLoc(); 5509 StringRef CntName = getTokenStr(); 5510 5511 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5512 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5513 return false; 5514 5515 int64_t CntVal; 5516 SMLoc ValLoc = getLoc(); 5517 if (!parseExpr(CntVal)) 5518 return false; 5519 5520 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5521 5522 bool Failed = true; 5523 bool Sat = CntName.endswith("_sat"); 5524 5525 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5526 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5527 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5528 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5529 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5530 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5531 } else { 5532 Error(CntLoc, "invalid counter name " + CntName); 5533 return false; 5534 } 5535 5536 if (Failed) { 5537 Error(ValLoc, "too large value for " + CntName); 5538 return false; 5539 } 5540 5541 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5542 return false; 5543 5544 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5545 if (isToken(AsmToken::EndOfStatement)) { 5546 Error(getLoc(), "expected a counter name"); 5547 return false; 5548 } 5549 } 5550 5551 return true; 5552 } 5553 5554 OperandMatchResultTy 5555 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5556 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5557 int64_t Waitcnt = getWaitcntBitMask(ISA); 5558 SMLoc S = getLoc(); 5559 5560 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5561 while (!isToken(AsmToken::EndOfStatement)) { 5562 if (!parseCnt(Waitcnt)) 5563 return MatchOperand_ParseFail; 5564 } 5565 } else { 5566 if (!parseExpr(Waitcnt)) 5567 return MatchOperand_ParseFail; 5568 } 5569 5570 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5571 return MatchOperand_Success; 5572 } 5573 5574 bool 5575 AMDGPUOperand::isSWaitCnt() const { 5576 return isImm(); 5577 } 5578 5579 //===----------------------------------------------------------------------===// 5580 // hwreg 5581 //===----------------------------------------------------------------------===// 5582 5583 bool 5584 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5585 int64_t &Offset, 5586 int64_t 
&Width) { 5587 using namespace llvm::AMDGPU::Hwreg; 5588 5589 // The register may be specified by name or using a numeric code 5590 if (isToken(AsmToken::Identifier) && 5591 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5592 HwReg.IsSymbolic = true; 5593 lex(); // skip message name 5594 } else if (!parseExpr(HwReg.Id)) { 5595 return false; 5596 } 5597 5598 if (trySkipToken(AsmToken::RParen)) 5599 return true; 5600 5601 // parse optional params 5602 return 5603 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5604 parseExpr(Offset) && 5605 skipToken(AsmToken::Comma, "expected a comma") && 5606 parseExpr(Width) && 5607 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5608 } 5609 5610 bool 5611 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5612 const int64_t Offset, 5613 const int64_t Width, 5614 const SMLoc Loc) { 5615 5616 using namespace llvm::AMDGPU::Hwreg; 5617 5618 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5619 Error(Loc, "specified hardware register is not supported on this GPU"); 5620 return false; 5621 } else if (!isValidHwreg(HwReg.Id)) { 5622 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5623 return false; 5624 } else if (!isValidHwregOffset(Offset)) { 5625 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5626 return false; 5627 } else if (!isValidHwregWidth(Width)) { 5628 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5629 return false; 5630 } 5631 return true; 5632 } 5633 5634 OperandMatchResultTy 5635 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5636 using namespace llvm::AMDGPU::Hwreg; 5637 5638 int64_t ImmVal = 0; 5639 SMLoc Loc = getLoc(); 5640 5641 if (trySkipId("hwreg", AsmToken::LParen)) { 5642 OperandInfoTy HwReg(ID_UNKNOWN_); 5643 int64_t Offset = OFFSET_DEFAULT_; 5644 int64_t Width = WIDTH_DEFAULT_; 5645 if (parseHwregBody(HwReg, Offset, Width) && 5646 validateHwreg(HwReg, Offset, Width, Loc)) { 5647 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5648 } else { 5649 return MatchOperand_ParseFail; 5650 } 5651 } else if (parseExpr(ImmVal)) { 5652 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5653 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5654 return MatchOperand_ParseFail; 5655 } 5656 } else { 5657 return MatchOperand_ParseFail; 5658 } 5659 5660 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5661 return MatchOperand_Success; 5662 } 5663 5664 bool AMDGPUOperand::isHwreg() const { 5665 return isImmTy(ImmTyHwreg); 5666 } 5667 5668 //===----------------------------------------------------------------------===// 5669 // sendmsg 5670 //===----------------------------------------------------------------------===// 5671 5672 bool 5673 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5674 OperandInfoTy &Op, 5675 OperandInfoTy &Stream) { 5676 using namespace llvm::AMDGPU::SendMsg; 5677 5678 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5679 Msg.IsSymbolic = true; 5680 lex(); // skip message name 5681 } else if (!parseExpr(Msg.Id)) { 5682 return false; 5683 } 5684 5685 if (trySkipToken(AsmToken::Comma)) { 5686 Op.IsDefined = true; 5687 if (isToken(AsmToken::Identifier) && 5688 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5689 lex(); // skip operation name 5690 } else if (!parseExpr(Op.Id)) { 5691 return false; 5692 } 5693 5694 if (trySkipToken(AsmToken::Comma)) { 5695 Stream.IsDefined = true; 5696 if (!parseExpr(Stream.Id)) 
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream,
                                 const SMLoc S) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case,
  // only whether the value can be encoded is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(S, "invalid message id");
    return false;
  } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    Error(S, Op.IsDefined ?
             "message does not support operations" :
             "missing message operation");
    return false;
  } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
    Error(S, "invalid operation id");
    return false;
  } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(S, "message operation does not support streams");
    return false;
  } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
    Error(S, "invalid message stream id");
    return false;
  }
  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream, Loc)) {
      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal)) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}

//===----------------------------------------------------------------------===//
// v_interp
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  SMLoc S = Parser.getTok().getLoc();
  if (Slot == -1)
    return MatchOperand_ParseFail;

  Parser.Lex();
  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}

OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  StringRef Chan = Str.take_back(2);
  int AttrChan =
StringSwitch<int>(Chan) 5806 .Case(".x", 0) 5807 .Case(".y", 1) 5808 .Case(".z", 2) 5809 .Case(".w", 3) 5810 .Default(-1); 5811 if (AttrChan == -1) 5812 return MatchOperand_ParseFail; 5813 5814 Str = Str.drop_back(2).drop_front(4); 5815 5816 uint8_t Attr; 5817 if (Str.getAsInteger(10, Attr)) 5818 return MatchOperand_ParseFail; 5819 5820 SMLoc S = Parser.getTok().getLoc(); 5821 Parser.Lex(); 5822 if (Attr > 63) { 5823 Error(S, "out of bounds attr"); 5824 return MatchOperand_ParseFail; 5825 } 5826 5827 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5828 5829 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5830 AMDGPUOperand::ImmTyInterpAttr)); 5831 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5832 AMDGPUOperand::ImmTyAttrChan)); 5833 return MatchOperand_Success; 5834 } 5835 5836 //===----------------------------------------------------------------------===// 5837 // exp 5838 //===----------------------------------------------------------------------===// 5839 5840 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5841 uint8_t &Val) { 5842 if (Str == "null") { 5843 Val = 9; 5844 return MatchOperand_Success; 5845 } 5846 5847 if (Str.startswith("mrt")) { 5848 Str = Str.drop_front(3); 5849 if (Str == "z") { // == mrtz 5850 Val = 8; 5851 return MatchOperand_Success; 5852 } 5853 5854 if (Str.getAsInteger(10, Val)) 5855 return MatchOperand_ParseFail; 5856 5857 if (Val > 7) 5858 return MatchOperand_ParseFail; 5859 5860 return MatchOperand_Success; 5861 } 5862 5863 if (Str.startswith("pos")) { 5864 Str = Str.drop_front(3); 5865 if (Str.getAsInteger(10, Val)) 5866 return MatchOperand_ParseFail; 5867 5868 if (Val > 4 || (Val == 4 && !isGFX10())) 5869 return MatchOperand_ParseFail; 5870 5871 Val += 12; 5872 return MatchOperand_Success; 5873 } 5874 5875 if (isGFX10() && Str == "prim") { 5876 Val = 20; 5877 return MatchOperand_Success; 5878 } 5879 5880 if (Str.startswith("param")) { 5881 Str = Str.drop_front(5); 5882 if (Str.getAsInteger(10, Val)) 5883 return MatchOperand_ParseFail; 5884 5885 if (Val >= 32) 5886 return MatchOperand_ParseFail; 5887 5888 Val += 32; 5889 return MatchOperand_Success; 5890 } 5891 5892 return MatchOperand_ParseFail; 5893 } 5894 5895 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5896 if (!isToken(AsmToken::Identifier)) 5897 return MatchOperand_NoMatch; 5898 5899 SMLoc S = getLoc(); 5900 5901 uint8_t Val; 5902 auto Res = parseExpTgtImpl(getTokenStr(), Val); 5903 if (Res != MatchOperand_Success) { 5904 Error(S, "invalid exp target"); 5905 return Res; 5906 } 5907 5908 Parser.Lex(); 5909 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5910 AMDGPUOperand::ImmTyExpTgt)); 5911 return MatchOperand_Success; 5912 } 5913 5914 //===----------------------------------------------------------------------===// 5915 // parser helpers 5916 //===----------------------------------------------------------------------===// 5917 5918 bool 5919 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5920 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5921 } 5922 5923 bool 5924 AMDGPUAsmParser::isId(const StringRef Id) const { 5925 return isId(getToken(), Id); 5926 } 5927 5928 bool 5929 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5930 return getTokenKind() == Kind; 5931 } 5932 5933 bool 5934 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5935 if (isId(Id)) { 5936 lex(); 5937 return true; 5938 } 5939 return false; 5940 } 5941 5942 bool 5943 
AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5944 if (isId(Id) && peekToken().is(Kind)) { 5945 lex(); 5946 lex(); 5947 return true; 5948 } 5949 return false; 5950 } 5951 5952 bool 5953 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5954 if (isToken(Kind)) { 5955 lex(); 5956 return true; 5957 } 5958 return false; 5959 } 5960 5961 bool 5962 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5963 const StringRef ErrMsg) { 5964 if (!trySkipToken(Kind)) { 5965 Error(getLoc(), ErrMsg); 5966 return false; 5967 } 5968 return true; 5969 } 5970 5971 bool 5972 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5973 return !getParser().parseAbsoluteExpression(Imm); 5974 } 5975 5976 bool 5977 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5978 SMLoc S = getLoc(); 5979 5980 const MCExpr *Expr; 5981 if (Parser.parseExpression(Expr)) 5982 return false; 5983 5984 int64_t IntVal; 5985 if (Expr->evaluateAsAbsolute(IntVal)) { 5986 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5987 } else { 5988 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5989 } 5990 return true; 5991 } 5992 5993 bool 5994 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5995 if (isToken(AsmToken::String)) { 5996 Val = getToken().getStringContents(); 5997 lex(); 5998 return true; 5999 } else { 6000 Error(getLoc(), ErrMsg); 6001 return false; 6002 } 6003 } 6004 6005 bool 6006 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6007 if (isToken(AsmToken::Identifier)) { 6008 Val = getTokenStr(); 6009 lex(); 6010 return true; 6011 } else { 6012 Error(getLoc(), ErrMsg); 6013 return false; 6014 } 6015 } 6016 6017 AsmToken 6018 AMDGPUAsmParser::getToken() const { 6019 return Parser.getTok(); 6020 } 6021 6022 AsmToken 6023 AMDGPUAsmParser::peekToken() { 6024 return isToken(AsmToken::EndOfStatement) ? 
getToken() : getLexer().peekTok(); 6025 } 6026 6027 void 6028 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6029 auto TokCount = getLexer().peekTokens(Tokens); 6030 6031 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6032 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6033 } 6034 6035 AsmToken::TokenKind 6036 AMDGPUAsmParser::getTokenKind() const { 6037 return getLexer().getKind(); 6038 } 6039 6040 SMLoc 6041 AMDGPUAsmParser::getLoc() const { 6042 return getToken().getLoc(); 6043 } 6044 6045 StringRef 6046 AMDGPUAsmParser::getTokenStr() const { 6047 return getToken().getString(); 6048 } 6049 6050 void 6051 AMDGPUAsmParser::lex() { 6052 Parser.Lex(); 6053 } 6054 6055 //===----------------------------------------------------------------------===// 6056 // swizzle 6057 //===----------------------------------------------------------------------===// 6058 6059 LLVM_READNONE 6060 static unsigned 6061 encodeBitmaskPerm(const unsigned AndMask, 6062 const unsigned OrMask, 6063 const unsigned XorMask) { 6064 using namespace llvm::AMDGPU::Swizzle; 6065 6066 return BITMASK_PERM_ENC | 6067 (AndMask << BITMASK_AND_SHIFT) | 6068 (OrMask << BITMASK_OR_SHIFT) | 6069 (XorMask << BITMASK_XOR_SHIFT); 6070 } 6071 6072 bool 6073 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6074 const unsigned MinVal, 6075 const unsigned MaxVal, 6076 const StringRef ErrMsg) { 6077 for (unsigned i = 0; i < OpNum; ++i) { 6078 if (!skipToken(AsmToken::Comma, "expected a comma")){ 6079 return false; 6080 } 6081 SMLoc ExprLoc = Parser.getTok().getLoc(); 6082 if (!parseExpr(Op[i])) { 6083 return false; 6084 } 6085 if (Op[i] < MinVal || Op[i] > MaxVal) { 6086 Error(ExprLoc, ErrMsg); 6087 return false; 6088 } 6089 } 6090 6091 return true; 6092 } 6093 6094 bool 6095 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 6096 using namespace llvm::AMDGPU::Swizzle; 6097 6098 int64_t Lane[LANE_NUM]; 6099 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6100 "expected a 2-bit lane id")) { 6101 Imm = QUAD_PERM_ENC; 6102 for (unsigned I = 0; I < LANE_NUM; ++I) { 6103 Imm |= Lane[I] << (LANE_SHIFT * I); 6104 } 6105 return true; 6106 } 6107 return false; 6108 } 6109 6110 bool 6111 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6112 using namespace llvm::AMDGPU::Swizzle; 6113 6114 SMLoc S = Parser.getTok().getLoc(); 6115 int64_t GroupSize; 6116 int64_t LaneIdx; 6117 6118 if (!parseSwizzleOperands(1, &GroupSize, 6119 2, 32, 6120 "group size must be in the interval [2,32]")) { 6121 return false; 6122 } 6123 if (!isPowerOf2_64(GroupSize)) { 6124 Error(S, "group size must be a power of two"); 6125 return false; 6126 } 6127 if (parseSwizzleOperands(1, &LaneIdx, 6128 0, GroupSize - 1, 6129 "lane id must be in the interval [0,group size - 1]")) { 6130 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6131 return true; 6132 } 6133 return false; 6134 } 6135 6136 bool 6137 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6138 using namespace llvm::AMDGPU::Swizzle; 6139 6140 SMLoc S = Parser.getTok().getLoc(); 6141 int64_t GroupSize; 6142 6143 if (!parseSwizzleOperands(1, &GroupSize, 6144 2, 32, "group size must be in the interval [2,32]")) { 6145 return false; 6146 } 6147 if (!isPowerOf2_64(GroupSize)) { 6148 Error(S, "group size must be a power of two"); 6149 return false; 6150 } 6151 6152 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6153 return true; 6154 } 6155 6156 bool 6157 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6158 using namespace 
llvm::AMDGPU::Swizzle; 6159 6160 SMLoc S = Parser.getTok().getLoc(); 6161 int64_t GroupSize; 6162 6163 if (!parseSwizzleOperands(1, &GroupSize, 6164 1, 16, "group size must be in the interval [1,16]")) { 6165 return false; 6166 } 6167 if (!isPowerOf2_64(GroupSize)) { 6168 Error(S, "group size must be a power of two"); 6169 return false; 6170 } 6171 6172 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6173 return true; 6174 } 6175 6176 bool 6177 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6178 using namespace llvm::AMDGPU::Swizzle; 6179 6180 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6181 return false; 6182 } 6183 6184 StringRef Ctl; 6185 SMLoc StrLoc = Parser.getTok().getLoc(); 6186 if (!parseString(Ctl)) { 6187 return false; 6188 } 6189 if (Ctl.size() != BITMASK_WIDTH) { 6190 Error(StrLoc, "expected a 5-character mask"); 6191 return false; 6192 } 6193 6194 unsigned AndMask = 0; 6195 unsigned OrMask = 0; 6196 unsigned XorMask = 0; 6197 6198 for (size_t i = 0; i < Ctl.size(); ++i) { 6199 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6200 switch(Ctl[i]) { 6201 default: 6202 Error(StrLoc, "invalid mask"); 6203 return false; 6204 case '0': 6205 break; 6206 case '1': 6207 OrMask |= Mask; 6208 break; 6209 case 'p': 6210 AndMask |= Mask; 6211 break; 6212 case 'i': 6213 AndMask |= Mask; 6214 XorMask |= Mask; 6215 break; 6216 } 6217 } 6218 6219 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6220 return true; 6221 } 6222 6223 bool 6224 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6225 6226 SMLoc OffsetLoc = Parser.getTok().getLoc(); 6227 6228 if (!parseExpr(Imm)) { 6229 return false; 6230 } 6231 if (!isUInt<16>(Imm)) { 6232 Error(OffsetLoc, "expected a 16-bit offset"); 6233 return false; 6234 } 6235 return true; 6236 } 6237 6238 bool 6239 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6240 using namespace llvm::AMDGPU::Swizzle; 6241 6242 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 6243 6244 SMLoc ModeLoc = Parser.getTok().getLoc(); 6245 bool Ok = false; 6246 6247 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6248 Ok = parseSwizzleQuadPerm(Imm); 6249 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6250 Ok = parseSwizzleBitmaskPerm(Imm); 6251 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6252 Ok = parseSwizzleBroadcast(Imm); 6253 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6254 Ok = parseSwizzleSwap(Imm); 6255 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6256 Ok = parseSwizzleReverse(Imm); 6257 } else { 6258 Error(ModeLoc, "expected a swizzle mode"); 6259 } 6260 6261 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6262 } 6263 6264 return false; 6265 } 6266 6267 OperandMatchResultTy 6268 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6269 SMLoc S = Parser.getTok().getLoc(); 6270 int64_t Imm = 0; 6271 6272 if (trySkipId("offset")) { 6273 6274 bool Ok = false; 6275 if (skipToken(AsmToken::Colon, "expected a colon")) { 6276 if (trySkipId("swizzle")) { 6277 Ok = parseSwizzleMacro(Imm); 6278 } else { 6279 Ok = parseSwizzleOffset(Imm); 6280 } 6281 } 6282 6283 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6284 6285 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6286 } else { 6287 // Swizzle "offset" operand is optional. 6288 // If it is omitted, try parsing other optional operands. 
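    // (When present, the operand is either a plain 16-bit literal, e.g.
    //  offset:0x1234, or one of the swizzle macros handled above, e.g.
    //  offset:swizzle(QUAD_PERM, 0, 1, 2, 3) - illustrative examples only.)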
6289 return parseOptionalOpr(Operands); 6290 } 6291 } 6292 6293 bool 6294 AMDGPUOperand::isSwizzle() const { 6295 return isImmTy(ImmTySwizzle); 6296 } 6297 6298 //===----------------------------------------------------------------------===// 6299 // VGPR Index Mode 6300 //===----------------------------------------------------------------------===// 6301 6302 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6303 6304 using namespace llvm::AMDGPU::VGPRIndexMode; 6305 6306 if (trySkipToken(AsmToken::RParen)) { 6307 return OFF; 6308 } 6309 6310 int64_t Imm = 0; 6311 6312 while (true) { 6313 unsigned Mode = 0; 6314 SMLoc S = Parser.getTok().getLoc(); 6315 6316 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6317 if (trySkipId(IdSymbolic[ModeId])) { 6318 Mode = 1 << ModeId; 6319 break; 6320 } 6321 } 6322 6323 if (Mode == 0) { 6324 Error(S, (Imm == 0)? 6325 "expected a VGPR index mode or a closing parenthesis" : 6326 "expected a VGPR index mode"); 6327 return UNDEF; 6328 } 6329 6330 if (Imm & Mode) { 6331 Error(S, "duplicate VGPR index mode"); 6332 return UNDEF; 6333 } 6334 Imm |= Mode; 6335 6336 if (trySkipToken(AsmToken::RParen)) 6337 break; 6338 if (!skipToken(AsmToken::Comma, 6339 "expected a comma or a closing parenthesis")) 6340 return UNDEF; 6341 } 6342 6343 return Imm; 6344 } 6345 6346 OperandMatchResultTy 6347 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6348 6349 using namespace llvm::AMDGPU::VGPRIndexMode; 6350 6351 int64_t Imm = 0; 6352 SMLoc S = Parser.getTok().getLoc(); 6353 6354 if (getLexer().getKind() == AsmToken::Identifier && 6355 Parser.getTok().getString() == "gpr_idx" && 6356 getLexer().peekTok().is(AsmToken::LParen)) { 6357 6358 Parser.Lex(); 6359 Parser.Lex(); 6360 6361 Imm = parseGPRIdxMacro(); 6362 if (Imm == UNDEF) 6363 return MatchOperand_ParseFail; 6364 6365 } else { 6366 if (getParser().parseAbsoluteExpression(Imm)) 6367 return MatchOperand_ParseFail; 6368 if (Imm < 0 || !isUInt<4>(Imm)) { 6369 Error(S, "invalid immediate: only 4-bit values are legal"); 6370 return MatchOperand_ParseFail; 6371 } 6372 } 6373 6374 Operands.push_back( 6375 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6376 return MatchOperand_Success; 6377 } 6378 6379 bool AMDGPUOperand::isGPRIdxMode() const { 6380 return isImmTy(ImmTyGprIdxMode); 6381 } 6382 6383 //===----------------------------------------------------------------------===// 6384 // sopp branch targets 6385 //===----------------------------------------------------------------------===// 6386 6387 OperandMatchResultTy 6388 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6389 6390 // Make sure we are not parsing something 6391 // that looks like a label or an expression but is not. 6392 // This will improve error messages. 6393 if (isRegister() || isModifier()) 6394 return MatchOperand_NoMatch; 6395 6396 if (!parseExpr(Operands)) 6397 return MatchOperand_ParseFail; 6398 6399 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6400 assert(Opr.isImm() || Opr.isExpr()); 6401 SMLoc Loc = Opr.getStartLoc(); 6402 6403 // Currently we do not support arbitrary expressions as branch targets. 6404 // Only labels and absolute expressions are accepted. 
6405 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6406 Error(Loc, "expected an absolute expression or a label"); 6407 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6408 Error(Loc, "expected a 16-bit signed jump offset"); 6409 } 6410 6411 return MatchOperand_Success; 6412 } 6413 6414 //===----------------------------------------------------------------------===// 6415 // Boolean holding registers 6416 //===----------------------------------------------------------------------===// 6417 6418 OperandMatchResultTy 6419 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6420 return parseReg(Operands); 6421 } 6422 6423 //===----------------------------------------------------------------------===// 6424 // mubuf 6425 //===----------------------------------------------------------------------===// 6426 6427 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6428 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6429 } 6430 6431 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6432 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6433 } 6434 6435 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const { 6436 return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC); 6437 } 6438 6439 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6440 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6441 } 6442 6443 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6444 const OperandVector &Operands, 6445 bool IsAtomic, 6446 bool IsAtomicReturn, 6447 bool IsLds) { 6448 bool IsLdsOpcode = IsLds; 6449 bool HasLdsModifier = false; 6450 OptionalImmIndexMap OptionalIdx; 6451 assert(IsAtomicReturn ? IsAtomic : true); 6452 unsigned FirstOperandIdx = 1; 6453 6454 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6455 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6456 6457 // Add the register arguments 6458 if (Op.isReg()) { 6459 Op.addRegOperands(Inst, 1); 6460 // Insert a tied src for atomic return dst. 6461 // This cannot be postponed as subsequent calls to 6462 // addImmOperands rely on correct number of MC operands. 6463 if (IsAtomicReturn && i == FirstOperandIdx) 6464 Op.addRegOperands(Inst, 1); 6465 continue; 6466 } 6467 6468 // Handle the case where soffset is an immediate 6469 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6470 Op.addImmOperands(Inst, 1); 6471 continue; 6472 } 6473 6474 HasLdsModifier |= Op.isLDS(); 6475 6476 // Handle tokens like 'offen' which are sometimes hard-coded into the 6477 // asm string. There are no MCInst operands for these. 6478 if (Op.isToken()) { 6479 continue; 6480 } 6481 assert(Op.isImm()); 6482 6483 // Handle optional arguments 6484 OptionalIdx[Op.getImmTy()] = i; 6485 } 6486 6487 // This is a workaround for an llvm quirk which may result in an 6488 // incorrect instruction selection. Lds and non-lds versions of 6489 // MUBUF instructions are identical except that lds versions 6490 // have mandatory 'lds' modifier. However this modifier follows 6491 // optional modifiers and llvm asm matcher regards this 'lds' 6492 // modifier as an optional one. As a result, an lds version 6493 // of opcode may be selected even if it has no 'lds' modifier. 6494 if (IsLdsOpcode && !HasLdsModifier) { 6495 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6496 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
6497 Inst.setOpcode(NoLdsOpcode); 6498 IsLdsOpcode = false; 6499 } 6500 } 6501 6502 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6503 if (!IsAtomic || IsAtomicReturn) { 6504 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6505 } 6506 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6507 6508 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6509 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6510 } 6511 6512 if (isGFX10()) 6513 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6514 } 6515 6516 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6517 OptionalImmIndexMap OptionalIdx; 6518 6519 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6520 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6521 6522 // Add the register arguments 6523 if (Op.isReg()) { 6524 Op.addRegOperands(Inst, 1); 6525 continue; 6526 } 6527 6528 // Handle the case where soffset is an immediate 6529 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6530 Op.addImmOperands(Inst, 1); 6531 continue; 6532 } 6533 6534 // Handle tokens like 'offen' which are sometimes hard-coded into the 6535 // asm string. There are no MCInst operands for these. 6536 if (Op.isToken()) { 6537 continue; 6538 } 6539 assert(Op.isImm()); 6540 6541 // Handle optional arguments 6542 OptionalIdx[Op.getImmTy()] = i; 6543 } 6544 6545 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6546 AMDGPUOperand::ImmTyOffset); 6547 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6548 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6549 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6550 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6551 6552 if (isGFX10()) 6553 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6554 } 6555 6556 //===----------------------------------------------------------------------===// 6557 // mimg 6558 //===----------------------------------------------------------------------===// 6559 6560 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6561 bool IsAtomic) { 6562 unsigned I = 1; 6563 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6564 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6565 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6566 } 6567 6568 if (IsAtomic) { 6569 // Add src, same as dst 6570 assert(Desc.getNumDefs() == 1); 6571 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6572 } 6573 6574 OptionalImmIndexMap OptionalIdx; 6575 6576 for (unsigned E = Operands.size(); I != E; ++I) { 6577 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6578 6579 // Add the register arguments 6580 if (Op.isReg()) { 6581 Op.addRegOperands(Inst, 1); 6582 } else if (Op.isImmModifier()) { 6583 OptionalIdx[Op.getImmTy()] = I; 6584 } else if (!Op.isToken()) { 6585 llvm_unreachable("unexpected operand type"); 6586 } 6587 } 6588 6589 bool IsGFX10 = isGFX10(); 6590 6591 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6592 if (IsGFX10) 6593 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6594 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6595 if (IsGFX10) 6596 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6597 
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6598 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6599 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6600 if (IsGFX10) 6601 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6602 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6603 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6604 if (!IsGFX10) 6605 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6606 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6607 } 6608 6609 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6610 cvtMIMG(Inst, Operands, true); 6611 } 6612 6613 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 6614 const OperandVector &Operands) { 6615 for (unsigned I = 1; I < Operands.size(); ++I) { 6616 auto &Operand = (AMDGPUOperand &)*Operands[I]; 6617 if (Operand.isReg()) 6618 Operand.addRegOperands(Inst, 1); 6619 } 6620 6621 Inst.addOperand(MCOperand::createImm(1)); // a16 6622 } 6623 6624 //===----------------------------------------------------------------------===// 6625 // smrd 6626 //===----------------------------------------------------------------------===// 6627 6628 bool AMDGPUOperand::isSMRDOffset8() const { 6629 return isImm() && isUInt<8>(getImm()); 6630 } 6631 6632 bool AMDGPUOperand::isSMEMOffset() const { 6633 return isImm(); // Offset range is checked later by validator. 6634 } 6635 6636 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6637 // 32-bit literals are only supported on CI and we only want to use them 6638 // when the offset is > 8-bits. 6639 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6640 } 6641 6642 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6643 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6644 } 6645 6646 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6647 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6648 } 6649 6650 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6651 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6652 } 6653 6654 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6655 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6656 } 6657 6658 //===----------------------------------------------------------------------===// 6659 // vop3 6660 //===----------------------------------------------------------------------===// 6661 6662 static bool ConvertOmodMul(int64_t &Mul) { 6663 if (Mul != 1 && Mul != 2 && Mul != 4) 6664 return false; 6665 6666 Mul >>= 1; 6667 return true; 6668 } 6669 6670 static bool ConvertOmodDiv(int64_t &Div) { 6671 if (Div == 1) { 6672 Div = 0; 6673 return true; 6674 } 6675 6676 if (Div == 2) { 6677 Div = 3; 6678 return true; 6679 } 6680 6681 return false; 6682 } 6683 6684 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6685 if (BoundCtrl == 0) { 6686 BoundCtrl = 1; 6687 return true; 6688 } 6689 6690 if (BoundCtrl == -1) { 6691 BoundCtrl = 0; 6692 return true; 6693 } 6694 6695 return false; 6696 } 6697 6698 // Note: the order in this table matches the order of operands in AsmString. 
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
  {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA, true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128A16, true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyA16, true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"dim",     AMDGPUOperand::ImmTyDim, false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr",   AMDGPUOperand::ImmTyExpCompr, true, nullptr},
  {"vm",      AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel",    AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo",    AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi",    AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp",    AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz",    AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid",    AMDGPUOperand::ImmTyABID, false, nullptr}
};

OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which have
  // hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
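  //
  // Concretely, the loop below keeps calling parseOptionalOpr (skipping the
  // separating commas) for up to MAX_OPR_LOOKAHEAD additional operands, so
  // that a hardcoded trailing operand such as the 'glc' mentioned above can
  // still be reached by the matcher.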
6759 6760 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6761 if (res != MatchOperand_Success || 6762 isToken(AsmToken::EndOfStatement)) 6763 break; 6764 6765 trySkipToken(AsmToken::Comma); 6766 res = parseOptionalOpr(Operands); 6767 } 6768 6769 return res; 6770 } 6771 6772 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6773 OperandMatchResultTy res; 6774 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6775 // try to parse any optional operand here 6776 if (Op.IsBit) { 6777 res = parseNamedBit(Op.Name, Operands, Op.Type); 6778 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6779 res = parseOModOperand(Operands); 6780 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6781 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6782 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6783 res = parseSDWASel(Operands, Op.Name, Op.Type); 6784 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6785 res = parseSDWADstUnused(Operands); 6786 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6787 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6788 Op.Type == AMDGPUOperand::ImmTyNegLo || 6789 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6790 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6791 Op.ConvertResult); 6792 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6793 res = parseDim(Operands); 6794 } else { 6795 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6796 } 6797 if (res != MatchOperand_NoMatch) { 6798 return res; 6799 } 6800 } 6801 return MatchOperand_NoMatch; 6802 } 6803 6804 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6805 StringRef Name = Parser.getTok().getString(); 6806 if (Name == "mul") { 6807 return parseIntWithPrefix("mul", Operands, 6808 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6809 } 6810 6811 if (Name == "div") { 6812 return parseIntWithPrefix("div", Operands, 6813 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6814 } 6815 6816 return MatchOperand_NoMatch; 6817 } 6818 6819 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6820 cvtVOP3P(Inst, Operands); 6821 6822 int Opc = Inst.getOpcode(); 6823 6824 int SrcNum; 6825 const int Ops[] = { AMDGPU::OpName::src0, 6826 AMDGPU::OpName::src1, 6827 AMDGPU::OpName::src2 }; 6828 for (SrcNum = 0; 6829 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6830 ++SrcNum); 6831 assert(SrcNum > 0); 6832 6833 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6834 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6835 6836 if ((OpSel & (1 << SrcNum)) != 0) { 6837 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6838 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6839 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6840 } 6841 } 6842 6843 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6844 // 1. This operand is input modifiers 6845 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6846 // 2. This is not last operand 6847 && Desc.NumOperands > (OpNum + 1) 6848 // 3. Next operand is register class 6849 && Desc.OpInfo[OpNum + 1].RegClass != -1 6850 // 4. 
Next register is not tied to any other operand 6851 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6852 } 6853 6854 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6855 { 6856 OptionalImmIndexMap OptionalIdx; 6857 unsigned Opc = Inst.getOpcode(); 6858 6859 unsigned I = 1; 6860 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6861 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6862 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6863 } 6864 6865 for (unsigned E = Operands.size(); I != E; ++I) { 6866 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6867 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6868 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6869 } else if (Op.isInterpSlot() || 6870 Op.isInterpAttr() || 6871 Op.isAttrChan()) { 6872 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6873 } else if (Op.isImmModifier()) { 6874 OptionalIdx[Op.getImmTy()] = I; 6875 } else { 6876 llvm_unreachable("unhandled operand type"); 6877 } 6878 } 6879 6880 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6881 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6882 } 6883 6884 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6885 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6886 } 6887 6888 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6889 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6890 } 6891 } 6892 6893 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6894 OptionalImmIndexMap &OptionalIdx) { 6895 unsigned Opc = Inst.getOpcode(); 6896 6897 unsigned I = 1; 6898 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6899 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6900 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6901 } 6902 6903 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6904 // This instruction has src modifiers 6905 for (unsigned E = Operands.size(); I != E; ++I) { 6906 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6907 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6908 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6909 } else if (Op.isImmModifier()) { 6910 OptionalIdx[Op.getImmTy()] = I; 6911 } else if (Op.isRegOrImm()) { 6912 Op.addRegOrImmOperands(Inst, 1); 6913 } else { 6914 llvm_unreachable("unhandled operand type"); 6915 } 6916 } 6917 } else { 6918 // No src modifiers 6919 for (unsigned E = Operands.size(); I != E; ++I) { 6920 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6921 if (Op.isMod()) { 6922 OptionalIdx[Op.getImmTy()] = I; 6923 } else { 6924 Op.addRegOrImmOperands(Inst, 1); 6925 } 6926 } 6927 } 6928 6929 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6930 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6931 } 6932 6933 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6934 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6935 } 6936 6937 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6938 // it has src2 register operand that is tied to dst operand 6939 // we don't allow modifiers for this operand in assembler so src2_modifiers 6940 // should be 0. 
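  //
  // For example (illustrative), for "v_mac_f32 v0, v1, v2" only dst, src0 and
  // src1 are parsed; the code below synthesizes src2_modifiers = 0 and then
  // re-uses the dst operand (operand 0) as the tied src2.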
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ?
-1 : 0; 6987 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6988 DefaultVal); 6989 } 6990 6991 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6992 if (NegLoIdx != -1) { 6993 assert(IsPacked); 6994 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6995 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6996 } 6997 6998 const int Ops[] = { AMDGPU::OpName::src0, 6999 AMDGPU::OpName::src1, 7000 AMDGPU::OpName::src2 }; 7001 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7002 AMDGPU::OpName::src1_modifiers, 7003 AMDGPU::OpName::src2_modifiers }; 7004 7005 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7006 7007 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7008 unsigned OpSelHi = 0; 7009 unsigned NegLo = 0; 7010 unsigned NegHi = 0; 7011 7012 if (OpSelHiIdx != -1) { 7013 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7014 } 7015 7016 if (NegLoIdx != -1) { 7017 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7018 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7019 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7020 } 7021 7022 for (int J = 0; J < 3; ++J) { 7023 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7024 if (OpIdx == -1) 7025 break; 7026 7027 uint32_t ModVal = 0; 7028 7029 if ((OpSel & (1 << J)) != 0) 7030 ModVal |= SISrcMods::OP_SEL_0; 7031 7032 if ((OpSelHi & (1 << J)) != 0) 7033 ModVal |= SISrcMods::OP_SEL_1; 7034 7035 if ((NegLo & (1 << J)) != 0) 7036 ModVal |= SISrcMods::NEG; 7037 7038 if ((NegHi & (1 << J)) != 0) 7039 ModVal |= SISrcMods::NEG_HI; 7040 7041 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7042 7043 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7044 } 7045 } 7046 7047 //===----------------------------------------------------------------------===// 7048 // dpp 7049 //===----------------------------------------------------------------------===// 7050 7051 bool AMDGPUOperand::isDPP8() const { 7052 return isImmTy(ImmTyDPP8); 7053 } 7054 7055 bool AMDGPUOperand::isDPPCtrl() const { 7056 using namespace AMDGPU::DPP; 7057 7058 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7059 if (result) { 7060 int64_t Imm = getImm(); 7061 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7062 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7063 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7064 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7065 (Imm == DppCtrl::WAVE_SHL1) || 7066 (Imm == DppCtrl::WAVE_ROL1) || 7067 (Imm == DppCtrl::WAVE_SHR1) || 7068 (Imm == DppCtrl::WAVE_ROR1) || 7069 (Imm == DppCtrl::ROW_MIRROR) || 7070 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7071 (Imm == DppCtrl::BCAST15) || 7072 (Imm == DppCtrl::BCAST31) || 7073 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7074 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7075 } 7076 return false; 7077 } 7078 7079 //===----------------------------------------------------------------------===// 7080 // mAI 7081 //===----------------------------------------------------------------------===// 7082 7083 bool AMDGPUOperand::isBLGP() const { 7084 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7085 } 7086 7087 bool AMDGPUOperand::isCBSZ() const { 7088 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7089 } 7090 7091 bool AMDGPUOperand::isABID() 
const { 7092 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7093 } 7094 7095 bool AMDGPUOperand::isS16Imm() const { 7096 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7097 } 7098 7099 bool AMDGPUOperand::isU16Imm() const { 7100 return isImm() && isUInt<16>(getImm()); 7101 } 7102 7103 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7104 if (!isGFX10()) 7105 return MatchOperand_NoMatch; 7106 7107 SMLoc S = Parser.getTok().getLoc(); 7108 7109 if (getLexer().isNot(AsmToken::Identifier)) 7110 return MatchOperand_NoMatch; 7111 if (getLexer().getTok().getString() != "dim") 7112 return MatchOperand_NoMatch; 7113 7114 Parser.Lex(); 7115 if (getLexer().isNot(AsmToken::Colon)) 7116 return MatchOperand_ParseFail; 7117 7118 Parser.Lex(); 7119 7120 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 7121 // integer. 7122 std::string Token; 7123 if (getLexer().is(AsmToken::Integer)) { 7124 SMLoc Loc = getLexer().getTok().getEndLoc(); 7125 Token = std::string(getLexer().getTok().getString()); 7126 Parser.Lex(); 7127 if (getLexer().getTok().getLoc() != Loc) 7128 return MatchOperand_ParseFail; 7129 } 7130 if (getLexer().isNot(AsmToken::Identifier)) 7131 return MatchOperand_ParseFail; 7132 Token += getLexer().getTok().getString(); 7133 7134 StringRef DimId = Token; 7135 if (DimId.startswith("SQ_RSRC_IMG_")) 7136 DimId = DimId.substr(12); 7137 7138 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7139 if (!DimInfo) 7140 return MatchOperand_ParseFail; 7141 7142 Parser.Lex(); 7143 7144 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 7145 AMDGPUOperand::ImmTyDim)); 7146 return MatchOperand_Success; 7147 } 7148 7149 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7150 SMLoc S = Parser.getTok().getLoc(); 7151 StringRef Prefix; 7152 7153 if (getLexer().getKind() == AsmToken::Identifier) { 7154 Prefix = Parser.getTok().getString(); 7155 } else { 7156 return MatchOperand_NoMatch; 7157 } 7158 7159 if (Prefix != "dpp8") 7160 return parseDPPCtrl(Operands); 7161 if (!isGFX10()) 7162 return MatchOperand_NoMatch; 7163 7164 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7165 7166 int64_t Sels[8]; 7167 7168 Parser.Lex(); 7169 if (getLexer().isNot(AsmToken::Colon)) 7170 return MatchOperand_ParseFail; 7171 7172 Parser.Lex(); 7173 if (getLexer().isNot(AsmToken::LBrac)) 7174 return MatchOperand_ParseFail; 7175 7176 Parser.Lex(); 7177 if (getParser().parseAbsoluteExpression(Sels[0])) 7178 return MatchOperand_ParseFail; 7179 if (0 > Sels[0] || 7 < Sels[0]) 7180 return MatchOperand_ParseFail; 7181 7182 for (size_t i = 1; i < 8; ++i) { 7183 if (getLexer().isNot(AsmToken::Comma)) 7184 return MatchOperand_ParseFail; 7185 7186 Parser.Lex(); 7187 if (getParser().parseAbsoluteExpression(Sels[i])) 7188 return MatchOperand_ParseFail; 7189 if (0 > Sels[i] || 7 < Sels[i]) 7190 return MatchOperand_ParseFail; 7191 } 7192 7193 if (getLexer().isNot(AsmToken::RBrac)) 7194 return MatchOperand_ParseFail; 7195 Parser.Lex(); 7196 7197 unsigned DPP8 = 0; 7198 for (size_t i = 0; i < 8; ++i) 7199 DPP8 |= (Sels[i] << (i * 3)); 7200 7201 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7202 return MatchOperand_Success; 7203 } 7204 7205 OperandMatchResultTy 7206 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7207 using namespace AMDGPU::DPP; 7208 7209 SMLoc S = Parser.getTok().getLoc(); 7210 StringRef Prefix; 7211 int64_t Int; 7212 7213 if 
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrl from eating invalid tokens.
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast"
        && Prefix != "row_share"
        && Prefix != "row_xmask") {
      return MatchOperand_NoMatch;
    }

    if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
      return MatchOperand_NoMatch;

    if (!isVI() && !isGFX9() &&
        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
         Prefix == "wave_rol" || Prefix == "wave_ror" ||
         Prefix == "row_bcast"))
      return MatchOperand_NoMatch;

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i * 2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHARE_FIRST;
      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_XMASK_FIRST;
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
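// Note (illustrative, not part of the original source): quad_perm packs four
// 2-bit lane selectors into the low byte of the DPP control value
// (Int += Temp << (i * 2 + 2)), so e.g. quad_perm:[3,2,1,0] yields 0x1B,
// which falls in the QUAD_PERM_FIRST..QUAD_PERM_LAST range accepted by
// isDPPCtrl() above. Counted selectors such as row_shl:%d are instead OR'ed
// onto the corresponding base value, e.g. row_shl:1 becomes
// DppCtrl::ROW_SHL0 | 1.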
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr
AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}
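// Note (illustrative, not part of the original source): when the optional DPP
// operands are omitted in the source, the defaults above are used:
// row_mask:0xf and bank_mask:0xf (i.e. no rows or banks masked off) and
// bound_ctrl:0, matching the fallback values that cvtDPP() below passes to
// addOptionalImmOperand.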
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied old or src2 operand for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}
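// Note (illustrative, not part of the original source): the two parsers above
// handle the textual SDWA selectors, e.g. assuming an instruction written as
//   v_add_f16_sdwa v0, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
// each BYTE_n/WORD_n/DWORD keyword maps to the matching SdwaSel value and the
// dst_unused keyword maps to a DstUnused value; any other spelling falls
// through to the 0xffffffff default and fails with MatchOperand_ParseFail.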
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but
  // expected the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//
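// Note (illustrative, not part of the original source): parseEndpgmOp below
// accepts an optional 16-bit immediate, so e.g. "s_endpgm" is parsed with the
// default value 0 while "s_endpgm 1" carries the explicit immediate 1; values
// that do not fit in an unsigned 16-bit field are rejected with the
// "expected a 16-bit value" diagnostic.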
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional, if not present default to 0
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }