//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
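
    // Descriptive note: the helpers below pack the parsed source modifiers
    // into the SISrcMods bit flags carried by src_modifiers operands.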
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  bool validateMAIAccWrite(const MCInst &Inst);
  bool validateDivScale(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg);

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);
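
  // Descriptive note: the cvt* hooks below convert the parsed operand list
  // into an MCInst for a particular encoding family (VOP3, VOP3P, MIMG, DPP,
  // SDWA), filling in defaults for optional operands omitted in the source.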
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}
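
// Descriptive note: isInlinableImm() checks whether this immediate can be
// encoded directly in the instruction's source field as an inline constant,
// rather than as a trailing 32-bit literal dword.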
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
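
// Descriptive note: isLiteralImm() is the companion check, deciding whether
// the immediate can be emitted as the instruction's 32-bit literal constant
// for the expected operand type.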
MVT::i16 : type; 1693 1694 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1695 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1696 } 1697 1698 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1699 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1700 } 1701 1702 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1703 if (AsmParser->isVI()) 1704 return isVReg32(); 1705 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1706 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1707 else 1708 return false; 1709 } 1710 1711 bool AMDGPUOperand::isSDWAFP16Operand() const { 1712 return isSDWAOperand(MVT::f16); 1713 } 1714 1715 bool AMDGPUOperand::isSDWAFP32Operand() const { 1716 return isSDWAOperand(MVT::f32); 1717 } 1718 1719 bool AMDGPUOperand::isSDWAInt16Operand() const { 1720 return isSDWAOperand(MVT::i16); 1721 } 1722 1723 bool AMDGPUOperand::isSDWAInt32Operand() const { 1724 return isSDWAOperand(MVT::i32); 1725 } 1726 1727 bool AMDGPUOperand::isBoolReg() const { 1728 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1729 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1730 } 1731 1732 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1733 { 1734 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1735 assert(Size == 2 || Size == 4 || Size == 8); 1736 1737 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1738 1739 if (Imm.Mods.Abs) { 1740 Val &= ~FpSignMask; 1741 } 1742 if (Imm.Mods.Neg) { 1743 Val ^= FpSignMask; 1744 } 1745 1746 return Val; 1747 } 1748 1749 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1750 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1751 Inst.getNumOperands())) { 1752 addLiteralImmOperand(Inst, Imm.Val, 1753 ApplyModifiers & 1754 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1755 } else { 1756 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1757 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1758 } 1759 } 1760 1761 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1762 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1763 auto OpNum = Inst.getNumOperands(); 1764 // Check that this operand accepts literals 1765 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1766 1767 if (ApplyModifiers) { 1768 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1769 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1770 Val = applyInputFPModifiers(Val, Size); 1771 } 1772 1773 APInt Literal(64, Val); 1774 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1775 1776 if (Imm.IsFPImm) { // We got fp literal token 1777 switch (OpTy) { 1778 case AMDGPU::OPERAND_REG_IMM_INT64: 1779 case AMDGPU::OPERAND_REG_IMM_FP64: 1780 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1781 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1782 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1783 AsmParser->hasInv2PiInlineImm())) { 1784 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1785 return; 1786 } 1787 1788 // Non-inlineable 1789 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1790 // For fp operands we check if low 32 bits are zeros 1791 if (Literal.getLoBits(32) != 0) { 1792 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1793 "Can't encode literal as exact 64-bit floating-point operand. " 1794 "Low 32-bits will be set to zero"); 1795 } 1796 1797 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1798 return; 1799 } 1800 1801 // We don't allow fp literals in 64-bit integer instructions. It is 1802 // unclear how we should encode them. This case should be checked earlier 1803 // in predicate methods (isLiteralImm()) 1804 llvm_unreachable("fp literal in 64-bit integer instruction."); 1805 1806 case AMDGPU::OPERAND_REG_IMM_INT32: 1807 case AMDGPU::OPERAND_REG_IMM_FP32: 1808 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1809 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1810 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1811 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1812 case AMDGPU::OPERAND_REG_IMM_INT16: 1813 case AMDGPU::OPERAND_REG_IMM_FP16: 1814 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1815 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1816 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1817 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1818 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1819 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1820 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1821 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1822 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1823 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1824 bool lost; 1825 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1826 // Convert literal to single precision 1827 FPLiteral.convert(*getOpFltSemantics(OpTy), 1828 APFloat::rmNearestTiesToEven, &lost); 1829 // We allow precision lost but not overflow or underflow. This should be 1830 // checked earlier in isLiteralImm() 1831 1832 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1833 Inst.addOperand(MCOperand::createImm(ImmVal)); 1834 return; 1835 } 1836 default: 1837 llvm_unreachable("invalid operand size"); 1838 } 1839 1840 return; 1841 } 1842 1843 // We got int literal token. 1844 // Only sign extend inline immediates. 
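// Literals that are not inlinable are passed through without sign extension;
// only the low 32 bits (or the low 16 bits for 16-bit operands) are kept below.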
1845 switch (OpTy) { 1846 case AMDGPU::OPERAND_REG_IMM_INT32: 1847 case AMDGPU::OPERAND_REG_IMM_FP32: 1848 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1849 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1850 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1851 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1852 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1853 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1854 if (isSafeTruncation(Val, 32) && 1855 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1856 AsmParser->hasInv2PiInlineImm())) { 1857 Inst.addOperand(MCOperand::createImm(Val)); 1858 return; 1859 } 1860 1861 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1862 return; 1863 1864 case AMDGPU::OPERAND_REG_IMM_INT64: 1865 case AMDGPU::OPERAND_REG_IMM_FP64: 1866 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1867 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1868 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1869 Inst.addOperand(MCOperand::createImm(Val)); 1870 return; 1871 } 1872 1873 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1874 return; 1875 1876 case AMDGPU::OPERAND_REG_IMM_INT16: 1877 case AMDGPU::OPERAND_REG_IMM_FP16: 1878 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1879 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1880 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1881 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1882 if (isSafeTruncation(Val, 16) && 1883 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1884 AsmParser->hasInv2PiInlineImm())) { 1885 Inst.addOperand(MCOperand::createImm(Val)); 1886 return; 1887 } 1888 1889 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1890 return; 1891 1892 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1893 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1894 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1895 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1896 assert(isSafeTruncation(Val, 16)); 1897 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1898 AsmParser->hasInv2PiInlineImm())); 1899 1900 Inst.addOperand(MCOperand::createImm(Val)); 1901 return; 1902 } 1903 default: 1904 llvm_unreachable("invalid operand size"); 1905 } 1906 } 1907 1908 template <unsigned Bitwidth> 1909 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1910 APInt Literal(64, Imm.Val); 1911 1912 if (!Imm.IsFPImm) { 1913 // We got int literal token. 
1914 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1915 return; 1916 } 1917 1918 bool Lost; 1919 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1920 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1921 APFloat::rmNearestTiesToEven, &Lost); 1922 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1923 } 1924 1925 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1926 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1927 } 1928 1929 static bool isInlineValue(unsigned Reg) { 1930 switch (Reg) { 1931 case AMDGPU::SRC_SHARED_BASE: 1932 case AMDGPU::SRC_SHARED_LIMIT: 1933 case AMDGPU::SRC_PRIVATE_BASE: 1934 case AMDGPU::SRC_PRIVATE_LIMIT: 1935 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1936 return true; 1937 case AMDGPU::SRC_VCCZ: 1938 case AMDGPU::SRC_EXECZ: 1939 case AMDGPU::SRC_SCC: 1940 return true; 1941 case AMDGPU::SGPR_NULL: 1942 return true; 1943 default: 1944 return false; 1945 } 1946 } 1947 1948 bool AMDGPUOperand::isInlineValue() const { 1949 return isRegKind() && ::isInlineValue(getReg()); 1950 } 1951 1952 //===----------------------------------------------------------------------===// 1953 // AsmParser 1954 //===----------------------------------------------------------------------===// 1955 1956 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1957 if (Is == IS_VGPR) { 1958 switch (RegWidth) { 1959 default: return -1; 1960 case 1: return AMDGPU::VGPR_32RegClassID; 1961 case 2: return AMDGPU::VReg_64RegClassID; 1962 case 3: return AMDGPU::VReg_96RegClassID; 1963 case 4: return AMDGPU::VReg_128RegClassID; 1964 case 5: return AMDGPU::VReg_160RegClassID; 1965 case 6: return AMDGPU::VReg_192RegClassID; 1966 case 8: return AMDGPU::VReg_256RegClassID; 1967 case 16: return AMDGPU::VReg_512RegClassID; 1968 case 32: return AMDGPU::VReg_1024RegClassID; 1969 } 1970 } else if (Is == IS_TTMP) { 1971 switch (RegWidth) { 1972 default: return -1; 1973 case 1: return AMDGPU::TTMP_32RegClassID; 1974 case 2: return AMDGPU::TTMP_64RegClassID; 1975 case 4: return AMDGPU::TTMP_128RegClassID; 1976 case 8: return AMDGPU::TTMP_256RegClassID; 1977 case 16: return AMDGPU::TTMP_512RegClassID; 1978 } 1979 } else if (Is == IS_SGPR) { 1980 switch (RegWidth) { 1981 default: return -1; 1982 case 1: return AMDGPU::SGPR_32RegClassID; 1983 case 2: return AMDGPU::SGPR_64RegClassID; 1984 case 3: return AMDGPU::SGPR_96RegClassID; 1985 case 4: return AMDGPU::SGPR_128RegClassID; 1986 case 5: return AMDGPU::SGPR_160RegClassID; 1987 case 6: return AMDGPU::SGPR_192RegClassID; 1988 case 8: return AMDGPU::SGPR_256RegClassID; 1989 case 16: return AMDGPU::SGPR_512RegClassID; 1990 } 1991 } else if (Is == IS_AGPR) { 1992 switch (RegWidth) { 1993 default: return -1; 1994 case 1: return AMDGPU::AGPR_32RegClassID; 1995 case 2: return AMDGPU::AReg_64RegClassID; 1996 case 3: return AMDGPU::AReg_96RegClassID; 1997 case 4: return AMDGPU::AReg_128RegClassID; 1998 case 5: return AMDGPU::AReg_160RegClassID; 1999 case 6: return AMDGPU::AReg_192RegClassID; 2000 case 8: return AMDGPU::AReg_256RegClassID; 2001 case 16: return AMDGPU::AReg_512RegClassID; 2002 case 32: return AMDGPU::AReg_1024RegClassID; 2003 } 2004 } 2005 return -1; 2006 } 2007 2008 static unsigned getSpecialRegForName(StringRef RegName) { 2009 return StringSwitch<unsigned>(RegName) 2010 .Case("exec", AMDGPU::EXEC) 2011 .Case("vcc", AMDGPU::VCC) 2012 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2013 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2014 
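// Several of the named source values below are accepted both with and
// without the "src_" prefix.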
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2015 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2016 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2017 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2018 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2019 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2020 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2021 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2022 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2023 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2024 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2025 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2026 .Case("m0", AMDGPU::M0) 2027 .Case("vccz", AMDGPU::SRC_VCCZ) 2028 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2029 .Case("execz", AMDGPU::SRC_EXECZ) 2030 .Case("src_execz", AMDGPU::SRC_EXECZ) 2031 .Case("scc", AMDGPU::SRC_SCC) 2032 .Case("src_scc", AMDGPU::SRC_SCC) 2033 .Case("tba", AMDGPU::TBA) 2034 .Case("tma", AMDGPU::TMA) 2035 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2036 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2037 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2038 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2039 .Case("vcc_lo", AMDGPU::VCC_LO) 2040 .Case("vcc_hi", AMDGPU::VCC_HI) 2041 .Case("exec_lo", AMDGPU::EXEC_LO) 2042 .Case("exec_hi", AMDGPU::EXEC_HI) 2043 .Case("tma_lo", AMDGPU::TMA_LO) 2044 .Case("tma_hi", AMDGPU::TMA_HI) 2045 .Case("tba_lo", AMDGPU::TBA_LO) 2046 .Case("tba_hi", AMDGPU::TBA_HI) 2047 .Case("pc", AMDGPU::PC_REG) 2048 .Case("null", AMDGPU::SGPR_NULL) 2049 .Default(AMDGPU::NoRegister); 2050 } 2051 2052 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2053 SMLoc &EndLoc, bool RestoreOnFailure) { 2054 auto R = parseRegister(); 2055 if (!R) return true; 2056 assert(R->isReg()); 2057 RegNo = R->getReg(); 2058 StartLoc = R->getStartLoc(); 2059 EndLoc = R->getEndLoc(); 2060 return false; 2061 } 2062 2063 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2064 SMLoc &EndLoc) { 2065 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2066 } 2067 2068 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2069 SMLoc &StartLoc, 2070 SMLoc &EndLoc) { 2071 bool Result = 2072 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2073 bool PendingErrors = getParser().hasPendingError(); 2074 getParser().clearPendingErrors(); 2075 if (PendingErrors) 2076 return MatchOperand_ParseFail; 2077 if (Result) 2078 return MatchOperand_NoMatch; 2079 return MatchOperand_Success; 2080 } 2081 2082 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2083 RegisterKind RegKind, unsigned Reg1, 2084 SMLoc Loc) { 2085 switch (RegKind) { 2086 case IS_SPECIAL: 2087 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2088 Reg = AMDGPU::EXEC; 2089 RegWidth = 2; 2090 return true; 2091 } 2092 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2093 Reg = AMDGPU::FLAT_SCR; 2094 RegWidth = 2; 2095 return true; 2096 } 2097 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2098 Reg = AMDGPU::XNACK_MASK; 2099 RegWidth = 2; 2100 return true; 2101 } 2102 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2103 Reg = AMDGPU::VCC; 2104 RegWidth = 2; 2105 return true; 2106 } 2107 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2108 Reg = AMDGPU::TBA; 2109 RegWidth = 2; 2110 return true; 2111 } 2112 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2113 Reg = AMDGPU::TMA; 2114 
RegWidth = 2; 2115 return true; 2116 } 2117 Error(Loc, "register does not fit in the list"); 2118 return false; 2119 case IS_VGPR: 2120 case IS_SGPR: 2121 case IS_AGPR: 2122 case IS_TTMP: 2123 if (Reg1 != Reg + RegWidth) { 2124 Error(Loc, "registers in a list must have consecutive indices"); 2125 return false; 2126 } 2127 RegWidth++; 2128 return true; 2129 default: 2130 llvm_unreachable("unexpected register kind"); 2131 } 2132 } 2133 2134 struct RegInfo { 2135 StringLiteral Name; 2136 RegisterKind Kind; 2137 }; 2138 2139 static constexpr RegInfo RegularRegisters[] = { 2140 {{"v"}, IS_VGPR}, 2141 {{"s"}, IS_SGPR}, 2142 {{"ttmp"}, IS_TTMP}, 2143 {{"acc"}, IS_AGPR}, 2144 {{"a"}, IS_AGPR}, 2145 }; 2146 2147 static bool isRegularReg(RegisterKind Kind) { 2148 return Kind == IS_VGPR || 2149 Kind == IS_SGPR || 2150 Kind == IS_TTMP || 2151 Kind == IS_AGPR; 2152 } 2153 2154 static const RegInfo* getRegularRegInfo(StringRef Str) { 2155 for (const RegInfo &Reg : RegularRegisters) 2156 if (Str.startswith(Reg.Name)) 2157 return &Reg; 2158 return nullptr; 2159 } 2160 2161 static bool getRegNum(StringRef Str, unsigned& Num) { 2162 return !Str.getAsInteger(10, Num); 2163 } 2164 2165 bool 2166 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2167 const AsmToken &NextToken) const { 2168 2169 // A list of consecutive registers: [s0,s1,s2,s3] 2170 if (Token.is(AsmToken::LBrac)) 2171 return true; 2172 2173 if (!Token.is(AsmToken::Identifier)) 2174 return false; 2175 2176 // A single register like s0 or a range of registers like s[0:1] 2177 2178 StringRef Str = Token.getString(); 2179 const RegInfo *Reg = getRegularRegInfo(Str); 2180 if (Reg) { 2181 StringRef RegName = Reg->Name; 2182 StringRef RegSuffix = Str.substr(RegName.size()); 2183 if (!RegSuffix.empty()) { 2184 unsigned Num; 2185 // A single register with an index: rXX 2186 if (getRegNum(RegSuffix, Num)) 2187 return true; 2188 } else { 2189 // A range of registers: r[XX:YY]. 2190 if (NextToken.is(AsmToken::LBrac)) 2191 return true; 2192 } 2193 } 2194 2195 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2196 } 2197 2198 bool 2199 AMDGPUAsmParser::isRegister() 2200 { 2201 return isRegister(getToken(), peekToken()); 2202 } 2203 2204 unsigned 2205 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2206 unsigned RegNum, 2207 unsigned RegWidth, 2208 SMLoc Loc) { 2209 2210 assert(isRegularReg(RegKind)); 2211 2212 unsigned AlignSize = 1; 2213 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2214 // SGPR and TTMP registers must be aligned. 2215 // Max required alignment is 4 dwords. 
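// e.g. a 4-dword group such as s[2:5] is rejected because its first index
// is not a multiple of 4, while s[4:7] is accepted.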
2216 AlignSize = std::min(RegWidth, 4u); 2217 } 2218 2219 if (RegNum % AlignSize != 0) { 2220 Error(Loc, "invalid register alignment"); 2221 return AMDGPU::NoRegister; 2222 } 2223 2224 unsigned RegIdx = RegNum / AlignSize; 2225 int RCID = getRegClass(RegKind, RegWidth); 2226 if (RCID == -1) { 2227 Error(Loc, "invalid or unsupported register size"); 2228 return AMDGPU::NoRegister; 2229 } 2230 2231 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2232 const MCRegisterClass RC = TRI->getRegClass(RCID); 2233 if (RegIdx >= RC.getNumRegs()) { 2234 Error(Loc, "register index is out of range"); 2235 return AMDGPU::NoRegister; 2236 } 2237 2238 return RC.getRegister(RegIdx); 2239 } 2240 2241 bool 2242 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2243 int64_t RegLo, RegHi; 2244 if (!skipToken(AsmToken::LBrac, "missing register index")) 2245 return false; 2246 2247 SMLoc FirstIdxLoc = getLoc(); 2248 SMLoc SecondIdxLoc; 2249 2250 if (!parseExpr(RegLo)) 2251 return false; 2252 2253 if (trySkipToken(AsmToken::Colon)) { 2254 SecondIdxLoc = getLoc(); 2255 if (!parseExpr(RegHi)) 2256 return false; 2257 } else { 2258 RegHi = RegLo; 2259 } 2260 2261 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2262 return false; 2263 2264 if (!isUInt<32>(RegLo)) { 2265 Error(FirstIdxLoc, "invalid register index"); 2266 return false; 2267 } 2268 2269 if (!isUInt<32>(RegHi)) { 2270 Error(SecondIdxLoc, "invalid register index"); 2271 return false; 2272 } 2273 2274 if (RegLo > RegHi) { 2275 Error(FirstIdxLoc, "first register index should not exceed second index"); 2276 return false; 2277 } 2278 2279 Num = static_cast<unsigned>(RegLo); 2280 Width = (RegHi - RegLo) + 1; 2281 return true; 2282 } 2283 2284 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2285 unsigned &RegNum, unsigned &RegWidth, 2286 SmallVectorImpl<AsmToken> &Tokens) { 2287 assert(isToken(AsmToken::Identifier)); 2288 unsigned Reg = getSpecialRegForName(getTokenStr()); 2289 if (Reg) { 2290 RegNum = 0; 2291 RegWidth = 1; 2292 RegKind = IS_SPECIAL; 2293 Tokens.push_back(getToken()); 2294 lex(); // skip register name 2295 } 2296 return Reg; 2297 } 2298 2299 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2300 unsigned &RegNum, unsigned &RegWidth, 2301 SmallVectorImpl<AsmToken> &Tokens) { 2302 assert(isToken(AsmToken::Identifier)); 2303 StringRef RegName = getTokenStr(); 2304 auto Loc = getLoc(); 2305 2306 const RegInfo *RI = getRegularRegInfo(RegName); 2307 if (!RI) { 2308 Error(Loc, "invalid register name"); 2309 return AMDGPU::NoRegister; 2310 } 2311 2312 Tokens.push_back(getToken()); 2313 lex(); // skip register name 2314 2315 RegKind = RI->Kind; 2316 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2317 if (!RegSuffix.empty()) { 2318 // Single 32-bit register: vXX. 2319 if (!getRegNum(RegSuffix, RegNum)) { 2320 Error(Loc, "invalid register index"); 2321 return AMDGPU::NoRegister; 2322 } 2323 RegWidth = 1; 2324 } else { 2325 // Range of registers: v[XX:YY]. ":YY" is optional. 
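// e.g. v[8:11] names four consecutive VGPRs starting at v8, and v[8] is
// equivalent to plain v8.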
2326 if (!ParseRegRange(RegNum, RegWidth)) 2327 return AMDGPU::NoRegister; 2328 } 2329 2330 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2331 } 2332 2333 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2334 unsigned &RegWidth, 2335 SmallVectorImpl<AsmToken> &Tokens) { 2336 unsigned Reg = AMDGPU::NoRegister; 2337 auto ListLoc = getLoc(); 2338 2339 if (!skipToken(AsmToken::LBrac, 2340 "expected a register or a list of registers")) { 2341 return AMDGPU::NoRegister; 2342 } 2343 2344 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2345 2346 auto Loc = getLoc(); 2347 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2348 return AMDGPU::NoRegister; 2349 if (RegWidth != 1) { 2350 Error(Loc, "expected a single 32-bit register"); 2351 return AMDGPU::NoRegister; 2352 } 2353 2354 for (; trySkipToken(AsmToken::Comma); ) { 2355 RegisterKind NextRegKind; 2356 unsigned NextReg, NextRegNum, NextRegWidth; 2357 Loc = getLoc(); 2358 2359 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2360 NextRegNum, NextRegWidth, 2361 Tokens)) { 2362 return AMDGPU::NoRegister; 2363 } 2364 if (NextRegWidth != 1) { 2365 Error(Loc, "expected a single 32-bit register"); 2366 return AMDGPU::NoRegister; 2367 } 2368 if (NextRegKind != RegKind) { 2369 Error(Loc, "registers in a list must be of the same kind"); 2370 return AMDGPU::NoRegister; 2371 } 2372 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2373 return AMDGPU::NoRegister; 2374 } 2375 2376 if (!skipToken(AsmToken::RBrac, 2377 "expected a comma or a closing square bracket")) { 2378 return AMDGPU::NoRegister; 2379 } 2380 2381 if (isRegularReg(RegKind)) 2382 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2383 2384 return Reg; 2385 } 2386 2387 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2388 unsigned &RegNum, unsigned &RegWidth, 2389 SmallVectorImpl<AsmToken> &Tokens) { 2390 auto Loc = getLoc(); 2391 Reg = AMDGPU::NoRegister; 2392 2393 if (isToken(AsmToken::Identifier)) { 2394 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2395 if (Reg == AMDGPU::NoRegister) 2396 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2397 } else { 2398 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2399 } 2400 2401 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2402 if (Reg == AMDGPU::NoRegister) { 2403 assert(Parser.hasPendingError()); 2404 return false; 2405 } 2406 2407 if (!subtargetHasRegister(*TRI, Reg)) { 2408 if (Reg == AMDGPU::SGPR_NULL) { 2409 Error(Loc, "'null' operand is not supported on this GPU"); 2410 } else { 2411 Error(Loc, "register not available on this GPU"); 2412 } 2413 return false; 2414 } 2415 2416 return true; 2417 } 2418 2419 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2420 unsigned &RegNum, unsigned &RegWidth, 2421 bool RestoreOnFailure /*=false*/) { 2422 Reg = AMDGPU::NoRegister; 2423 2424 SmallVector<AsmToken, 1> Tokens; 2425 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2426 if (RestoreOnFailure) { 2427 while (!Tokens.empty()) { 2428 getLexer().UnLex(Tokens.pop_back_val()); 2429 } 2430 } 2431 return true; 2432 } 2433 return false; 2434 } 2435 2436 Optional<StringRef> 2437 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2438 switch (RegKind) { 2439 case IS_VGPR: 2440 return StringRef(".amdgcn.next_free_vgpr"); 2441 case IS_SGPR: 2442 return StringRef(".amdgcn.next_free_sgpr"); 2443 default: 2444 return None; 2445 } 2446 } 2447 2448 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2449 auto SymbolName = getGprCountSymbolName(RegKind); 2450 assert(SymbolName && "initializing invalid register kind"); 2451 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2452 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2453 } 2454 2455 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2456 unsigned DwordRegIndex, 2457 unsigned RegWidth) { 2458 // Symbols are only defined for GCN targets 2459 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2460 return true; 2461 2462 auto SymbolName = getGprCountSymbolName(RegKind); 2463 if (!SymbolName) 2464 return true; 2465 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2466 2467 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2468 int64_t OldCount; 2469 2470 if (!Sym->isVariable()) 2471 return !Error(getParser().getTok().getLoc(), 2472 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2473 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2474 return !Error( 2475 getParser().getTok().getLoc(), 2476 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2477 2478 if (OldCount <= NewMax) 2479 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2480 2481 return true; 2482 } 2483 2484 std::unique_ptr<AMDGPUOperand> 2485 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2486 const auto &Tok = Parser.getTok(); 2487 SMLoc StartLoc = Tok.getLoc(); 2488 SMLoc EndLoc = Tok.getEndLoc(); 2489 RegisterKind RegKind; 2490 unsigned Reg, RegNum, RegWidth; 2491 2492 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2493 return nullptr; 2494 } 2495 if (isHsaAbiVersion3(&getSTI())) { 2496 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2497 return nullptr; 2498 } else 2499 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2500 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2501 } 2502 2503 OperandMatchResultTy 2504 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2505 // TODO: add syntactic sugar for 1/(2*PI) 2506 2507 assert(!isRegister()); 2508 assert(!isModifier()); 2509 2510 const auto& Tok = getToken(); 2511 const auto& NextTok = peekToken(); 2512 bool IsReal = Tok.is(AsmToken::Real); 2513 SMLoc S = getLoc(); 2514 bool Negate = false; 2515 2516 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2517 lex(); 2518 IsReal = true; 2519 Negate = true; 2520 } 2521 2522 if (IsReal) { 2523 // Floating-point expressions are not supported. 2524 // Can only allow floating-point literals with an 2525 // optional sign. 2526 2527 StringRef Num = getTokenStr(); 2528 lex(); 2529 2530 APFloat RealVal(APFloat::IEEEdouble()); 2531 auto roundMode = APFloat::rmNearestTiesToEven; 2532 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2533 return MatchOperand_ParseFail; 2534 } 2535 if (Negate) 2536 RealVal.changeSign(); 2537 2538 Operands.push_back( 2539 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2540 AMDGPUOperand::ImmTyNone, true)); 2541 2542 return MatchOperand_Success; 2543 2544 } else { 2545 int64_t IntVal; 2546 const MCExpr *Expr; 2547 SMLoc S = getLoc(); 2548 2549 if (HasSP3AbsModifier) { 2550 // This is a workaround for handling expressions 2551 // as arguments of SP3 'abs' modifier, for example: 2552 // |1.0| 2553 // |-1| 2554 // |1+x| 2555 // This syntax is not compatible with syntax of standard 2556 // MC expressions (due to the trailing '|'). 
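// In a standard MC expression '|' is the binary OR operator, so a full
// parseExpression() call would try to consume the closing '|' as OR;
// parsePrimaryExpr() does not parse binary operators and stops before it.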
2557 SMLoc EndLoc; 2558 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2559 return MatchOperand_ParseFail; 2560 } else { 2561 if (Parser.parseExpression(Expr)) 2562 return MatchOperand_ParseFail; 2563 } 2564 2565 if (Expr->evaluateAsAbsolute(IntVal)) { 2566 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2567 } else { 2568 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2569 } 2570 2571 return MatchOperand_Success; 2572 } 2573 2574 return MatchOperand_NoMatch; 2575 } 2576 2577 OperandMatchResultTy 2578 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2579 if (!isRegister()) 2580 return MatchOperand_NoMatch; 2581 2582 if (auto R = parseRegister()) { 2583 assert(R->isReg()); 2584 Operands.push_back(std::move(R)); 2585 return MatchOperand_Success; 2586 } 2587 return MatchOperand_ParseFail; 2588 } 2589 2590 OperandMatchResultTy 2591 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2592 auto res = parseReg(Operands); 2593 if (res != MatchOperand_NoMatch) { 2594 return res; 2595 } else if (isModifier()) { 2596 return MatchOperand_NoMatch; 2597 } else { 2598 return parseImm(Operands, HasSP3AbsMod); 2599 } 2600 } 2601 2602 bool 2603 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2604 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2605 const auto &str = Token.getString(); 2606 return str == "abs" || str == "neg" || str == "sext"; 2607 } 2608 return false; 2609 } 2610 2611 bool 2612 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2613 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2614 } 2615 2616 bool 2617 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2618 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2619 } 2620 2621 bool 2622 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2623 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2624 } 2625 2626 // Check if this is an operand modifier or an opcode modifier 2627 // which may look like an expression but it is not. We should 2628 // avoid parsing these modifiers as expressions. Currently 2629 // recognized sequences are: 2630 // |...| 2631 // abs(...) 2632 // neg(...) 2633 // sext(...) 2634 // -reg 2635 // -|...| 2636 // -abs(...) 2637 // name:... 2638 // Note that simple opcode modifiers like 'gds' may be parsed as 2639 // expressions; this is a special case. See getExpressionAsToken. 2640 // 2641 bool 2642 AMDGPUAsmParser::isModifier() { 2643 2644 AsmToken Tok = getToken(); 2645 AsmToken NextToken[2]; 2646 peekTokens(NextToken); 2647 2648 return isOperandModifier(Tok, NextToken[0]) || 2649 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2650 isOpcodeModifierWithVal(Tok, NextToken[0]); 2651 } 2652 2653 // Check if the current token is an SP3 'neg' modifier. 2654 // Currently this modifier is allowed in the following context: 2655 // 2656 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2657 // 2. Before an 'abs' modifier: -abs(...) 2658 // 3. Before an SP3 'abs' modifier: -|...| 2659 // 2660 // In all other cases "-" is handled as a part 2661 // of an expression that follows the sign. 
2662 //
2663 // Note: When "-" is followed by an integer literal,
2664 // this is interpreted as integer negation rather
2665 // than a floating-point NEG modifier applied to the literal.
2666 // Besides being counter-intuitive, such use of the floating-point
2667 // NEG modifier would have resulted in different meanings
2668 // of integer literals used with VOP1/2/C and VOP3,
2669 // for example:
2670 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2671 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2672 // Negative fp literals with preceding "-" are
2673 // handled likewise for uniformity.
2674 //
2675 bool
2676 AMDGPUAsmParser::parseSP3NegModifier() {
2677
2678 AsmToken NextToken[2];
2679 peekTokens(NextToken);
2680
2681 if (isToken(AsmToken::Minus) &&
2682 (isRegister(NextToken[0], NextToken[1]) ||
2683 NextToken[0].is(AsmToken::Pipe) ||
2684 isId(NextToken[0], "abs"))) {
2685 lex();
2686 return true;
2687 }
2688
2689 return false;
2690 }
2691
2692 OperandMatchResultTy
2693 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2694 bool AllowImm) {
2695 bool Neg, SP3Neg;
2696 bool Abs, SP3Abs;
2697 SMLoc Loc;
2698
2699 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2700 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2701 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2702 return MatchOperand_ParseFail;
2703 }
2704
2705 SP3Neg = parseSP3NegModifier();
2706
2707 Loc = getLoc();
2708 Neg = trySkipId("neg");
2709 if (Neg && SP3Neg) {
2710 Error(Loc, "expected register or immediate");
2711 return MatchOperand_ParseFail;
2712 }
2713 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2714 return MatchOperand_ParseFail;
2715
2716 Abs = trySkipId("abs");
2717 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2718 return MatchOperand_ParseFail;
2719
2720 Loc = getLoc();
2721 SP3Abs = trySkipToken(AsmToken::Pipe);
2722 if (Abs && SP3Abs) {
2723 Error(Loc, "expected register or immediate");
2724 return MatchOperand_ParseFail;
2725 }
2726
2727 OperandMatchResultTy Res;
2728 if (AllowImm) {
2729 Res = parseRegOrImm(Operands, SP3Abs);
2730 } else {
2731 Res = parseReg(Operands);
2732 }
2733 if (Res != MatchOperand_Success) {
2734 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2735 } 2736 2737 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2738 return MatchOperand_ParseFail; 2739 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2740 return MatchOperand_ParseFail; 2741 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2742 return MatchOperand_ParseFail; 2743 2744 AMDGPUOperand::Modifiers Mods; 2745 Mods.Abs = Abs || SP3Abs; 2746 Mods.Neg = Neg || SP3Neg; 2747 2748 if (Mods.hasFPModifiers()) { 2749 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2750 if (Op.isExpr()) { 2751 Error(Op.getStartLoc(), "expected an absolute expression"); 2752 return MatchOperand_ParseFail; 2753 } 2754 Op.setModifiers(Mods); 2755 } 2756 return MatchOperand_Success; 2757 } 2758 2759 OperandMatchResultTy 2760 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2761 bool AllowImm) { 2762 bool Sext = trySkipId("sext"); 2763 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2764 return MatchOperand_ParseFail; 2765 2766 OperandMatchResultTy Res; 2767 if (AllowImm) { 2768 Res = parseRegOrImm(Operands); 2769 } else { 2770 Res = parseReg(Operands); 2771 } 2772 if (Res != MatchOperand_Success) { 2773 return Sext? MatchOperand_ParseFail : Res; 2774 } 2775 2776 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2777 return MatchOperand_ParseFail; 2778 2779 AMDGPUOperand::Modifiers Mods; 2780 Mods.Sext = Sext; 2781 2782 if (Mods.hasIntModifiers()) { 2783 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2784 if (Op.isExpr()) { 2785 Error(Op.getStartLoc(), "expected an absolute expression"); 2786 return MatchOperand_ParseFail; 2787 } 2788 Op.setModifiers(Mods); 2789 } 2790 2791 return MatchOperand_Success; 2792 } 2793 2794 OperandMatchResultTy 2795 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2796 return parseRegOrImmWithFPInputMods(Operands, false); 2797 } 2798 2799 OperandMatchResultTy 2800 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2801 return parseRegOrImmWithIntInputMods(Operands, false); 2802 } 2803 2804 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2805 auto Loc = getLoc(); 2806 if (trySkipId("off")) { 2807 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2808 AMDGPUOperand::ImmTyOff, false)); 2809 return MatchOperand_Success; 2810 } 2811 2812 if (!isRegister()) 2813 return MatchOperand_NoMatch; 2814 2815 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2816 if (Reg) { 2817 Operands.push_back(std::move(Reg)); 2818 return MatchOperand_Success; 2819 } 2820 2821 return MatchOperand_ParseFail; 2822 2823 } 2824 2825 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2826 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2827 2828 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2829 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2830 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2831 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2832 return Match_InvalidOperand; 2833 2834 if ((TSFlags & SIInstrFlags::VOP3) && 2835 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2836 getForcedEncodingSize() != 64) 2837 return Match_PreferE32; 2838 2839 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2840 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2841 // v_mac_f32/16 allow only dst_sel == DWORD; 2842 auto OpNum = 2843 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2844 const auto &Op = Inst.getOperand(OpNum); 2845 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2846 return Match_InvalidOperand; 2847 } 2848 } 2849 2850 return Match_Success; 2851 } 2852 2853 static ArrayRef<unsigned> getAllVariants() { 2854 static const unsigned Variants[] = { 2855 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2856 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2857 }; 2858 2859 return makeArrayRef(Variants); 2860 } 2861 2862 // What asm variants we should check 2863 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2864 if (getForcedEncodingSize() == 32) { 2865 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2866 return makeArrayRef(Variants); 2867 } 2868 2869 if (isForcedVOP3()) { 2870 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2871 return makeArrayRef(Variants); 2872 } 2873 2874 if (isForcedSDWA()) { 2875 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2876 AMDGPUAsmVariants::SDWA9}; 2877 return makeArrayRef(Variants); 2878 } 2879 2880 if (isForcedDPP()) { 2881 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2882 return makeArrayRef(Variants); 2883 } 2884 2885 return getAllVariants(); 2886 } 2887 2888 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 2889 if (getForcedEncodingSize() == 32) 2890 return "e32"; 2891 2892 if (isForcedVOP3()) 2893 return "e64"; 2894 2895 if (isForcedSDWA()) 2896 return "sdwa"; 2897 2898 if (isForcedDPP()) 2899 return "dpp"; 2900 2901 return ""; 2902 } 2903 2904 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2905 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2906 const unsigned Num = Desc.getNumImplicitUses(); 2907 for (unsigned i = 0; i < Num; ++i) { 2908 unsigned Reg = Desc.ImplicitUses[i]; 2909 switch (Reg) { 2910 case AMDGPU::FLAT_SCR: 2911 case AMDGPU::VCC: 2912 case AMDGPU::VCC_LO: 2913 case AMDGPU::VCC_HI: 2914 case AMDGPU::M0: 2915 return Reg; 2916 default: 2917 break; 2918 } 2919 } 2920 return AMDGPU::NoRegister; 2921 } 2922 2923 // NB: This code is correct only when used to check constant 2924 // bus limitations because GFX7 support no f16 inline constants. 2925 // Note that there are no cases when a GFX7 opcode violates 2926 // constant bus limitations due to the use of an f16 constant. 
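// Returns true if the immediate already placed at OpIdx can be encoded as an
// inline constant, i.e. it does not consume the shared 32-bit literal slot.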
2927 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2928 unsigned OpIdx) const { 2929 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2930 2931 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2932 return false; 2933 } 2934 2935 const MCOperand &MO = Inst.getOperand(OpIdx); 2936 2937 int64_t Val = MO.getImm(); 2938 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2939 2940 switch (OpSize) { // expected operand size 2941 case 8: 2942 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2943 case 4: 2944 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2945 case 2: { 2946 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2947 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 2948 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 2949 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 2950 return AMDGPU::isInlinableIntLiteral(Val); 2951 2952 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2953 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2954 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 2955 return AMDGPU::isInlinableIntLiteralV216(Val); 2956 2957 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2958 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2959 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 2960 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2961 2962 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2963 } 2964 default: 2965 llvm_unreachable("invalid operand size"); 2966 } 2967 } 2968 2969 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2970 if (!isGFX10()) 2971 return 1; 2972 2973 switch (Opcode) { 2974 // 64-bit shift instructions can use only one scalar value input 2975 case AMDGPU::V_LSHLREV_B64: 2976 case AMDGPU::V_LSHLREV_B64_gfx10: 2977 case AMDGPU::V_LSHL_B64: 2978 case AMDGPU::V_LSHRREV_B64: 2979 case AMDGPU::V_LSHRREV_B64_gfx10: 2980 case AMDGPU::V_LSHR_B64: 2981 case AMDGPU::V_ASHRREV_I64: 2982 case AMDGPU::V_ASHRREV_I64_gfx10: 2983 case AMDGPU::V_ASHR_I64: 2984 return 1; 2985 default: 2986 return 2; 2987 } 2988 } 2989 2990 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2991 const MCOperand &MO = Inst.getOperand(OpIdx); 2992 if (MO.isImm()) { 2993 return !isInlineConstant(Inst, OpIdx); 2994 } else if (MO.isReg()) { 2995 auto Reg = MO.getReg(); 2996 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2997 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2998 } else { 2999 return true; 3000 } 3001 } 3002 3003 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 3004 const unsigned Opcode = Inst.getOpcode(); 3005 const MCInstrDesc &Desc = MII.get(Opcode); 3006 unsigned ConstantBusUseCount = 0; 3007 unsigned NumLiterals = 0; 3008 unsigned LiteralSize; 3009 3010 if (Desc.TSFlags & 3011 (SIInstrFlags::VOPC | 3012 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3013 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3014 SIInstrFlags::SDWA)) { 3015 // Check special imm operands (used by madmk, etc) 3016 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3017 ++ConstantBusUseCount; 3018 } 3019 3020 SmallDenseSet<unsigned> SGPRsUsed; 3021 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3022 if (SGPRUsed != AMDGPU::NoRegister) { 3023 SGPRsUsed.insert(SGPRUsed); 3024 ++ConstantBusUseCount; 3025 } 3026 3027 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3028 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, 
AMDGPU::OpName::src1); 3029 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3030 3031 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3032 3033 for (int OpIdx : OpIndices) { 3034 if (OpIdx == -1) break; 3035 3036 const MCOperand &MO = Inst.getOperand(OpIdx); 3037 if (usesConstantBus(Inst, OpIdx)) { 3038 if (MO.isReg()) { 3039 const unsigned Reg = mc2PseudoReg(MO.getReg()); 3040 // Pairs of registers with a partial intersections like these 3041 // s0, s[0:1] 3042 // flat_scratch_lo, flat_scratch 3043 // flat_scratch_lo, flat_scratch_hi 3044 // are theoretically valid but they are disabled anyway. 3045 // Note that this code mimics SIInstrInfo::verifyInstruction 3046 if (!SGPRsUsed.count(Reg)) { 3047 SGPRsUsed.insert(Reg); 3048 ++ConstantBusUseCount; 3049 } 3050 } else { // Expression or a literal 3051 3052 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3053 continue; // special operand like VINTERP attr_chan 3054 3055 // An instruction may use only one literal. 3056 // This has been validated on the previous step. 3057 // See validateVOP3Literal. 3058 // This literal may be used as more than one operand. 3059 // If all these operands are of the same size, 3060 // this literal counts as one scalar value. 3061 // Otherwise it counts as 2 scalar values. 3062 // See "GFX10 Shader Programming", section 3.6.2.3. 3063 3064 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3065 if (Size < 4) Size = 4; 3066 3067 if (NumLiterals == 0) { 3068 NumLiterals = 1; 3069 LiteralSize = Size; 3070 } else if (LiteralSize != Size) { 3071 NumLiterals = 2; 3072 } 3073 } 3074 } 3075 } 3076 } 3077 ConstantBusUseCount += NumLiterals; 3078 3079 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 3080 } 3081 3082 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 3083 const unsigned Opcode = Inst.getOpcode(); 3084 const MCInstrDesc &Desc = MII.get(Opcode); 3085 3086 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3087 if (DstIdx == -1 || 3088 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3089 return true; 3090 } 3091 3092 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3093 3094 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3095 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3096 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3097 3098 assert(DstIdx != -1); 3099 const MCOperand &Dst = Inst.getOperand(DstIdx); 3100 assert(Dst.isReg()); 3101 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3102 3103 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3104 3105 for (int SrcIdx : SrcIndices) { 3106 if (SrcIdx == -1) break; 3107 const MCOperand &Src = Inst.getOperand(SrcIdx); 3108 if (Src.isReg()) { 3109 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3110 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3111 return false; 3112 } 3113 } 3114 } 3115 3116 return true; 3117 } 3118 3119 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3120 3121 const unsigned Opc = Inst.getOpcode(); 3122 const MCInstrDesc &Desc = MII.get(Opc); 3123 3124 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3125 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3126 assert(ClampIdx != -1); 3127 return Inst.getOperand(ClampIdx).getImm() == 0; 3128 } 3129 3130 return true; 3131 } 3132 3133 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst 
&Inst) { 3134 3135 const unsigned Opc = Inst.getOpcode(); 3136 const MCInstrDesc &Desc = MII.get(Opc); 3137 3138 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3139 return true; 3140 3141 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3142 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3143 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3144 3145 assert(VDataIdx != -1); 3146 3147 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3148 return true; 3149 3150 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3151 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3152 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3153 if (DMask == 0) 3154 DMask = 1; 3155 3156 unsigned DataSize = 3157 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3158 if (hasPackedD16()) { 3159 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3160 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3161 DataSize = (DataSize + 1) / 2; 3162 } 3163 3164 return (VDataSize / 4) == DataSize + TFESize; 3165 } 3166 3167 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3168 const unsigned Opc = Inst.getOpcode(); 3169 const MCInstrDesc &Desc = MII.get(Opc); 3170 3171 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3172 return true; 3173 3174 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3175 3176 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3177 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3178 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3179 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3180 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3181 3182 assert(VAddr0Idx != -1); 3183 assert(SrsrcIdx != -1); 3184 assert(SrsrcIdx > VAddr0Idx); 3185 3186 if (DimIdx == -1) 3187 return true; // intersect_ray 3188 3189 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3190 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3191 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3192 unsigned VAddrSize = 3193 IsNSA ? SrsrcIdx - VAddr0Idx 3194 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3195 3196 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3197 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3198 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3199 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3200 if (!IsNSA) { 3201 if (AddrSize > 8) 3202 AddrSize = 16; 3203 else if (AddrSize > 4) 3204 AddrSize = 8; 3205 } 3206 3207 return VAddrSize == AddrSize; 3208 } 3209 3210 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3211 3212 const unsigned Opc = Inst.getOpcode(); 3213 const MCInstrDesc &Desc = MII.get(Opc); 3214 3215 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3216 return true; 3217 if (!Desc.mayLoad() || !Desc.mayStore()) 3218 return true; // Not atomic 3219 3220 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3221 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3222 3223 // This is an incomplete check because image_atomic_cmpswap 3224 // may only use 0x3 and 0xf while other atomic operations 3225 // may use 0x1 and 0x3. However these limitations are 3226 // verified when we check that dmask matches dst size. 
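// e.g. a 32-bit image atomic uses dmask 0x1 and a 64-bit one uses 0x3;
// image_atomic_cmpswap carries both data and compare values, so it uses
// 0x3 or 0xf respectively.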
3227 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3228 } 3229 3230 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3231 3232 const unsigned Opc = Inst.getOpcode(); 3233 const MCInstrDesc &Desc = MII.get(Opc); 3234 3235 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3236 return true; 3237 3238 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3239 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3240 3241 // GATHER4 instructions use dmask in a different fashion compared to 3242 // other MIMG instructions. The only useful DMASK values are 3243 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3244 // (red,red,red,red) etc.) The ISA document doesn't mention 3245 // this. 3246 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3247 } 3248 3249 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3250 { 3251 switch (Opcode) { 3252 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3253 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3254 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3255 return true; 3256 default: 3257 return false; 3258 } 3259 } 3260 3261 // movrels* opcodes should only allow VGPRS as src0. 3262 // This is specified in .td description for vop1/vop3, 3263 // but sdwa is handled differently. See isSDWAOperand. 3264 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3265 3266 const unsigned Opc = Inst.getOpcode(); 3267 const MCInstrDesc &Desc = MII.get(Opc); 3268 3269 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3270 return true; 3271 3272 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3273 assert(Src0Idx != -1); 3274 3275 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3276 if (!Src0.isReg()) 3277 return false; 3278 3279 auto Reg = Src0.getReg(); 3280 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3281 return !isSGPR(mc2PseudoReg(Reg), TRI); 3282 } 3283 3284 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) { 3285 3286 const unsigned Opc = Inst.getOpcode(); 3287 3288 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3289 return true; 3290 3291 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3292 assert(Src0Idx != -1); 3293 3294 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3295 if (!Src0.isReg()) 3296 return true; 3297 3298 auto Reg = Src0.getReg(); 3299 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3300 if (isSGPR(mc2PseudoReg(Reg), TRI)) { 3301 Error(getLoc(), "source operand must be either a VGPR or an inline constant"); 3302 return false; 3303 } 3304 3305 return true; 3306 } 3307 3308 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3309 switch (Inst.getOpcode()) { 3310 default: 3311 return true; 3312 case V_DIV_SCALE_F32_gfx6_gfx7: 3313 case V_DIV_SCALE_F32_vi: 3314 case V_DIV_SCALE_F32_gfx10: 3315 case V_DIV_SCALE_F64_gfx6_gfx7: 3316 case V_DIV_SCALE_F64_vi: 3317 case V_DIV_SCALE_F64_gfx10: 3318 break; 3319 } 3320 3321 // TODO: Check that src0 = src1 or src2. 
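// VOP3B encodings have no abs bits (those bits encode the sdst operand),
// so an explicit ABS modifier on any source must be rejected.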
3322 3323 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3324 AMDGPU::OpName::src2_modifiers, 3325 AMDGPU::OpName::src2_modifiers}) { 3326 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3327 .getImm() & 3328 SISrcMods::ABS) { 3329 Error(getLoc(), "ABS not allowed in VOP3B instructions"); 3330 return false; 3331 } 3332 } 3333 3334 return true; 3335 } 3336 3337 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3338 3339 const unsigned Opc = Inst.getOpcode(); 3340 const MCInstrDesc &Desc = MII.get(Opc); 3341 3342 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3343 return true; 3344 3345 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3346 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3347 if (isCI() || isSI()) 3348 return false; 3349 } 3350 3351 return true; 3352 } 3353 3354 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3355 const unsigned Opc = Inst.getOpcode(); 3356 const MCInstrDesc &Desc = MII.get(Opc); 3357 3358 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3359 return true; 3360 3361 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3362 if (DimIdx < 0) 3363 return true; 3364 3365 long Imm = Inst.getOperand(DimIdx).getImm(); 3366 if (Imm < 0 || Imm >= 8) 3367 return false; 3368 3369 return true; 3370 } 3371 3372 static bool IsRevOpcode(const unsigned Opcode) 3373 { 3374 switch (Opcode) { 3375 case AMDGPU::V_SUBREV_F32_e32: 3376 case AMDGPU::V_SUBREV_F32_e64: 3377 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3378 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3379 case AMDGPU::V_SUBREV_F32_e32_vi: 3380 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3381 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3382 case AMDGPU::V_SUBREV_F32_e64_vi: 3383 3384 case AMDGPU::V_SUBREV_CO_U32_e32: 3385 case AMDGPU::V_SUBREV_CO_U32_e64: 3386 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3387 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3388 3389 case AMDGPU::V_SUBBREV_U32_e32: 3390 case AMDGPU::V_SUBBREV_U32_e64: 3391 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3392 case AMDGPU::V_SUBBREV_U32_e32_vi: 3393 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3394 case AMDGPU::V_SUBBREV_U32_e64_vi: 3395 3396 case AMDGPU::V_SUBREV_U32_e32: 3397 case AMDGPU::V_SUBREV_U32_e64: 3398 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3399 case AMDGPU::V_SUBREV_U32_e32_vi: 3400 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3401 case AMDGPU::V_SUBREV_U32_e64_vi: 3402 3403 case AMDGPU::V_SUBREV_F16_e32: 3404 case AMDGPU::V_SUBREV_F16_e64: 3405 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3406 case AMDGPU::V_SUBREV_F16_e32_vi: 3407 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3408 case AMDGPU::V_SUBREV_F16_e64_vi: 3409 3410 case AMDGPU::V_SUBREV_U16_e32: 3411 case AMDGPU::V_SUBREV_U16_e64: 3412 case AMDGPU::V_SUBREV_U16_e32_vi: 3413 case AMDGPU::V_SUBREV_U16_e64_vi: 3414 3415 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3416 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3417 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3418 3419 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3420 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3421 3422 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3423 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3424 3425 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3426 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3427 3428 case AMDGPU::V_LSHRREV_B32_e32: 3429 case AMDGPU::V_LSHRREV_B32_e64: 3430 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3431 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3432 case AMDGPU::V_LSHRREV_B32_e32_vi: 3433 case AMDGPU::V_LSHRREV_B32_e64_vi: 3434 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3435 case 
AMDGPU::V_LSHRREV_B32_e64_gfx10: 3436 3437 case AMDGPU::V_ASHRREV_I32_e32: 3438 case AMDGPU::V_ASHRREV_I32_e64: 3439 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3440 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3441 case AMDGPU::V_ASHRREV_I32_e32_vi: 3442 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3443 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3444 case AMDGPU::V_ASHRREV_I32_e64_vi: 3445 3446 case AMDGPU::V_LSHLREV_B32_e32: 3447 case AMDGPU::V_LSHLREV_B32_e64: 3448 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3449 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3450 case AMDGPU::V_LSHLREV_B32_e32_vi: 3451 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3452 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3453 case AMDGPU::V_LSHLREV_B32_e64_vi: 3454 3455 case AMDGPU::V_LSHLREV_B16_e32: 3456 case AMDGPU::V_LSHLREV_B16_e64: 3457 case AMDGPU::V_LSHLREV_B16_e32_vi: 3458 case AMDGPU::V_LSHLREV_B16_e64_vi: 3459 case AMDGPU::V_LSHLREV_B16_gfx10: 3460 3461 case AMDGPU::V_LSHRREV_B16_e32: 3462 case AMDGPU::V_LSHRREV_B16_e64: 3463 case AMDGPU::V_LSHRREV_B16_e32_vi: 3464 case AMDGPU::V_LSHRREV_B16_e64_vi: 3465 case AMDGPU::V_LSHRREV_B16_gfx10: 3466 3467 case AMDGPU::V_ASHRREV_I16_e32: 3468 case AMDGPU::V_ASHRREV_I16_e64: 3469 case AMDGPU::V_ASHRREV_I16_e32_vi: 3470 case AMDGPU::V_ASHRREV_I16_e64_vi: 3471 case AMDGPU::V_ASHRREV_I16_gfx10: 3472 3473 case AMDGPU::V_LSHLREV_B64: 3474 case AMDGPU::V_LSHLREV_B64_gfx10: 3475 case AMDGPU::V_LSHLREV_B64_vi: 3476 3477 case AMDGPU::V_LSHRREV_B64: 3478 case AMDGPU::V_LSHRREV_B64_gfx10: 3479 case AMDGPU::V_LSHRREV_B64_vi: 3480 3481 case AMDGPU::V_ASHRREV_I64: 3482 case AMDGPU::V_ASHRREV_I64_gfx10: 3483 case AMDGPU::V_ASHRREV_I64_vi: 3484 3485 case AMDGPU::V_PK_LSHLREV_B16: 3486 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3487 case AMDGPU::V_PK_LSHLREV_B16_vi: 3488 3489 case AMDGPU::V_PK_LSHRREV_B16: 3490 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3491 case AMDGPU::V_PK_LSHRREV_B16_vi: 3492 case AMDGPU::V_PK_ASHRREV_I16: 3493 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3494 case AMDGPU::V_PK_ASHRREV_I16_vi: 3495 return true; 3496 default: 3497 return false; 3498 } 3499 } 3500 3501 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3502 3503 using namespace SIInstrFlags; 3504 const unsigned Opcode = Inst.getOpcode(); 3505 const MCInstrDesc &Desc = MII.get(Opcode); 3506 3507 // lds_direct register is defined so that it can be used 3508 // with 9-bit operands only. Ignore encodings which do not accept these. 3509 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3510 return true; 3511 3512 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3513 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3514 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3515 3516 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3517 3518 // lds_direct cannot be specified as either src1 or src2. 3519 for (int SrcIdx : SrcIndices) { 3520 if (SrcIdx == -1) break; 3521 const MCOperand &Src = Inst.getOperand(SrcIdx); 3522 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3523 return false; 3524 } 3525 } 3526 3527 if (Src0Idx == -1) 3528 return true; 3529 3530 const MCOperand &Src = Inst.getOperand(Src0Idx); 3531 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3532 return true; 3533 3534 // lds_direct is specified as src0. Check additional limitations. 
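// lds_direct is rejected in SDWA encodings and in "rev" opcodes, whose
// operands are swapped relative to their non-rev counterparts.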
3535 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3536 } 3537 3538 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3539 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3540 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3541 if (Op.isFlatOffset()) 3542 return Op.getStartLoc(); 3543 } 3544 return getLoc(); 3545 } 3546 3547 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3548 const OperandVector &Operands) { 3549 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3550 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3551 return true; 3552 3553 auto Opcode = Inst.getOpcode(); 3554 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3555 assert(OpNum != -1); 3556 3557 const auto &Op = Inst.getOperand(OpNum); 3558 if (!hasFlatOffsets() && Op.getImm() != 0) { 3559 Error(getFlatOffsetLoc(Operands), 3560 "flat offset modifier is not supported on this GPU"); 3561 return false; 3562 } 3563 3564 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3565 // For FLAT segment the offset must be positive; 3566 // MSB is ignored and forced to zero. 3567 unsigned OffsetSize = isGFX9() ? 13 : 12; 3568 if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) { 3569 if (!isIntN(OffsetSize, Op.getImm())) { 3570 Error(getFlatOffsetLoc(Operands), 3571 isGFX9() ? "expected a 13-bit signed offset" : 3572 "expected a 12-bit signed offset"); 3573 return false; 3574 } 3575 } else { 3576 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3577 Error(getFlatOffsetLoc(Operands), 3578 isGFX9() ? "expected a 12-bit unsigned offset" : 3579 "expected an 11-bit unsigned offset"); 3580 return false; 3581 } 3582 } 3583 3584 return true; 3585 } 3586 3587 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3588 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3589 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3590 if (Op.isSMEMOffset()) 3591 return Op.getStartLoc(); 3592 } 3593 return getLoc(); 3594 } 3595 3596 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3597 const OperandVector &Operands) { 3598 if (isCI() || isSI()) 3599 return true; 3600 3601 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3602 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3603 return true; 3604 3605 auto Opcode = Inst.getOpcode(); 3606 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3607 if (OpNum == -1) 3608 return true; 3609 3610 const auto &Op = Inst.getOperand(OpNum); 3611 if (!Op.isImm()) 3612 return true; 3613 3614 uint64_t Offset = Op.getImm(); 3615 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3616 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3617 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3618 return true; 3619 3620 Error(getSMEMOffsetLoc(Operands), 3621 (isVI() || IsBuffer) ? 
"expected a 20-bit unsigned offset" : 3622 "expected a 21-bit signed offset"); 3623 3624 return false; 3625 } 3626 3627 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3628 unsigned Opcode = Inst.getOpcode(); 3629 const MCInstrDesc &Desc = MII.get(Opcode); 3630 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3631 return true; 3632 3633 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3634 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3635 3636 const int OpIndices[] = { Src0Idx, Src1Idx }; 3637 3638 unsigned NumExprs = 0; 3639 unsigned NumLiterals = 0; 3640 uint32_t LiteralValue; 3641 3642 for (int OpIdx : OpIndices) { 3643 if (OpIdx == -1) break; 3644 3645 const MCOperand &MO = Inst.getOperand(OpIdx); 3646 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3647 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3648 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3649 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3650 if (NumLiterals == 0 || LiteralValue != Value) { 3651 LiteralValue = Value; 3652 ++NumLiterals; 3653 } 3654 } else if (MO.isExpr()) { 3655 ++NumExprs; 3656 } 3657 } 3658 } 3659 3660 return NumLiterals + NumExprs <= 1; 3661 } 3662 3663 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3664 const unsigned Opc = Inst.getOpcode(); 3665 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3666 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3667 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3668 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3669 3670 if (OpSel & ~3) 3671 return false; 3672 } 3673 return true; 3674 } 3675 3676 // Check if VCC register matches wavefront size 3677 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3678 auto FB = getFeatureBits(); 3679 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3680 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3681 } 3682 3683 // VOP3 literal is only allowed in GFX10+ and only one can be used 3684 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3685 unsigned Opcode = Inst.getOpcode(); 3686 const MCInstrDesc &Desc = MII.get(Opcode); 3687 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3688 return true; 3689 3690 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3691 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3692 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3693 3694 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3695 3696 unsigned NumExprs = 0; 3697 unsigned NumLiterals = 0; 3698 uint32_t LiteralValue; 3699 3700 for (int OpIdx : OpIndices) { 3701 if (OpIdx == -1) break; 3702 3703 const MCOperand &MO = Inst.getOperand(OpIdx); 3704 if (!MO.isImm() && !MO.isExpr()) 3705 continue; 3706 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3707 continue; 3708 3709 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3710 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3711 return false; 3712 3713 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3714 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3715 if (NumLiterals == 0 || LiteralValue != Value) { 3716 LiteralValue = Value; 3717 ++NumLiterals; 3718 } 3719 } else if (MO.isExpr()) { 3720 ++NumExprs; 3721 } 3722 } 3723 NumLiterals += NumExprs; 3724 3725 return !NumLiterals || 3726 (NumLiterals == 1 && 
getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3727 } 3728 3729 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3730 const SMLoc &IDLoc, 3731 const OperandVector &Operands) { 3732 if (!validateLdsDirect(Inst)) { 3733 Error(IDLoc, 3734 "invalid use of lds_direct"); 3735 return false; 3736 } 3737 if (!validateSOPLiteral(Inst)) { 3738 Error(IDLoc, 3739 "only one literal operand is allowed"); 3740 return false; 3741 } 3742 if (!validateVOP3Literal(Inst)) { 3743 Error(IDLoc, 3744 "invalid literal operand"); 3745 return false; 3746 } 3747 if (!validateConstantBusLimitations(Inst)) { 3748 Error(IDLoc, 3749 "invalid operand (violates constant bus restrictions)"); 3750 return false; 3751 } 3752 if (!validateEarlyClobberLimitations(Inst)) { 3753 Error(IDLoc, 3754 "destination must be different than all sources"); 3755 return false; 3756 } 3757 if (!validateIntClampSupported(Inst)) { 3758 Error(IDLoc, 3759 "integer clamping is not supported on this GPU"); 3760 return false; 3761 } 3762 if (!validateOpSel(Inst)) { 3763 Error(IDLoc, 3764 "invalid op_sel operand"); 3765 return false; 3766 } 3767 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3768 if (!validateMIMGD16(Inst)) { 3769 Error(IDLoc, 3770 "d16 modifier is not supported on this GPU"); 3771 return false; 3772 } 3773 if (!validateMIMGDim(Inst)) { 3774 Error(IDLoc, "dim modifier is required on this GPU"); 3775 return false; 3776 } 3777 if (!validateMIMGDataSize(Inst)) { 3778 Error(IDLoc, 3779 "image data size does not match dmask and tfe"); 3780 return false; 3781 } 3782 if (!validateMIMGAddrSize(Inst)) { 3783 Error(IDLoc, 3784 "image address size does not match dim and a16"); 3785 return false; 3786 } 3787 if (!validateMIMGAtomicDMask(Inst)) { 3788 Error(IDLoc, 3789 "invalid atomic image dmask"); 3790 return false; 3791 } 3792 if (!validateMIMGGatherDMask(Inst)) { 3793 Error(IDLoc, 3794 "invalid image_gather dmask: only one bit must be set"); 3795 return false; 3796 } 3797 if (!validateMovrels(Inst)) { 3798 Error(IDLoc, "source operand must be a VGPR"); 3799 return false; 3800 } 3801 if (!validateFlatOffset(Inst, Operands)) { 3802 return false; 3803 } 3804 if (!validateSMEMOffset(Inst, Operands)) { 3805 return false; 3806 } 3807 if (!validateMAIAccWrite(Inst)) { 3808 return false; 3809 } 3810 if (!validateDivScale(Inst)) { 3811 return false; 3812 } 3813 3814 return true; 3815 } 3816 3817 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3818 const FeatureBitset &FBS, 3819 unsigned VariantID = 0); 3820 3821 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 3822 const FeatureBitset &AvailableFeatures, 3823 unsigned VariantID); 3824 3825 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3826 const FeatureBitset &FBS) { 3827 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 3828 } 3829 3830 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3831 const FeatureBitset &FBS, 3832 ArrayRef<unsigned> Variants) { 3833 for (auto Variant : Variants) { 3834 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 3835 return true; 3836 } 3837 3838 return false; 3839 } 3840 3841 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 3842 const SMLoc &IDLoc) { 3843 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3844 3845 // Check if requested instruction variant is supported. 3846 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 3847 return false; 3848 3849 // This instruction is not supported. 
3850 // Clear any other pending errors because they are no longer relevant. 3851 getParser().clearPendingErrors(); 3852 3853 // Requested instruction variant is not supported. 3854 // Check if any other variants are supported. 3855 StringRef VariantName = getMatchedVariantName(); 3856 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 3857 return Error(IDLoc, 3858 Twine(VariantName, 3859 " variant of this instruction is not supported")); 3860 } 3861 3862 // Finally check if this instruction is supported on any other GPU. 3863 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 3864 return Error(IDLoc, "instruction not supported on this GPU"); 3865 } 3866 3867 // Instruction not supported on any GPU. Probably a typo. 3868 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 3869 return Error(IDLoc, "invalid instruction" + Suggestion); 3870 } 3871 3872 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3873 OperandVector &Operands, 3874 MCStreamer &Out, 3875 uint64_t &ErrorInfo, 3876 bool MatchingInlineAsm) { 3877 MCInst Inst; 3878 unsigned Result = Match_Success; 3879 for (auto Variant : getMatchedVariants()) { 3880 uint64_t EI; 3881 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3882 Variant); 3883 // We order match statuses from least to most specific. We use most specific 3884 // status as resulting 3885 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3886 if ((R == Match_Success) || 3887 (R == Match_PreferE32) || 3888 (R == Match_MissingFeature && Result != Match_PreferE32) || 3889 (R == Match_InvalidOperand && Result != Match_MissingFeature 3890 && Result != Match_PreferE32) || 3891 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3892 && Result != Match_MissingFeature 3893 && Result != Match_PreferE32)) { 3894 Result = R; 3895 ErrorInfo = EI; 3896 } 3897 if (R == Match_Success) 3898 break; 3899 } 3900 3901 if (Result == Match_Success) { 3902 if (!validateInstruction(Inst, IDLoc, Operands)) { 3903 return true; 3904 } 3905 Inst.setLoc(IDLoc); 3906 Out.emitInstruction(Inst, getSTI()); 3907 return false; 3908 } 3909 3910 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 3911 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 3912 return true; 3913 } 3914 3915 switch (Result) { 3916 default: break; 3917 case Match_MissingFeature: 3918 // It has been verified that the specified instruction 3919 // mnemonic is valid. A match was found but it requires 3920 // features which are not supported on this GPU. 
3921 return Error(IDLoc, "operands are not valid for this GPU or mode"); 3922 3923 case Match_InvalidOperand: { 3924 SMLoc ErrorLoc = IDLoc; 3925 if (ErrorInfo != ~0ULL) { 3926 if (ErrorInfo >= Operands.size()) { 3927 return Error(IDLoc, "too few operands for instruction"); 3928 } 3929 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3930 if (ErrorLoc == SMLoc()) 3931 ErrorLoc = IDLoc; 3932 } 3933 return Error(ErrorLoc, "invalid operand for instruction"); 3934 } 3935 3936 case Match_PreferE32: 3937 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3938 "should be encoded as e32"); 3939 case Match_MnemonicFail: 3940 llvm_unreachable("Invalid instructions should have been handled already"); 3941 } 3942 llvm_unreachable("Implement any new match types added!"); 3943 } 3944 3945 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3946 int64_t Tmp = -1; 3947 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3948 return true; 3949 } 3950 if (getParser().parseAbsoluteExpression(Tmp)) { 3951 return true; 3952 } 3953 Ret = static_cast<uint32_t>(Tmp); 3954 return false; 3955 } 3956 3957 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3958 uint32_t &Minor) { 3959 if (ParseAsAbsoluteExpression(Major)) 3960 return TokError("invalid major version"); 3961 3962 if (getLexer().isNot(AsmToken::Comma)) 3963 return TokError("minor version number required, comma expected"); 3964 Lex(); 3965 3966 if (ParseAsAbsoluteExpression(Minor)) 3967 return TokError("invalid minor version"); 3968 3969 return false; 3970 } 3971 3972 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3973 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3974 return TokError("directive only supported for amdgcn architecture"); 3975 3976 std::string Target; 3977 3978 SMLoc TargetStart = getTok().getLoc(); 3979 if (getParser().parseEscapedString(Target)) 3980 return true; 3981 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3982 3983 std::string ExpectedTarget; 3984 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3985 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3986 3987 if (Target != ExpectedTargetOS.str()) 3988 return getParser().Error(TargetRange.Start, "target must match options", 3989 TargetRange); 3990 3991 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3992 return false; 3993 } 3994 3995 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3996 return getParser().Error(Range.Start, "value out of range", Range); 3997 } 3998 3999 bool AMDGPUAsmParser::calculateGPRBlocks( 4000 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4001 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4002 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4003 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4004 // TODO(scott.linder): These calculations are duplicated from 4005 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
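  // Note that the hardware is programmed with register usage in allocation
  // granules ("blocks") rather than raw register counts; the IsaInfo helpers
  // below round NumVGPRs/NumSGPRs up to the target's allocation granularity.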
4006 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4007 4008 unsigned NumVGPRs = NextFreeVGPR; 4009 unsigned NumSGPRs = NextFreeSGPR; 4010 4011 if (Version.Major >= 10) 4012 NumSGPRs = 0; 4013 else { 4014 unsigned MaxAddressableNumSGPRs = 4015 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4016 4017 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4018 NumSGPRs > MaxAddressableNumSGPRs) 4019 return OutOfRangeError(SGPRRange); 4020 4021 NumSGPRs += 4022 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4023 4024 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4025 NumSGPRs > MaxAddressableNumSGPRs) 4026 return OutOfRangeError(SGPRRange); 4027 4028 if (Features.test(FeatureSGPRInitBug)) 4029 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4030 } 4031 4032 VGPRBlocks = 4033 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4034 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4035 4036 return false; 4037 } 4038 4039 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4040 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4041 return TokError("directive only supported for amdgcn architecture"); 4042 4043 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4044 return TokError("directive only supported for amdhsa OS"); 4045 4046 StringRef KernelName; 4047 if (getParser().parseIdentifier(KernelName)) 4048 return true; 4049 4050 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4051 4052 StringSet<> Seen; 4053 4054 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4055 4056 SMRange VGPRRange; 4057 uint64_t NextFreeVGPR = 0; 4058 SMRange SGPRRange; 4059 uint64_t NextFreeSGPR = 0; 4060 unsigned UserSGPRCount = 0; 4061 bool ReserveVCC = true; 4062 bool ReserveFlatScr = true; 4063 bool ReserveXNACK = hasXNACK(); 4064 Optional<bool> EnableWavefrontSize32; 4065 4066 while (true) { 4067 while (getLexer().is(AsmToken::EndOfStatement)) 4068 Lex(); 4069 4070 if (getLexer().isNot(AsmToken::Identifier)) 4071 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 4072 4073 StringRef ID = getTok().getIdentifier(); 4074 SMRange IDRange = getTok().getLocRange(); 4075 Lex(); 4076 4077 if (ID == ".end_amdhsa_kernel") 4078 break; 4079 4080 if (Seen.find(ID) != Seen.end()) 4081 return TokError(".amdhsa_ directives cannot be repeated"); 4082 Seen.insert(ID); 4083 4084 SMLoc ValStart = getTok().getLoc(); 4085 int64_t IVal; 4086 if (getParser().parseAbsoluteExpression(IVal)) 4087 return true; 4088 SMLoc ValEnd = getTok().getLoc(); 4089 SMRange ValRange = SMRange(ValStart, ValEnd); 4090 4091 if (IVal < 0) 4092 return OutOfRangeError(ValRange); 4093 4094 uint64_t Val = IVal; 4095 4096 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4097 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4098 return OutOfRangeError(RANGE); \ 4099 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4100 4101 if (ID == ".amdhsa_group_segment_fixed_size") { 4102 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4103 return OutOfRangeError(ValRange); 4104 KD.group_segment_fixed_size = Val; 4105 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4106 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4107 return OutOfRangeError(ValRange); 4108 KD.private_segment_fixed_size = Val; 4109 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4110 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4111 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4112 Val, ValRange); 
4113 if (Val) 4114 UserSGPRCount += 4; 4115 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4116 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4117 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4118 ValRange); 4119 if (Val) 4120 UserSGPRCount += 2; 4121 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4122 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4123 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4124 ValRange); 4125 if (Val) 4126 UserSGPRCount += 2; 4127 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4128 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4129 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4130 Val, ValRange); 4131 if (Val) 4132 UserSGPRCount += 2; 4133 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4134 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4135 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4136 ValRange); 4137 if (Val) 4138 UserSGPRCount += 2; 4139 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4140 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4141 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4142 ValRange); 4143 if (Val) 4144 UserSGPRCount += 2; 4145 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4146 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4147 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4148 Val, ValRange); 4149 if (Val) 4150 UserSGPRCount += 1; 4151 } else if (ID == ".amdhsa_wavefront_size32") { 4152 if (IVersion.Major < 10) 4153 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4154 IDRange); 4155 EnableWavefrontSize32 = Val; 4156 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4157 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4158 Val, ValRange); 4159 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4160 PARSE_BITS_ENTRY( 4161 KD.compute_pgm_rsrc2, 4162 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 4163 ValRange); 4164 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4165 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4166 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4167 ValRange); 4168 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4169 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4170 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4171 ValRange); 4172 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4173 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4174 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4175 ValRange); 4176 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4177 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4178 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4179 ValRange); 4180 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4181 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4182 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4183 ValRange); 4184 } else if (ID == ".amdhsa_next_free_vgpr") { 4185 VGPRRange = ValRange; 4186 NextFreeVGPR = Val; 4187 } else if (ID == ".amdhsa_next_free_sgpr") { 4188 SGPRRange = ValRange; 4189 NextFreeSGPR = Val; 4190 } else if (ID == ".amdhsa_reserve_vcc") { 4191 if (!isUInt<1>(Val)) 4192 return OutOfRangeError(ValRange); 4193 ReserveVCC = Val; 4194 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4195 if (IVersion.Major < 7) 4196 return getParser().Error(IDRange.Start, "directive requires gfx7+", 4197 IDRange); 4198 if (!isUInt<1>(Val)) 4199 return OutOfRangeError(ValRange); 4200 ReserveFlatScr = Val; 4201 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4202 if (IVersion.Major < 8) 4203 return getParser().Error(IDRange.Start, 
"directive requires gfx8+", 4204 IDRange); 4205 if (!isUInt<1>(Val)) 4206 return OutOfRangeError(ValRange); 4207 ReserveXNACK = Val; 4208 } else if (ID == ".amdhsa_float_round_mode_32") { 4209 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4210 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4211 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4212 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4213 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4214 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4215 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4216 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4217 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4218 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4219 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4220 ValRange); 4221 } else if (ID == ".amdhsa_dx10_clamp") { 4222 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4223 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4224 } else if (ID == ".amdhsa_ieee_mode") { 4225 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4226 Val, ValRange); 4227 } else if (ID == ".amdhsa_fp16_overflow") { 4228 if (IVersion.Major < 9) 4229 return getParser().Error(IDRange.Start, "directive requires gfx9+", 4230 IDRange); 4231 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4232 ValRange); 4233 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4234 if (IVersion.Major < 10) 4235 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4236 IDRange); 4237 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4238 ValRange); 4239 } else if (ID == ".amdhsa_memory_ordered") { 4240 if (IVersion.Major < 10) 4241 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4242 IDRange); 4243 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4244 ValRange); 4245 } else if (ID == ".amdhsa_forward_progress") { 4246 if (IVersion.Major < 10) 4247 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4248 IDRange); 4249 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4250 ValRange); 4251 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4252 PARSE_BITS_ENTRY( 4253 KD.compute_pgm_rsrc2, 4254 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4255 ValRange); 4256 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4257 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4258 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4259 Val, ValRange); 4260 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4261 PARSE_BITS_ENTRY( 4262 KD.compute_pgm_rsrc2, 4263 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4264 ValRange); 4265 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4266 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4267 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4268 Val, ValRange); 4269 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4270 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4271 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4272 Val, ValRange); 4273 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4274 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4275 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4276 Val, ValRange); 4277 } else if (ID == ".amdhsa_exception_int_div_zero") { 4278 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4279 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4280 Val, ValRange); 4281 } else { 4282 return getParser().Error(IDRange.Start, 
4283 "unknown .amdhsa_kernel directive", IDRange); 4284 } 4285 4286 #undef PARSE_BITS_ENTRY 4287 } 4288 4289 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4290 return TokError(".amdhsa_next_free_vgpr directive is required"); 4291 4292 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4293 return TokError(".amdhsa_next_free_sgpr directive is required"); 4294 4295 unsigned VGPRBlocks; 4296 unsigned SGPRBlocks; 4297 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4298 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4299 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4300 SGPRBlocks)) 4301 return true; 4302 4303 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4304 VGPRBlocks)) 4305 return OutOfRangeError(VGPRRange); 4306 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4307 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4308 4309 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4310 SGPRBlocks)) 4311 return OutOfRangeError(SGPRRange); 4312 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4313 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4314 SGPRBlocks); 4315 4316 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4317 return TokError("too many user SGPRs enabled"); 4318 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4319 UserSGPRCount); 4320 4321 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4322 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4323 ReserveFlatScr, ReserveXNACK); 4324 return false; 4325 } 4326 4327 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4328 uint32_t Major; 4329 uint32_t Minor; 4330 4331 if (ParseDirectiveMajorMinor(Major, Minor)) 4332 return true; 4333 4334 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4335 return false; 4336 } 4337 4338 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4339 uint32_t Major; 4340 uint32_t Minor; 4341 uint32_t Stepping; 4342 StringRef VendorName; 4343 StringRef ArchName; 4344 4345 // If this directive has no arguments, then use the ISA version for the 4346 // targeted GPU. 
4347 if (getLexer().is(AsmToken::EndOfStatement)) { 4348 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4349 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4350 ISA.Stepping, 4351 "AMD", "AMDGPU"); 4352 return false; 4353 } 4354 4355 if (ParseDirectiveMajorMinor(Major, Minor)) 4356 return true; 4357 4358 if (getLexer().isNot(AsmToken::Comma)) 4359 return TokError("stepping version number required, comma expected"); 4360 Lex(); 4361 4362 if (ParseAsAbsoluteExpression(Stepping)) 4363 return TokError("invalid stepping version"); 4364 4365 if (getLexer().isNot(AsmToken::Comma)) 4366 return TokError("vendor name required, comma expected"); 4367 Lex(); 4368 4369 if (getLexer().isNot(AsmToken::String)) 4370 return TokError("invalid vendor name"); 4371 4372 VendorName = getLexer().getTok().getStringContents(); 4373 Lex(); 4374 4375 if (getLexer().isNot(AsmToken::Comma)) 4376 return TokError("arch name required, comma expected"); 4377 Lex(); 4378 4379 if (getLexer().isNot(AsmToken::String)) 4380 return TokError("invalid arch name"); 4381 4382 ArchName = getLexer().getTok().getStringContents(); 4383 Lex(); 4384 4385 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4386 VendorName, ArchName); 4387 return false; 4388 } 4389 4390 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4391 amd_kernel_code_t &Header) { 4392 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4393 // assembly for backwards compatibility. 4394 if (ID == "max_scratch_backing_memory_byte_size") { 4395 Parser.eatToEndOfStatement(); 4396 return false; 4397 } 4398 4399 SmallString<40> ErrStr; 4400 raw_svector_ostream Err(ErrStr); 4401 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4402 return TokError(Err.str()); 4403 } 4404 Lex(); 4405 4406 if (ID == "enable_wavefront_size32") { 4407 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4408 if (!isGFX10()) 4409 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4410 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4411 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4412 } else { 4413 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4414 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4415 } 4416 } 4417 4418 if (ID == "wavefront_size") { 4419 if (Header.wavefront_size == 5) { 4420 if (!isGFX10()) 4421 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4422 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4423 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4424 } else if (Header.wavefront_size == 6) { 4425 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4426 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4427 } 4428 } 4429 4430 if (ID == "enable_wgp_mode") { 4431 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4432 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4433 } 4434 4435 if (ID == "enable_mem_ordered") { 4436 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4437 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4438 } 4439 4440 if (ID == "enable_fwd_progress") { 4441 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4442 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4443 } 4444 4445 return false; 4446 } 4447 4448 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4449 amd_kernel_code_t Header; 4450 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4451 4452 while (true) { 4453 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4454 // will set the current token to EndOfStatement. 4455 while(getLexer().is(AsmToken::EndOfStatement)) 4456 Lex(); 4457 4458 if (getLexer().isNot(AsmToken::Identifier)) 4459 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4460 4461 StringRef ID = getLexer().getTok().getIdentifier(); 4462 Lex(); 4463 4464 if (ID == ".end_amd_kernel_code_t") 4465 break; 4466 4467 if (ParseAMDKernelCodeTValue(ID, Header)) 4468 return true; 4469 } 4470 4471 getTargetStreamer().EmitAMDKernelCodeT(Header); 4472 4473 return false; 4474 } 4475 4476 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4477 if (getLexer().isNot(AsmToken::Identifier)) 4478 return TokError("expected symbol name"); 4479 4480 StringRef KernelName = Parser.getTok().getString(); 4481 4482 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4483 ELF::STT_AMDGPU_HSA_KERNEL); 4484 Lex(); 4485 4486 KernelScope.initialize(getContext()); 4487 return false; 4488 } 4489 4490 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4491 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4492 return Error(getParser().getTok().getLoc(), 4493 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4494 "architectures"); 4495 } 4496 4497 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4498 4499 std::string ISAVersionStringFromSTI; 4500 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4501 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4502 4503 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4504 return Error(getParser().getTok().getLoc(), 4505 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4506 "arguments specified through the command line"); 4507 } 4508 4509 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4510 Lex(); 4511 4512 return false; 4513 } 4514 4515 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4516 const char *AssemblerDirectiveBegin; 4517 const char *AssemblerDirectiveEnd; 4518 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4519 isHsaAbiVersion3(&getSTI()) 4520 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4521 HSAMD::V3::AssemblerDirectiveEnd) 4522 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4523 HSAMD::AssemblerDirectiveEnd); 4524 4525 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4526 return Error(getParser().getTok().getLoc(), 4527 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4528 "not available on non-amdhsa OSes")).str()); 4529 } 4530 4531 std::string HSAMetadataString; 4532 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4533 HSAMetadataString)) 4534 return true; 4535 4536 if (isHsaAbiVersion3(&getSTI())) { 4537 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4538 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4539 } else { 4540 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4541 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4542 } 4543 4544 return false; 4545 } 4546 4547 /// Common code to parse out a block of text (typically YAML) between start and 4548 /// end directives. 
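/// The collected text is returned in \p CollectString; the terminating
/// directive itself is consumed but not included in the result.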
4549 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4550 const char *AssemblerDirectiveEnd, 4551 std::string &CollectString) { 4552 4553 raw_string_ostream CollectStream(CollectString); 4554 4555 getLexer().setSkipSpace(false); 4556 4557 bool FoundEnd = false; 4558 while (!getLexer().is(AsmToken::Eof)) { 4559 while (getLexer().is(AsmToken::Space)) { 4560 CollectStream << getLexer().getTok().getString(); 4561 Lex(); 4562 } 4563 4564 if (getLexer().is(AsmToken::Identifier)) { 4565 StringRef ID = getLexer().getTok().getIdentifier(); 4566 if (ID == AssemblerDirectiveEnd) { 4567 Lex(); 4568 FoundEnd = true; 4569 break; 4570 } 4571 } 4572 4573 CollectStream << Parser.parseStringToEndOfStatement() 4574 << getContext().getAsmInfo()->getSeparatorString(); 4575 4576 Parser.eatToEndOfStatement(); 4577 } 4578 4579 getLexer().setSkipSpace(true); 4580 4581 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4582 return TokError(Twine("expected directive ") + 4583 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4584 } 4585 4586 CollectStream.flush(); 4587 return false; 4588 } 4589 4590 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4591 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4592 std::string String; 4593 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4594 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4595 return true; 4596 4597 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4598 if (!PALMetadata->setFromString(String)) 4599 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4600 return false; 4601 } 4602 4603 /// Parse the assembler directive for old linear-format PAL metadata. 4604 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4605 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4606 return Error(getParser().getTok().getLoc(), 4607 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4608 "not available on non-amdpal OSes")).str()); 4609 } 4610 4611 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4612 PALMetadata->setLegacy(); 4613 for (;;) { 4614 uint32_t Key, Value; 4615 if (ParseAsAbsoluteExpression(Key)) { 4616 return TokError(Twine("invalid value in ") + 4617 Twine(PALMD::AssemblerDirective)); 4618 } 4619 if (getLexer().isNot(AsmToken::Comma)) { 4620 return TokError(Twine("expected an even number of values in ") + 4621 Twine(PALMD::AssemblerDirective)); 4622 } 4623 Lex(); 4624 if (ParseAsAbsoluteExpression(Value)) { 4625 return TokError(Twine("invalid value in ") + 4626 Twine(PALMD::AssemblerDirective)); 4627 } 4628 PALMetadata->setRegister(Key, Value); 4629 if (getLexer().isNot(AsmToken::Comma)) 4630 break; 4631 Lex(); 4632 } 4633 return false; 4634 } 4635 4636 /// ParseDirectiveAMDGPULDS 4637 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4638 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4639 if (getParser().checkForValidSection()) 4640 return true; 4641 4642 StringRef Name; 4643 SMLoc NameLoc = getLexer().getLoc(); 4644 if (getParser().parseIdentifier(Name)) 4645 return TokError("expected identifier in directive"); 4646 4647 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4648 if (parseToken(AsmToken::Comma, "expected ','")) 4649 return true; 4650 4651 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4652 4653 int64_t Size; 4654 SMLoc SizeLoc = getLexer().getLoc(); 4655 if (getParser().parseAbsoluteExpression(Size)) 4656 return true; 4657 if (Size < 0) 4658 return 
Error(SizeLoc, "size must be non-negative"); 4659 if (Size > LocalMemorySize) 4660 return Error(SizeLoc, "size is too large"); 4661 4662 int64_t Alignment = 4; 4663 if (getLexer().is(AsmToken::Comma)) { 4664 Lex(); 4665 SMLoc AlignLoc = getLexer().getLoc(); 4666 if (getParser().parseAbsoluteExpression(Alignment)) 4667 return true; 4668 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 4669 return Error(AlignLoc, "alignment must be a power of two"); 4670 4671 // Alignment larger than the size of LDS is possible in theory, as long 4672 // as the linker manages to place the symbol at address 0, but we do want 4673 // to make sure the alignment fits nicely into a 32-bit integer. 4674 if (Alignment >= 1u << 31) 4675 return Error(AlignLoc, "alignment is too large"); 4676 } 4677 4678 if (parseToken(AsmToken::EndOfStatement, 4679 "unexpected token in '.amdgpu_lds' directive")) 4680 return true; 4681 4682 Symbol->redefineIfPossible(); 4683 if (!Symbol->isUndefined()) 4684 return Error(NameLoc, "invalid symbol redefinition"); 4685 4686 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 4687 return false; 4688 } 4689 4690 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4691 StringRef IDVal = DirectiveID.getString(); 4692 4693 if (isHsaAbiVersion3(&getSTI())) { 4694 if (IDVal == ".amdgcn_target") 4695 return ParseDirectiveAMDGCNTarget(); 4696 4697 if (IDVal == ".amdhsa_kernel") 4698 return ParseDirectiveAMDHSAKernel(); 4699 4700 // TODO: Restructure/combine with PAL metadata directive. 4701 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4702 return ParseDirectiveHSAMetadata(); 4703 } else { 4704 if (IDVal == ".hsa_code_object_version") 4705 return ParseDirectiveHSACodeObjectVersion(); 4706 4707 if (IDVal == ".hsa_code_object_isa") 4708 return ParseDirectiveHSACodeObjectISA(); 4709 4710 if (IDVal == ".amd_kernel_code_t") 4711 return ParseDirectiveAMDKernelCodeT(); 4712 4713 if (IDVal == ".amdgpu_hsa_kernel") 4714 return ParseDirectiveAMDGPUHsaKernel(); 4715 4716 if (IDVal == ".amd_amdgpu_isa") 4717 return ParseDirectiveISAVersion(); 4718 4719 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4720 return ParseDirectiveHSAMetadata(); 4721 } 4722 4723 if (IDVal == ".amdgpu_lds") 4724 return ParseDirectiveAMDGPULDS(); 4725 4726 if (IDVal == PALMD::AssemblerDirectiveBegin) 4727 return ParseDirectivePALMetadataBegin(); 4728 4729 if (IDVal == PALMD::AssemblerDirective) 4730 return ParseDirectivePALMetadata(); 4731 4732 return true; 4733 } 4734 4735 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4736 unsigned RegNo) const { 4737 4738 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4739 R.isValid(); ++R) { 4740 if (*R == RegNo) 4741 return isGFX9Plus(); 4742 } 4743 4744 // GFX10 has 2 more SGPRs 104 and 105.
4745 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4746 R.isValid(); ++R) { 4747 if (*R == RegNo) 4748 return hasSGPR104_SGPR105(); 4749 } 4750 4751 switch (RegNo) { 4752 case AMDGPU::SRC_SHARED_BASE: 4753 case AMDGPU::SRC_SHARED_LIMIT: 4754 case AMDGPU::SRC_PRIVATE_BASE: 4755 case AMDGPU::SRC_PRIVATE_LIMIT: 4756 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4757 return !isCI() && !isSI() && !isVI(); 4758 case AMDGPU::TBA: 4759 case AMDGPU::TBA_LO: 4760 case AMDGPU::TBA_HI: 4761 case AMDGPU::TMA: 4762 case AMDGPU::TMA_LO: 4763 case AMDGPU::TMA_HI: 4764 return !isGFX9() && !isGFX10(); 4765 case AMDGPU::XNACK_MASK: 4766 case AMDGPU::XNACK_MASK_LO: 4767 case AMDGPU::XNACK_MASK_HI: 4768 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4769 case AMDGPU::SGPR_NULL: 4770 return isGFX10(); 4771 default: 4772 break; 4773 } 4774 4775 if (isCI()) 4776 return true; 4777 4778 if (isSI() || isGFX10()) { 4779 // No flat_scr on SI. 4780 // On GFX10 flat scratch is not a valid register operand and can only be 4781 // accessed with s_setreg/s_getreg. 4782 switch (RegNo) { 4783 case AMDGPU::FLAT_SCR: 4784 case AMDGPU::FLAT_SCR_LO: 4785 case AMDGPU::FLAT_SCR_HI: 4786 return false; 4787 default: 4788 return true; 4789 } 4790 } 4791 4792 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4793 // SI/CI have. 4794 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4795 R.isValid(); ++R) { 4796 if (*R == RegNo) 4797 return hasSGPR102_SGPR103(); 4798 } 4799 4800 return true; 4801 } 4802 4803 OperandMatchResultTy 4804 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4805 OperandMode Mode) { 4806 // Try to parse with a custom parser 4807 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4808 4809 // If we successfully parsed the operand or if there was an error parsing, 4810 // we are done. 4811 // 4812 // If we are parsing after we reach EndOfStatement then this means we 4813 // are appending default values to the Operands list. This is only done 4814 // by custom parser, so we shouldn't continue on to the generic parsing. 4815 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4816 getLexer().is(AsmToken::EndOfStatement)) 4817 return ResTy; 4818 4819 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4820 unsigned Prefix = Operands.size(); 4821 SMLoc LBraceLoc = getTok().getLoc(); 4822 Parser.Lex(); // eat the '[' 4823 4824 for (;;) { 4825 ResTy = parseReg(Operands); 4826 if (ResTy != MatchOperand_Success) 4827 return ResTy; 4828 4829 if (getLexer().is(AsmToken::RBrac)) 4830 break; 4831 4832 if (getLexer().isNot(AsmToken::Comma)) 4833 return MatchOperand_ParseFail; 4834 Parser.Lex(); 4835 } 4836 4837 if (Operands.size() - Prefix > 1) { 4838 Operands.insert(Operands.begin() + Prefix, 4839 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4840 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4841 getTok().getLoc())); 4842 } 4843 4844 Parser.Lex(); // eat the ']' 4845 return MatchOperand_Success; 4846 } 4847 4848 return parseRegOrImm(Operands); 4849 } 4850 4851 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4852 // Clear any forced encodings from the previous instruction.
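  // A recognized suffix, e.g. the "_e64" in "v_add_f32_e64", forces the
  // corresponding encoding for this instruction and is stripped from the
  // returned mnemonic.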
4853 setForcedEncodingSize(0); 4854 setForcedDPP(false); 4855 setForcedSDWA(false); 4856 4857 if (Name.endswith("_e64")) { 4858 setForcedEncodingSize(64); 4859 return Name.substr(0, Name.size() - 4); 4860 } else if (Name.endswith("_e32")) { 4861 setForcedEncodingSize(32); 4862 return Name.substr(0, Name.size() - 4); 4863 } else if (Name.endswith("_dpp")) { 4864 setForcedDPP(true); 4865 return Name.substr(0, Name.size() - 4); 4866 } else if (Name.endswith("_sdwa")) { 4867 setForcedSDWA(true); 4868 return Name.substr(0, Name.size() - 5); 4869 } 4870 return Name; 4871 } 4872 4873 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4874 StringRef Name, 4875 SMLoc NameLoc, OperandVector &Operands) { 4876 // Add the instruction mnemonic 4877 Name = parseMnemonicSuffix(Name); 4878 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4879 4880 bool IsMIMG = Name.startswith("image_"); 4881 4882 while (!getLexer().is(AsmToken::EndOfStatement)) { 4883 OperandMode Mode = OperandMode_Default; 4884 if (IsMIMG && isGFX10() && Operands.size() == 2) 4885 Mode = OperandMode_NSA; 4886 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4887 4888 // Eat the comma or space if there is one. 4889 if (getLexer().is(AsmToken::Comma)) 4890 Parser.Lex(); 4891 4892 if (Res != MatchOperand_Success) { 4893 checkUnsupportedInstruction(Name, NameLoc); 4894 if (!Parser.hasPendingError()) { 4895 // FIXME: use real operand location rather than the current location. 4896 StringRef Msg = 4897 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 4898 "not a valid operand."; 4899 Error(getLexer().getLoc(), Msg); 4900 } 4901 while (!getLexer().is(AsmToken::EndOfStatement)) { 4902 Parser.Lex(); 4903 } 4904 return true; 4905 } 4906 } 4907 4908 return false; 4909 } 4910 4911 //===----------------------------------------------------------------------===// 4912 // Utility functions 4913 //===----------------------------------------------------------------------===// 4914 4915 OperandMatchResultTy 4916 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4917 4918 if (!trySkipId(Prefix, AsmToken::Colon)) 4919 return MatchOperand_NoMatch; 4920 4921 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 4922 } 4923 4924 OperandMatchResultTy 4925 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4926 AMDGPUOperand::ImmTy ImmTy, 4927 bool (*ConvertResult)(int64_t&)) { 4928 SMLoc S = getLoc(); 4929 int64_t Value = 0; 4930 4931 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4932 if (Res != MatchOperand_Success) 4933 return Res; 4934 4935 if (ConvertResult && !ConvertResult(Value)) { 4936 Error(S, "invalid " + StringRef(Prefix) + " value."); 4937 } 4938 4939 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4940 return MatchOperand_Success; 4941 } 4942 4943 OperandMatchResultTy 4944 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4945 OperandVector &Operands, 4946 AMDGPUOperand::ImmTy ImmTy, 4947 bool (*ConvertResult)(int64_t&)) { 4948 SMLoc S = getLoc(); 4949 if (!trySkipId(Prefix, AsmToken::Colon)) 4950 return MatchOperand_NoMatch; 4951 4952 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4953 return MatchOperand_ParseFail; 4954 4955 unsigned Val = 0; 4956 const unsigned MaxSize = 4; 4957 4958 // FIXME: How to verify the number of elements matches the number of src 4959 // operands? 
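  // Accepts a bracketed list such as op_sel:[0,1,1,0]; each element must be
  // 0 or 1, and element I sets bit I of the resulting immediate.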
4960 for (int I = 0; ; ++I) { 4961 int64_t Op; 4962 SMLoc Loc = getLoc(); 4963 if (!parseExpr(Op)) 4964 return MatchOperand_ParseFail; 4965 4966 if (Op != 0 && Op != 1) { 4967 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4968 return MatchOperand_ParseFail; 4969 } 4970 4971 Val |= (Op << I); 4972 4973 if (trySkipToken(AsmToken::RBrac)) 4974 break; 4975 4976 if (I + 1 == MaxSize) { 4977 Error(getLoc(), "expected a closing square bracket"); 4978 return MatchOperand_ParseFail; 4979 } 4980 4981 if (!skipToken(AsmToken::Comma, "expected a comma")) 4982 return MatchOperand_ParseFail; 4983 } 4984 4985 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4986 return MatchOperand_Success; 4987 } 4988 4989 OperandMatchResultTy 4990 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4991 AMDGPUOperand::ImmTy ImmTy) { 4992 int64_t Bit = 0; 4993 SMLoc S = Parser.getTok().getLoc(); 4994 4995 // We are at the end of the statement, and this is a default argument, so 4996 // use a default value. 4997 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4998 switch(getLexer().getKind()) { 4999 case AsmToken::Identifier: { 5000 StringRef Tok = Parser.getTok().getString(); 5001 if (Tok == Name) { 5002 if (Tok == "r128" && !hasMIMG_R128()) 5003 Error(S, "r128 modifier is not supported on this GPU"); 5004 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 5005 Error(S, "a16 modifier is not supported on this GPU"); 5006 Bit = 1; 5007 Parser.Lex(); 5008 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 5009 Bit = 0; 5010 Parser.Lex(); 5011 } else { 5012 return MatchOperand_NoMatch; 5013 } 5014 break; 5015 } 5016 default: 5017 return MatchOperand_NoMatch; 5018 } 5019 } 5020 5021 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 5022 return MatchOperand_ParseFail; 5023 5024 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5025 ImmTy = AMDGPUOperand::ImmTyR128A16; 5026 5027 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5028 return MatchOperand_Success; 5029 } 5030 5031 static void addOptionalImmOperand( 5032 MCInst& Inst, const OperandVector& Operands, 5033 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5034 AMDGPUOperand::ImmTy ImmT, 5035 int64_t Default = 0) { 5036 auto i = OptionalIdx.find(ImmT); 5037 if (i != OptionalIdx.end()) { 5038 unsigned Idx = i->second; 5039 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5040 } else { 5041 Inst.addOperand(MCOperand::createImm(Default)); 5042 } 5043 } 5044 5045 OperandMatchResultTy 5046 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 5047 if (getLexer().isNot(AsmToken::Identifier)) { 5048 return MatchOperand_NoMatch; 5049 } 5050 StringRef Tok = Parser.getTok().getString(); 5051 if (Tok != Prefix) { 5052 return MatchOperand_NoMatch; 5053 } 5054 5055 Parser.Lex(); 5056 if (getLexer().isNot(AsmToken::Colon)) { 5057 return MatchOperand_ParseFail; 5058 } 5059 5060 Parser.Lex(); 5061 if (getLexer().isNot(AsmToken::Identifier)) { 5062 return MatchOperand_ParseFail; 5063 } 5064 5065 Value = Parser.getTok().getString(); 5066 return MatchOperand_Success; 5067 } 5068 5069 //===----------------------------------------------------------------------===// 5070 // MTBUF format 5071 //===----------------------------------------------------------------------===// 5072 5073 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5074 int64_t MaxVal, 5075 int64_t &Fmt) { 5076 int64_t Val; 5077 SMLoc Loc = getLoc(); 5078 5079 auto Res = parseIntWithPrefix(Pref, Val); 5080 if (Res 
== MatchOperand_ParseFail) 5081 return false; 5082 if (Res == MatchOperand_NoMatch) 5083 return true; 5084 5085 if (Val < 0 || Val > MaxVal) { 5086 Error(Loc, Twine("out of range ", StringRef(Pref))); 5087 return false; 5088 } 5089 5090 Fmt = Val; 5091 return true; 5092 } 5093 5094 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5095 // values to live in a joint format operand in the MCInst encoding. 5096 OperandMatchResultTy 5097 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5098 using namespace llvm::AMDGPU::MTBUFFormat; 5099 5100 int64_t Dfmt = DFMT_UNDEF; 5101 int64_t Nfmt = NFMT_UNDEF; 5102 5103 // dfmt and nfmt can appear in either order, and each is optional. 5104 for (int I = 0; I < 2; ++I) { 5105 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5106 return MatchOperand_ParseFail; 5107 5108 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5109 return MatchOperand_ParseFail; 5110 } 5111 // Skip optional comma between dfmt/nfmt 5112 // but guard against 2 commas following each other. 5113 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5114 !peekToken().is(AsmToken::Comma)) { 5115 trySkipToken(AsmToken::Comma); 5116 } 5117 } 5118 5119 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5120 return MatchOperand_NoMatch; 5121 5122 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5123 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 5124 5125 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5126 return MatchOperand_Success; 5127 } 5128 5129 OperandMatchResultTy 5130 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5131 using namespace llvm::AMDGPU::MTBUFFormat; 5132 5133 int64_t Fmt = UFMT_UNDEF; 5134 5135 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5136 return MatchOperand_ParseFail; 5137 5138 if (Fmt == UFMT_UNDEF) 5139 return MatchOperand_NoMatch; 5140 5141 Format = Fmt; 5142 return MatchOperand_Success; 5143 } 5144 5145 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5146 int64_t &Nfmt, 5147 StringRef FormatStr, 5148 SMLoc Loc) { 5149 using namespace llvm::AMDGPU::MTBUFFormat; 5150 int64_t Format; 5151 5152 Format = getDfmt(FormatStr); 5153 if (Format != DFMT_UNDEF) { 5154 Dfmt = Format; 5155 return true; 5156 } 5157 5158 Format = getNfmt(FormatStr, getSTI()); 5159 if (Format != NFMT_UNDEF) { 5160 Nfmt = Format; 5161 return true; 5162 } 5163 5164 Error(Loc, "unsupported format"); 5165 return false; 5166 } 5167 5168 OperandMatchResultTy 5169 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5170 SMLoc FormatLoc, 5171 int64_t &Format) { 5172 using namespace llvm::AMDGPU::MTBUFFormat; 5173 5174 int64_t Dfmt = DFMT_UNDEF; 5175 int64_t Nfmt = NFMT_UNDEF; 5176 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5177 return MatchOperand_ParseFail; 5178 5179 if (trySkipToken(AsmToken::Comma)) { 5180 StringRef Str; 5181 SMLoc Loc = getLoc(); 5182 if (!parseId(Str, "expected a format string") || 5183 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5184 return MatchOperand_ParseFail; 5185 } 5186 if (Dfmt == DFMT_UNDEF) { 5187 Error(Loc, "duplicate numeric format"); 5188 return MatchOperand_ParseFail; 5189 } else if (Nfmt == NFMT_UNDEF) { 5190 Error(Loc, "duplicate data format"); 5191 return MatchOperand_ParseFail; 5192 } 5193 } 5194 5195 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5196 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5197 5198 if (isGFX10()) { 5199 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5200 if (Ufmt == UFMT_UNDEF) { 5201 Error(FormatLoc, "unsupported format"); 5202 return MatchOperand_ParseFail; 5203 } 5204 Format = Ufmt; 5205 } else { 5206 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5207 } 5208 5209 return MatchOperand_Success; 5210 } 5211 5212 OperandMatchResultTy 5213 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5214 SMLoc Loc, 5215 int64_t &Format) { 5216 using namespace llvm::AMDGPU::MTBUFFormat; 5217 5218 auto Id = getUnifiedFormat(FormatStr); 5219 if (Id == UFMT_UNDEF) 5220 return MatchOperand_NoMatch; 5221 5222 if (!isGFX10()) { 5223 Error(Loc, "unified format is not supported on this GPU"); 5224 return MatchOperand_ParseFail; 5225 } 5226 5227 Format = Id; 5228 return MatchOperand_Success; 5229 } 5230 5231 OperandMatchResultTy 5232 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5233 using namespace llvm::AMDGPU::MTBUFFormat; 5234 SMLoc Loc = getLoc(); 5235 5236 if (!parseExpr(Format)) 5237 return MatchOperand_ParseFail; 5238 if (!isValidFormatEncoding(Format, getSTI())) { 5239 Error(Loc, "out of range format"); 5240 return MatchOperand_ParseFail; 5241 } 5242 5243 return MatchOperand_Success; 5244 } 5245 5246 OperandMatchResultTy 5247 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5248 using namespace llvm::AMDGPU::MTBUFFormat; 5249 5250 if (!trySkipId("format", AsmToken::Colon)) 5251 return MatchOperand_NoMatch; 5252 5253 if (trySkipToken(AsmToken::LBrac)) { 5254 StringRef FormatStr; 5255 SMLoc Loc = getLoc(); 5256 if (!parseId(FormatStr, "expected a format string")) 5257 return MatchOperand_ParseFail; 5258 5259 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5260 if (Res == MatchOperand_NoMatch) 5261 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5262 if (Res != MatchOperand_Success) 5263 return Res; 5264 5265 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5266 return MatchOperand_ParseFail; 5267 5268 return MatchOperand_Success; 5269 } 5270 5271 return parseNumericFormat(Format); 5272 } 5273 5274 OperandMatchResultTy 5275 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5276 using namespace llvm::AMDGPU::MTBUFFormat; 5277 5278 int64_t Format = getDefaultFormatEncoding(getSTI()); 5279 OperandMatchResultTy Res; 5280 SMLoc Loc = getLoc(); 5281 5282 // Parse legacy format syntax. 5283 Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5284 if (Res == MatchOperand_ParseFail) 5285 return Res; 5286 5287 bool FormatFound = (Res == MatchOperand_Success); 5288 5289 Operands.push_back( 5290 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5291 5292 if (FormatFound) 5293 trySkipToken(AsmToken::Comma); 5294 5295 if (isToken(AsmToken::EndOfStatement)) { 5296 // We are expecting an soffset operand, 5297 // but let matcher handle the error. 5298 return MatchOperand_Success; 5299 } 5300 5301 // Parse soffset. 
5302 Res = parseRegOrImm(Operands); 5303 if (Res != MatchOperand_Success) 5304 return Res; 5305 5306 trySkipToken(AsmToken::Comma); 5307 5308 if (!FormatFound) { 5309 Res = parseSymbolicOrNumericFormat(Format); 5310 if (Res == MatchOperand_ParseFail) 5311 return Res; 5312 if (Res == MatchOperand_Success) { 5313 auto Size = Operands.size(); 5314 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5315 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5316 Op.setImm(Format); 5317 } 5318 return MatchOperand_Success; 5319 } 5320 5321 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5322 Error(getLoc(), "duplicate format"); 5323 return MatchOperand_ParseFail; 5324 } 5325 return MatchOperand_Success; 5326 } 5327 5328 //===----------------------------------------------------------------------===// 5329 // ds 5330 //===----------------------------------------------------------------------===// 5331 5332 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5333 const OperandVector &Operands) { 5334 OptionalImmIndexMap OptionalIdx; 5335 5336 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5337 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5338 5339 // Add the register arguments 5340 if (Op.isReg()) { 5341 Op.addRegOperands(Inst, 1); 5342 continue; 5343 } 5344 5345 // Handle optional arguments 5346 OptionalIdx[Op.getImmTy()] = i; 5347 } 5348 5349 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5350 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5351 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5352 5353 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5354 } 5355 5356 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5357 bool IsGdsHardcoded) { 5358 OptionalImmIndexMap OptionalIdx; 5359 5360 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5361 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5362 5363 // Add the register arguments 5364 if (Op.isReg()) { 5365 Op.addRegOperands(Inst, 1); 5366 continue; 5367 } 5368 5369 if (Op.isToken() && Op.getToken() == "gds") { 5370 IsGdsHardcoded = true; 5371 continue; 5372 } 5373 5374 // Handle optional arguments 5375 OptionalIdx[Op.getImmTy()] = i; 5376 } 5377 5378 AMDGPUOperand::ImmTy OffsetType = 5379 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5380 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5381 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5382 AMDGPUOperand::ImmTyOffset; 5383 5384 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5385 5386 if (!IsGdsHardcoded) { 5387 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5388 } 5389 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5390 } 5391 5392 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5393 OptionalImmIndexMap OptionalIdx; 5394 5395 unsigned OperandIdx[4]; 5396 unsigned EnMask = 0; 5397 int SrcIdx = 0; 5398 5399 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5400 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5401 5402 // Add the register arguments 5403 if (Op.isReg()) { 5404 assert(SrcIdx < 4); 5405 OperandIdx[SrcIdx] = Inst.size(); 5406 Op.addRegOperands(Inst, 1); 5407 ++SrcIdx; 5408 continue; 5409 } 5410 5411 if (Op.isOff()) { 5412 assert(SrcIdx < 4); 5413 OperandIdx[SrcIdx] = Inst.size(); 5414 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5415 ++SrcIdx; 5416 continue; 5417 } 5418 5419 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5420 Op.addImmOperands(Inst, 1); 5421 continue; 5422 } 5423 5424 if (Op.isToken() && Op.getToken() == "done") 5425 continue; 5426 5427 // Handle optional arguments 5428 OptionalIdx[Op.getImmTy()] = i; 5429 } 5430 5431 assert(SrcIdx == 4); 5432 5433 bool Compr = false; 5434 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5435 Compr = true; 5436 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5437 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5438 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5439 } 5440 5441 for (auto i = 0; i < SrcIdx; ++i) { 5442 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5443 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5444 } 5445 } 5446 5447 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5448 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5449 5450 Inst.addOperand(MCOperand::createImm(EnMask)); 5451 } 5452 5453 //===----------------------------------------------------------------------===// 5454 // s_waitcnt 5455 //===----------------------------------------------------------------------===// 5456 5457 static bool 5458 encodeCnt( 5459 const AMDGPU::IsaVersion ISA, 5460 int64_t &IntVal, 5461 int64_t CntVal, 5462 bool Saturate, 5463 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5464 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5465 { 5466 bool Failed = false; 5467 5468 IntVal = encode(ISA, IntVal, CntVal); 5469 if (CntVal != decode(ISA, IntVal)) { 5470 if (Saturate) { 5471 IntVal = encode(ISA, IntVal, -1); 5472 } else { 5473 Failed = true; 5474 } 5475 } 5476 return Failed; 5477 } 5478 5479 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5480 5481 SMLoc CntLoc = getLoc(); 5482 StringRef CntName = getTokenStr(); 5483 5484 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5485 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5486 return false; 5487 5488 int64_t CntVal; 5489 SMLoc ValLoc = getLoc(); 5490 if (!parseExpr(CntVal)) 5491 return false; 5492 5493 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5494 5495 bool Failed = true; 5496 bool Sat = CntName.endswith("_sat"); 5497 5498 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5499 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5500 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5501 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5502 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5503 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5504 } else { 5505 Error(CntLoc, "invalid counter name " + CntName); 5506 return false; 5507 } 5508 5509 if (Failed) { 5510 Error(ValLoc, "too large value for " + CntName); 5511 return false; 5512 } 5513 5514 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5515 return false; 5516 5517 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5518 if (isToken(AsmToken::EndOfStatement)) { 5519 Error(getLoc(), "expected a counter name"); 5520 return false; 5521 } 5522 } 5523 5524 return true; 5525 } 5526 5527 OperandMatchResultTy 5528 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5529 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5530 int64_t Waitcnt = getWaitcntBitMask(ISA); 5531 SMLoc S = getLoc(); 5532 5533 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5534 while (!isToken(AsmToken::EndOfStatement)) { 5535 if (!parseCnt(Waitcnt)) 5536 return MatchOperand_ParseFail; 5537 } 5538 } else { 5539 if (!parseExpr(Waitcnt)) 5540 return MatchOperand_ParseFail; 5541 } 5542 5543 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5544 return MatchOperand_Success; 5545 } 5546 5547 bool 5548 AMDGPUOperand::isSWaitCnt() const { 5549 return isImm(); 5550 } 5551 5552 //===----------------------------------------------------------------------===// 5553 // hwreg 5554 //===----------------------------------------------------------------------===// 5555 5556 bool 5557 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5558 int64_t &Offset, 5559 int64_t 
&Width) { 5560 using namespace llvm::AMDGPU::Hwreg; 5561 5562 // The register may be specified by name or using a numeric code 5563 if (isToken(AsmToken::Identifier) && 5564 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5565 HwReg.IsSymbolic = true; 5566 lex(); // skip register name 5567 } else if (!parseExpr(HwReg.Id)) { 5568 return false; 5569 } 5570 5571 if (trySkipToken(AsmToken::RParen)) 5572 return true; 5573 5574 // parse optional params 5575 return 5576 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5577 parseExpr(Offset) && 5578 skipToken(AsmToken::Comma, "expected a comma") && 5579 parseExpr(Width) && 5580 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5581 } 5582 5583 bool 5584 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5585 const int64_t Offset, 5586 const int64_t Width, 5587 const SMLoc Loc) { 5588 5589 using namespace llvm::AMDGPU::Hwreg; 5590 5591 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5592 Error(Loc, "specified hardware register is not supported on this GPU"); 5593 return false; 5594 } else if (!isValidHwreg(HwReg.Id)) { 5595 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5596 return false; 5597 } else if (!isValidHwregOffset(Offset)) { 5598 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5599 return false; 5600 } else if (!isValidHwregWidth(Width)) { 5601 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5602 return false; 5603 } 5604 return true; 5605 } 5606 5607 OperandMatchResultTy 5608 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5609 using namespace llvm::AMDGPU::Hwreg; 5610 5611 int64_t ImmVal = 0; 5612 SMLoc Loc = getLoc(); 5613 5614 if (trySkipId("hwreg", AsmToken::LParen)) { 5615 OperandInfoTy HwReg(ID_UNKNOWN_); 5616 int64_t Offset = OFFSET_DEFAULT_; 5617 int64_t Width = WIDTH_DEFAULT_; 5618 if (parseHwregBody(HwReg, Offset, Width) && 5619 validateHwreg(HwReg, Offset, Width, Loc)) { 5620 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5621 } else { 5622 return MatchOperand_ParseFail; 5623 } 5624 } else if (parseExpr(ImmVal)) { 5625 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5626 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5627 return MatchOperand_ParseFail; 5628 } 5629 } else { 5630 return MatchOperand_ParseFail; 5631 } 5632 5633 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5634 return MatchOperand_Success; 5635 } 5636 5637 bool AMDGPUOperand::isHwreg() const { 5638 return isImmTy(ImmTyHwreg); 5639 } 5640 5641 //===----------------------------------------------------------------------===// 5642 // sendmsg 5643 //===----------------------------------------------------------------------===// 5644 5645 bool 5646 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5647 OperandInfoTy &Op, 5648 OperandInfoTy &Stream) { 5649 using namespace llvm::AMDGPU::SendMsg; 5650 5651 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5652 Msg.IsSymbolic = true; 5653 lex(); // skip message name 5654 } else if (!parseExpr(Msg.Id)) { 5655 return false; 5656 } 5657 5658 if (trySkipToken(AsmToken::Comma)) { 5659 Op.IsDefined = true; 5660 if (isToken(AsmToken::Identifier) && 5661 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5662 lex(); // skip operation name 5663 } else if (!parseExpr(Op.Id)) { 5664 return false; 5665 } 5666 5667 if (trySkipToken(AsmToken::Comma)) { 5668 Stream.IsDefined = true; 5669 if (!parseExpr(Stream.Id)
5670 return false; 5671 } 5672 } 5673 5674 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5675 } 5676 5677 bool 5678 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5679 const OperandInfoTy &Op, 5680 const OperandInfoTy &Stream, 5681 const SMLoc S) { 5682 using namespace llvm::AMDGPU::SendMsg; 5683 5684 // Validation strictness depends on whether message is specified 5685 // in a symbolic or in a numeric form. In the latter case 5686 // only the encoding possibility is checked. 5687 bool Strict = Msg.IsSymbolic; 5688 5689 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5690 Error(S, "invalid message id"); 5691 return false; 5692 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5693 Error(S, Op.IsDefined ? 5694 "message does not support operations" : 5695 "missing message operation"); 5696 return false; 5697 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5698 Error(S, "invalid operation id"); 5699 return false; 5700 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5701 Error(S, "message operation does not support streams"); 5702 return false; 5703 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5704 Error(S, "invalid message stream id"); 5705 return false; 5706 } 5707 return true; 5708 } 5709 5710 OperandMatchResultTy 5711 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5712 using namespace llvm::AMDGPU::SendMsg; 5713 5714 int64_t ImmVal = 0; 5715 SMLoc Loc = getLoc(); 5716 5717 if (trySkipId("sendmsg", AsmToken::LParen)) { 5718 OperandInfoTy Msg(ID_UNKNOWN_); 5719 OperandInfoTy Op(OP_NONE_); 5720 OperandInfoTy Stream(STREAM_ID_NONE_); 5721 if (parseSendMsgBody(Msg, Op, Stream) && 5722 validateSendMsg(Msg, Op, Stream, Loc)) { 5723 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5724 } else { 5725 return MatchOperand_ParseFail; 5726 } 5727 } else if (parseExpr(ImmVal)) { 5728 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5729 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5730 return MatchOperand_ParseFail; 5731 } 5732 } else { 5733 return MatchOperand_ParseFail; 5734 } 5735 5736 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5737 return MatchOperand_Success; 5738 } 5739 5740 bool AMDGPUOperand::isSendMsg() const { 5741 return isImmTy(ImmTySendMsg); 5742 } 5743 5744 //===----------------------------------------------------------------------===// 5745 // v_interp 5746 //===----------------------------------------------------------------------===// 5747 5748 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5749 if (getLexer().getKind() != AsmToken::Identifier) 5750 return MatchOperand_NoMatch; 5751 5752 StringRef Str = Parser.getTok().getString(); 5753 int Slot = StringSwitch<int>(Str) 5754 .Case("p10", 0) 5755 .Case("p20", 1) 5756 .Case("p0", 2) 5757 .Default(-1); 5758 5759 SMLoc S = Parser.getTok().getLoc(); 5760 if (Slot == -1) 5761 return MatchOperand_ParseFail; 5762 5763 Parser.Lex(); 5764 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5765 AMDGPUOperand::ImmTyInterpSlot)); 5766 return MatchOperand_Success; 5767 } 5768 5769 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5770 if (getLexer().getKind() != AsmToken::Identifier) 5771 return MatchOperand_NoMatch; 5772 5773 StringRef Str = Parser.getTok().getString(); 5774 if (!Str.startswith("attr")) 5775 return MatchOperand_NoMatch; 5776 5777 StringRef Chan = Str.take_back(2); 5778 int AttrChan =
StringSwitch<int>(Chan) 5779 .Case(".x", 0) 5780 .Case(".y", 1) 5781 .Case(".z", 2) 5782 .Case(".w", 3) 5783 .Default(-1); 5784 if (AttrChan == -1) 5785 return MatchOperand_ParseFail; 5786 5787 Str = Str.drop_back(2).drop_front(4); 5788 5789 uint8_t Attr; 5790 if (Str.getAsInteger(10, Attr)) 5791 return MatchOperand_ParseFail; 5792 5793 SMLoc S = Parser.getTok().getLoc(); 5794 Parser.Lex(); 5795 if (Attr > 63) { 5796 Error(S, "out of bounds attr"); 5797 return MatchOperand_ParseFail; 5798 } 5799 5800 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5801 5802 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5803 AMDGPUOperand::ImmTyInterpAttr)); 5804 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5805 AMDGPUOperand::ImmTyAttrChan)); 5806 return MatchOperand_Success; 5807 } 5808 5809 //===----------------------------------------------------------------------===// 5810 // exp 5811 //===----------------------------------------------------------------------===// 5812 5813 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5814 uint8_t &Val) { 5815 if (Str == "null") { 5816 Val = 9; 5817 return MatchOperand_Success; 5818 } 5819 5820 if (Str.startswith("mrt")) { 5821 Str = Str.drop_front(3); 5822 if (Str == "z") { // == mrtz 5823 Val = 8; 5824 return MatchOperand_Success; 5825 } 5826 5827 if (Str.getAsInteger(10, Val)) 5828 return MatchOperand_ParseFail; 5829 5830 if (Val > 7) { 5831 Error(getLoc(), "invalid exp target"); 5832 return MatchOperand_ParseFail; 5833 } 5834 5835 return MatchOperand_Success; 5836 } 5837 5838 if (Str.startswith("pos")) { 5839 Str = Str.drop_front(3); 5840 if (Str.getAsInteger(10, Val)) 5841 return MatchOperand_ParseFail; 5842 5843 if (Val > 4 || (Val == 4 && !isGFX10())) { 5844 Error(getLoc(), "invalid exp target"); 5845 return MatchOperand_ParseFail; 5846 } 5847 5848 Val += 12; 5849 return MatchOperand_Success; 5850 } 5851 5852 if (isGFX10() && Str == "prim") { 5853 Val = 20; 5854 return MatchOperand_Success; 5855 } 5856 5857 if (Str.startswith("param")) { 5858 Str = Str.drop_front(5); 5859 if (Str.getAsInteger(10, Val)) 5860 return MatchOperand_ParseFail; 5861 5862 if (Val >= 32) { 5863 Error(getLoc(), "invalid exp target"); 5864 return MatchOperand_ParseFail; 5865 } 5866 5867 Val += 32; 5868 return MatchOperand_Success; 5869 } 5870 5871 if (Str.startswith("invalid_target_")) { 5872 Str = Str.drop_front(15); 5873 if (Str.getAsInteger(10, Val)) 5874 return MatchOperand_ParseFail; 5875 5876 Error(getLoc(), "invalid exp target"); 5877 return MatchOperand_ParseFail; 5878 } 5879 5880 return MatchOperand_NoMatch; 5881 } 5882 5883 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5884 uint8_t Val; 5885 StringRef Str = Parser.getTok().getString(); 5886 5887 auto Res = parseExpTgtImpl(Str, Val); 5888 if (Res != MatchOperand_Success) 5889 return Res; 5890 5891 SMLoc S = Parser.getTok().getLoc(); 5892 Parser.Lex(); 5893 5894 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5895 AMDGPUOperand::ImmTyExpTgt)); 5896 return MatchOperand_Success; 5897 } 5898 5899 //===----------------------------------------------------------------------===// 5900 // parser helpers 5901 //===----------------------------------------------------------------------===// 5902 5903 bool 5904 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5905 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5906 } 5907 5908 bool 5909 AMDGPUAsmParser::isId(const StringRef Id) const { 5910 return 
isId(getToken(), Id); 5911 } 5912 5913 bool 5914 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5915 return getTokenKind() == Kind; 5916 } 5917 5918 bool 5919 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5920 if (isId(Id)) { 5921 lex(); 5922 return true; 5923 } 5924 return false; 5925 } 5926 5927 bool 5928 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5929 if (isId(Id) && peekToken().is(Kind)) { 5930 lex(); 5931 lex(); 5932 return true; 5933 } 5934 return false; 5935 } 5936 5937 bool 5938 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5939 if (isToken(Kind)) { 5940 lex(); 5941 return true; 5942 } 5943 return false; 5944 } 5945 5946 bool 5947 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5948 const StringRef ErrMsg) { 5949 if (!trySkipToken(Kind)) { 5950 Error(getLoc(), ErrMsg); 5951 return false; 5952 } 5953 return true; 5954 } 5955 5956 bool 5957 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5958 return !getParser().parseAbsoluteExpression(Imm); 5959 } 5960 5961 bool 5962 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5963 SMLoc S = getLoc(); 5964 5965 const MCExpr *Expr; 5966 if (Parser.parseExpression(Expr)) 5967 return false; 5968 5969 int64_t IntVal; 5970 if (Expr->evaluateAsAbsolute(IntVal)) { 5971 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5972 } else { 5973 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5974 } 5975 return true; 5976 } 5977 5978 bool 5979 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5980 if (isToken(AsmToken::String)) { 5981 Val = getToken().getStringContents(); 5982 lex(); 5983 return true; 5984 } else { 5985 Error(getLoc(), ErrMsg); 5986 return false; 5987 } 5988 } 5989 5990 bool 5991 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 5992 if (isToken(AsmToken::Identifier)) { 5993 Val = getTokenStr(); 5994 lex(); 5995 return true; 5996 } else { 5997 Error(getLoc(), ErrMsg); 5998 return false; 5999 } 6000 } 6001 6002 AsmToken 6003 AMDGPUAsmParser::getToken() const { 6004 return Parser.getTok(); 6005 } 6006 6007 AsmToken 6008 AMDGPUAsmParser::peekToken() { 6009 return isToken(AsmToken::EndOfStatement) ? 
getToken() : getLexer().peekTok(); 6010 } 6011 6012 void 6013 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6014 auto TokCount = getLexer().peekTokens(Tokens); 6015 6016 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6017 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6018 } 6019 6020 AsmToken::TokenKind 6021 AMDGPUAsmParser::getTokenKind() const { 6022 return getLexer().getKind(); 6023 } 6024 6025 SMLoc 6026 AMDGPUAsmParser::getLoc() const { 6027 return getToken().getLoc(); 6028 } 6029 6030 StringRef 6031 AMDGPUAsmParser::getTokenStr() const { 6032 return getToken().getString(); 6033 } 6034 6035 void 6036 AMDGPUAsmParser::lex() { 6037 Parser.Lex(); 6038 } 6039 6040 //===----------------------------------------------------------------------===// 6041 // swizzle 6042 //===----------------------------------------------------------------------===// 6043 6044 LLVM_READNONE 6045 static unsigned 6046 encodeBitmaskPerm(const unsigned AndMask, 6047 const unsigned OrMask, 6048 const unsigned XorMask) { 6049 using namespace llvm::AMDGPU::Swizzle; 6050 6051 return BITMASK_PERM_ENC | 6052 (AndMask << BITMASK_AND_SHIFT) | 6053 (OrMask << BITMASK_OR_SHIFT) | 6054 (XorMask << BITMASK_XOR_SHIFT); 6055 } 6056 6057 bool 6058 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6059 const unsigned MinVal, 6060 const unsigned MaxVal, 6061 const StringRef ErrMsg) { 6062 for (unsigned i = 0; i < OpNum; ++i) { 6063 if (!skipToken(AsmToken::Comma, "expected a comma")){ 6064 return false; 6065 } 6066 SMLoc ExprLoc = Parser.getTok().getLoc(); 6067 if (!parseExpr(Op[i])) { 6068 return false; 6069 } 6070 if (Op[i] < MinVal || Op[i] > MaxVal) { 6071 Error(ExprLoc, ErrMsg); 6072 return false; 6073 } 6074 } 6075 6076 return true; 6077 } 6078 6079 bool 6080 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 6081 using namespace llvm::AMDGPU::Swizzle; 6082 6083 int64_t Lane[LANE_NUM]; 6084 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6085 "expected a 2-bit lane id")) { 6086 Imm = QUAD_PERM_ENC; 6087 for (unsigned I = 0; I < LANE_NUM; ++I) { 6088 Imm |= Lane[I] << (LANE_SHIFT * I); 6089 } 6090 return true; 6091 } 6092 return false; 6093 } 6094 6095 bool 6096 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6097 using namespace llvm::AMDGPU::Swizzle; 6098 6099 SMLoc S = Parser.getTok().getLoc(); 6100 int64_t GroupSize; 6101 int64_t LaneIdx; 6102 6103 if (!parseSwizzleOperands(1, &GroupSize, 6104 2, 32, 6105 "group size must be in the interval [2,32]")) { 6106 return false; 6107 } 6108 if (!isPowerOf2_64(GroupSize)) { 6109 Error(S, "group size must be a power of two"); 6110 return false; 6111 } 6112 if (parseSwizzleOperands(1, &LaneIdx, 6113 0, GroupSize - 1, 6114 "lane id must be in the interval [0,group size - 1]")) { 6115 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6116 return true; 6117 } 6118 return false; 6119 } 6120 6121 bool 6122 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6123 using namespace llvm::AMDGPU::Swizzle; 6124 6125 SMLoc S = Parser.getTok().getLoc(); 6126 int64_t GroupSize; 6127 6128 if (!parseSwizzleOperands(1, &GroupSize, 6129 2, 32, "group size must be in the interval [2,32]")) { 6130 return false; 6131 } 6132 if (!isPowerOf2_64(GroupSize)) { 6133 Error(S, "group size must be a power of two"); 6134 return false; 6135 } 6136 6137 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6138 return true; 6139 } 6140 6141 bool 6142 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6143 using namespace 
llvm::AMDGPU::Swizzle; 6144 6145 SMLoc S = Parser.getTok().getLoc(); 6146 int64_t GroupSize; 6147 6148 if (!parseSwizzleOperands(1, &GroupSize, 6149 1, 16, "group size must be in the interval [1,16]")) { 6150 return false; 6151 } 6152 if (!isPowerOf2_64(GroupSize)) { 6153 Error(S, "group size must be a power of two"); 6154 return false; 6155 } 6156 6157 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6158 return true; 6159 } 6160 6161 bool 6162 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6163 using namespace llvm::AMDGPU::Swizzle; 6164 6165 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6166 return false; 6167 } 6168 6169 StringRef Ctl; 6170 SMLoc StrLoc = Parser.getTok().getLoc(); 6171 if (!parseString(Ctl)) { 6172 return false; 6173 } 6174 if (Ctl.size() != BITMASK_WIDTH) { 6175 Error(StrLoc, "expected a 5-character mask"); 6176 return false; 6177 } 6178 6179 unsigned AndMask = 0; 6180 unsigned OrMask = 0; 6181 unsigned XorMask = 0; 6182 6183 for (size_t i = 0; i < Ctl.size(); ++i) { 6184 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6185 switch(Ctl[i]) { 6186 default: 6187 Error(StrLoc, "invalid mask"); 6188 return false; 6189 case '0': 6190 break; 6191 case '1': 6192 OrMask |= Mask; 6193 break; 6194 case 'p': 6195 AndMask |= Mask; 6196 break; 6197 case 'i': 6198 AndMask |= Mask; 6199 XorMask |= Mask; 6200 break; 6201 } 6202 } 6203 6204 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6205 return true; 6206 } 6207 6208 bool 6209 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6210 6211 SMLoc OffsetLoc = Parser.getTok().getLoc(); 6212 6213 if (!parseExpr(Imm)) { 6214 return false; 6215 } 6216 if (!isUInt<16>(Imm)) { 6217 Error(OffsetLoc, "expected a 16-bit offset"); 6218 return false; 6219 } 6220 return true; 6221 } 6222 6223 bool 6224 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6225 using namespace llvm::AMDGPU::Swizzle; 6226 6227 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) { 6228 6229 SMLoc ModeLoc = Parser.getTok().getLoc(); 6230 bool Ok = false; 6231 6232 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6233 Ok = parseSwizzleQuadPerm(Imm); 6234 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6235 Ok = parseSwizzleBitmaskPerm(Imm); 6236 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6237 Ok = parseSwizzleBroadcast(Imm); 6238 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6239 Ok = parseSwizzleSwap(Imm); 6240 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6241 Ok = parseSwizzleReverse(Imm); 6242 } else { 6243 Error(ModeLoc, "expected a swizzle mode"); 6244 } 6245 6246 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6247 } 6248 6249 return false; 6250 } 6251 6252 OperandMatchResultTy 6253 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6254 SMLoc S = Parser.getTok().getLoc(); 6255 int64_t Imm = 0; 6256 6257 if (trySkipId("offset")) { 6258 6259 bool Ok = false; 6260 if (skipToken(AsmToken::Colon, "expected a colon")) { 6261 if (trySkipId("swizzle")) { 6262 Ok = parseSwizzleMacro(Imm); 6263 } else { 6264 Ok = parseSwizzleOffset(Imm); 6265 } 6266 } 6267 6268 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6269 6270 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6271 } else { 6272 // Swizzle "offset" operand is optional. 6273 // If it is omitted, try parsing other optional operands.
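// (Informal reference, assuming standard DS syntax: when the "offset" prefix
// is present, the branch above accepts either a raw 16-bit literal such as
// offset:0xFFFF or a macro such as offset:swizzle(QUAD_PERM, 0, 1, 2, 3),
// offset:swizzle(BITMASK_PERM, "01pi0") or offset:swizzle(BROADCAST, 8, 0);
// this fall-through only handles the case where no "offset" operand is given.)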
6274 return parseOptionalOpr(Operands); 6275 } 6276 } 6277 6278 bool 6279 AMDGPUOperand::isSwizzle() const { 6280 return isImmTy(ImmTySwizzle); 6281 } 6282 6283 //===----------------------------------------------------------------------===// 6284 // VGPR Index Mode 6285 //===----------------------------------------------------------------------===// 6286 6287 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6288 6289 using namespace llvm::AMDGPU::VGPRIndexMode; 6290 6291 if (trySkipToken(AsmToken::RParen)) { 6292 return OFF; 6293 } 6294 6295 int64_t Imm = 0; 6296 6297 while (true) { 6298 unsigned Mode = 0; 6299 SMLoc S = Parser.getTok().getLoc(); 6300 6301 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6302 if (trySkipId(IdSymbolic[ModeId])) { 6303 Mode = 1 << ModeId; 6304 break; 6305 } 6306 } 6307 6308 if (Mode == 0) { 6309 Error(S, (Imm == 0)? 6310 "expected a VGPR index mode or a closing parenthesis" : 6311 "expected a VGPR index mode"); 6312 return UNDEF; 6313 } 6314 6315 if (Imm & Mode) { 6316 Error(S, "duplicate VGPR index mode"); 6317 return UNDEF; 6318 } 6319 Imm |= Mode; 6320 6321 if (trySkipToken(AsmToken::RParen)) 6322 break; 6323 if (!skipToken(AsmToken::Comma, 6324 "expected a comma or a closing parenthesis")) 6325 return UNDEF; 6326 } 6327 6328 return Imm; 6329 } 6330 6331 OperandMatchResultTy 6332 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6333 6334 using namespace llvm::AMDGPU::VGPRIndexMode; 6335 6336 int64_t Imm = 0; 6337 SMLoc S = Parser.getTok().getLoc(); 6338 6339 if (getLexer().getKind() == AsmToken::Identifier && 6340 Parser.getTok().getString() == "gpr_idx" && 6341 getLexer().peekTok().is(AsmToken::LParen)) { 6342 6343 Parser.Lex(); 6344 Parser.Lex(); 6345 6346 Imm = parseGPRIdxMacro(); 6347 if (Imm == UNDEF) 6348 return MatchOperand_ParseFail; 6349 6350 } else { 6351 if (getParser().parseAbsoluteExpression(Imm)) 6352 return MatchOperand_ParseFail; 6353 if (Imm < 0 || !isUInt<4>(Imm)) { 6354 Error(S, "invalid immediate: only 4-bit values are legal"); 6355 return MatchOperand_ParseFail; 6356 } 6357 } 6358 6359 Operands.push_back( 6360 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6361 return MatchOperand_Success; 6362 } 6363 6364 bool AMDGPUOperand::isGPRIdxMode() const { 6365 return isImmTy(ImmTyGprIdxMode); 6366 } 6367 6368 //===----------------------------------------------------------------------===// 6369 // sopp branch targets 6370 //===----------------------------------------------------------------------===// 6371 6372 OperandMatchResultTy 6373 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6374 6375 // Make sure we are not parsing something 6376 // that looks like a label or an expression but is not. 6377 // This will improve error messages. 6378 if (isRegister() || isModifier()) 6379 return MatchOperand_NoMatch; 6380 6381 if (!parseExpr(Operands)) 6382 return MatchOperand_ParseFail; 6383 6384 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6385 assert(Opr.isImm() || Opr.isExpr()); 6386 SMLoc Loc = Opr.getStartLoc(); 6387 6388 // Currently we do not support arbitrary expressions as branch targets. 6389 // Only labels and absolute expressions are accepted. 
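// (Informal examples: "s_branch label" and "s_branch 4" are accepted here,
// while a non-trivial relocatable expression such as "s_branch label+4" is
// reported as an error below.)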
6390 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6391 Error(Loc, "expected an absolute expression or a label"); 6392 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6393 Error(Loc, "expected a 16-bit signed jump offset"); 6394 } 6395 6396 return MatchOperand_Success; 6397 } 6398 6399 //===----------------------------------------------------------------------===// 6400 // Boolean holding registers 6401 //===----------------------------------------------------------------------===// 6402 6403 OperandMatchResultTy 6404 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6405 return parseReg(Operands); 6406 } 6407 6408 //===----------------------------------------------------------------------===// 6409 // mubuf 6410 //===----------------------------------------------------------------------===// 6411 6412 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6413 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6414 } 6415 6416 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6417 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6418 } 6419 6420 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6421 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6422 } 6423 6424 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6425 const OperandVector &Operands, 6426 bool IsAtomic, 6427 bool IsAtomicReturn, 6428 bool IsLds) { 6429 bool IsLdsOpcode = IsLds; 6430 bool HasLdsModifier = false; 6431 OptionalImmIndexMap OptionalIdx; 6432 assert(IsAtomicReturn ? IsAtomic : true); 6433 unsigned FirstOperandIdx = 1; 6434 6435 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6436 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6437 6438 // Add the register arguments 6439 if (Op.isReg()) { 6440 Op.addRegOperands(Inst, 1); 6441 // Insert a tied src for atomic return dst. 6442 // This cannot be postponed as subsequent calls to 6443 // addImmOperands rely on correct number of MC operands. 6444 if (IsAtomicReturn && i == FirstOperandIdx) 6445 Op.addRegOperands(Inst, 1); 6446 continue; 6447 } 6448 6449 // Handle the case where soffset is an immediate 6450 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6451 Op.addImmOperands(Inst, 1); 6452 continue; 6453 } 6454 6455 HasLdsModifier |= Op.isLDS(); 6456 6457 // Handle tokens like 'offen' which are sometimes hard-coded into the 6458 // asm string. There are no MCInst operands for these. 6459 if (Op.isToken()) { 6460 continue; 6461 } 6462 assert(Op.isImm()); 6463 6464 // Handle optional arguments 6465 OptionalIdx[Op.getImmTy()] = i; 6466 } 6467 6468 // This is a workaround for an llvm quirk which may result in an 6469 // incorrect instruction selection. Lds and non-lds versions of 6470 // MUBUF instructions are identical except that lds versions 6471 // have mandatory 'lds' modifier. However this modifier follows 6472 // optional modifiers and llvm asm matcher regards this 'lds' 6473 // modifier as an optional one. As a result, an lds version 6474 // of opcode may be selected even if it has no 'lds' modifier. 6475 if (IsLdsOpcode && !HasLdsModifier) { 6476 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6477 if (NoLdsOpcode != -1) { // Got lds version - correct it. 6478 Inst.setOpcode(NoLdsOpcode); 6479 IsLdsOpcode = false; 6480 } 6481 } 6482 6483 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6484 if (!IsAtomic) { // glc is hard-coded. 
6485 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6486 } 6487 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6488 6489 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6490 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6491 } 6492 6493 if (isGFX10()) 6494 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6495 } 6496 6497 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6498 OptionalImmIndexMap OptionalIdx; 6499 6500 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6501 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6502 6503 // Add the register arguments 6504 if (Op.isReg()) { 6505 Op.addRegOperands(Inst, 1); 6506 continue; 6507 } 6508 6509 // Handle the case where soffset is an immediate 6510 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6511 Op.addImmOperands(Inst, 1); 6512 continue; 6513 } 6514 6515 // Handle tokens like 'offen' which are sometimes hard-coded into the 6516 // asm string. There are no MCInst operands for these. 6517 if (Op.isToken()) { 6518 continue; 6519 } 6520 assert(Op.isImm()); 6521 6522 // Handle optional arguments 6523 OptionalIdx[Op.getImmTy()] = i; 6524 } 6525 6526 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6527 AMDGPUOperand::ImmTyOffset); 6528 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6529 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6530 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6531 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6532 6533 if (isGFX10()) 6534 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6535 } 6536 6537 //===----------------------------------------------------------------------===// 6538 // mimg 6539 //===----------------------------------------------------------------------===// 6540 6541 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6542 bool IsAtomic) { 6543 unsigned I = 1; 6544 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6545 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6546 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6547 } 6548 6549 if (IsAtomic) { 6550 // Add src, same as dst 6551 assert(Desc.getNumDefs() == 1); 6552 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6553 } 6554 6555 OptionalImmIndexMap OptionalIdx; 6556 6557 for (unsigned E = Operands.size(); I != E; ++I) { 6558 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6559 6560 // Add the register arguments 6561 if (Op.isReg()) { 6562 Op.addRegOperands(Inst, 1); 6563 } else if (Op.isImmModifier()) { 6564 OptionalIdx[Op.getImmTy()] = I; 6565 } else if (!Op.isToken()) { 6566 llvm_unreachable("unexpected operand type"); 6567 } 6568 } 6569 6570 bool IsGFX10 = isGFX10(); 6571 6572 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6573 if (IsGFX10) 6574 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6575 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6576 if (IsGFX10) 6577 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6578 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6579 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6580 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6581 if (IsGFX10) 6582 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6583 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6584 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6585 if (!IsGFX10) 6586 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6587 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6588 } 6589 6590 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6591 cvtMIMG(Inst, Operands, true); 6592 } 6593 6594 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 6595 const OperandVector &Operands) { 6596 for (unsigned I = 1; I < Operands.size(); ++I) { 6597 auto &Operand = (AMDGPUOperand &)*Operands[I]; 6598 if (Operand.isReg()) 6599 Operand.addRegOperands(Inst, 1); 6600 } 6601 6602 Inst.addOperand(MCOperand::createImm(1)); // a16 6603 } 6604 6605 //===----------------------------------------------------------------------===// 6606 // smrd 6607 //===----------------------------------------------------------------------===// 6608 6609 bool AMDGPUOperand::isSMRDOffset8() const { 6610 return isImm() && isUInt<8>(getImm()); 6611 } 6612 6613 bool AMDGPUOperand::isSMEMOffset() const { 6614 return isImm(); // Offset range is checked later by validator. 6615 } 6616 6617 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6618 // 32-bit literals are only supported on CI and we only want to use them 6619 // when the offset is > 8-bits. 6620 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6621 } 6622 6623 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6624 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6625 } 6626 6627 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6628 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6629 } 6630 6631 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6632 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6633 } 6634 6635 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6636 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6637 } 6638 6639 //===----------------------------------------------------------------------===// 6640 // vop3 6641 //===----------------------------------------------------------------------===// 6642 6643 static bool ConvertOmodMul(int64_t &Mul) { 6644 if (Mul != 1 && Mul != 2 && Mul != 4) 6645 return false; 6646 6647 Mul >>= 1; 6648 return true; 6649 } 6650 6651 static bool ConvertOmodDiv(int64_t &Div) { 6652 if (Div == 1) { 6653 Div = 0; 6654 return true; 6655 } 6656 6657 if (Div == 2) { 6658 Div = 3; 6659 return true; 6660 } 6661 6662 return false; 6663 } 6664 6665 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6666 if (BoundCtrl == 0) { 6667 BoundCtrl = 1; 6668 return true; 6669 } 6670 6671 if (BoundCtrl == -1) { 6672 BoundCtrl = 0; 6673 return true; 6674 } 6675 6676 return false; 6677 } 6678 6679 // Note: the order in this table matches the order of operands in AsmString. 
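// (Informal example: for "buffer_load_dword v0, off, s[0:3], s4 offset:4095 glc slc"
// the trailing "offset:4095", "glc" and "slc" are matched against the entries
// below by parseOptionalOpr.)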
6680 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6681 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6682 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6683 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6684 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6685 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6686 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6687 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6688 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6689 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6690 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6691 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6692 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6693 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6694 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6695 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6696 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6697 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6698 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6699 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6700 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6701 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6702 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6703 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6704 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6705 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6706 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6707 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6708 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6709 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6710 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6711 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6712 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6713 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6714 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6715 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6716 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6717 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6718 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6719 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6720 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6721 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6722 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6723 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6724 }; 6725 6726 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6727 6728 OperandMatchResultTy res = parseOptionalOpr(Operands); 6729 6730 // This is a hack to enable hardcoded mandatory operands which follow 6731 // optional operands. 6732 // 6733 // Current design assumes that all operands after the first optional operand 6734 // are also optional. However implementation of some instructions violates 6735 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6736 // 6737 // To alleviate this problem, we have to (implicitly) parse extra operands 6738 // to make sure autogenerated parser of custom operands never hit hardcoded 6739 // mandatory operands. 
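// (Informal example: a returning flat/global atomic such as
// "global_atomic_add v0, v[1:2], v3, off offset:16 glc" has a mandatory "glc"
// after the optional "offset:"; the bounded lookahead below lets this parser
// consume it as well.)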
6740 6741 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6742 if (res != MatchOperand_Success || 6743 isToken(AsmToken::EndOfStatement)) 6744 break; 6745 6746 trySkipToken(AsmToken::Comma); 6747 res = parseOptionalOpr(Operands); 6748 } 6749 6750 return res; 6751 } 6752 6753 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6754 OperandMatchResultTy res; 6755 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6756 // try to parse any optional operand here 6757 if (Op.IsBit) { 6758 res = parseNamedBit(Op.Name, Operands, Op.Type); 6759 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6760 res = parseOModOperand(Operands); 6761 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6762 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6763 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6764 res = parseSDWASel(Operands, Op.Name, Op.Type); 6765 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6766 res = parseSDWADstUnused(Operands); 6767 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6768 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6769 Op.Type == AMDGPUOperand::ImmTyNegLo || 6770 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6771 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6772 Op.ConvertResult); 6773 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6774 res = parseDim(Operands); 6775 } else { 6776 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6777 } 6778 if (res != MatchOperand_NoMatch) { 6779 return res; 6780 } 6781 } 6782 return MatchOperand_NoMatch; 6783 } 6784 6785 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6786 StringRef Name = Parser.getTok().getString(); 6787 if (Name == "mul") { 6788 return parseIntWithPrefix("mul", Operands, 6789 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6790 } 6791 6792 if (Name == "div") { 6793 return parseIntWithPrefix("div", Operands, 6794 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6795 } 6796 6797 return MatchOperand_NoMatch; 6798 } 6799 6800 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6801 cvtVOP3P(Inst, Operands); 6802 6803 int Opc = Inst.getOpcode(); 6804 6805 int SrcNum; 6806 const int Ops[] = { AMDGPU::OpName::src0, 6807 AMDGPU::OpName::src1, 6808 AMDGPU::OpName::src2 }; 6809 for (SrcNum = 0; 6810 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6811 ++SrcNum); 6812 assert(SrcNum > 0); 6813 6814 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6815 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6816 6817 if ((OpSel & (1 << SrcNum)) != 0) { 6818 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6819 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6820 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6821 } 6822 } 6823 6824 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6825 // 1. This operand is input modifiers 6826 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6827 // 2. This is not last operand 6828 && Desc.NumOperands > (OpNum + 1) 6829 // 3. Next operand is register class 6830 && Desc.OpInfo[OpNum + 1].RegClass != -1 6831 // 4. 
Next register is not tied to any other operand 6832 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6833 } 6834 6835 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6836 { 6837 OptionalImmIndexMap OptionalIdx; 6838 unsigned Opc = Inst.getOpcode(); 6839 6840 unsigned I = 1; 6841 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6842 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6843 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6844 } 6845 6846 for (unsigned E = Operands.size(); I != E; ++I) { 6847 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6848 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6849 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6850 } else if (Op.isInterpSlot() || 6851 Op.isInterpAttr() || 6852 Op.isAttrChan()) { 6853 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6854 } else if (Op.isImmModifier()) { 6855 OptionalIdx[Op.getImmTy()] = I; 6856 } else { 6857 llvm_unreachable("unhandled operand type"); 6858 } 6859 } 6860 6861 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6862 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6863 } 6864 6865 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6866 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6867 } 6868 6869 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6870 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6871 } 6872 } 6873 6874 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6875 OptionalImmIndexMap &OptionalIdx) { 6876 unsigned Opc = Inst.getOpcode(); 6877 6878 unsigned I = 1; 6879 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6880 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6881 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6882 } 6883 6884 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6885 // This instruction has src modifiers 6886 for (unsigned E = Operands.size(); I != E; ++I) { 6887 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6888 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6889 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6890 } else if (Op.isImmModifier()) { 6891 OptionalIdx[Op.getImmTy()] = I; 6892 } else if (Op.isRegOrImm()) { 6893 Op.addRegOrImmOperands(Inst, 1); 6894 } else { 6895 llvm_unreachable("unhandled operand type"); 6896 } 6897 } 6898 } else { 6899 // No src modifiers 6900 for (unsigned E = Operands.size(); I != E; ++I) { 6901 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6902 if (Op.isMod()) { 6903 OptionalIdx[Op.getImmTy()] = I; 6904 } else { 6905 Op.addRegOrImmOperands(Inst, 1); 6906 } 6907 } 6908 } 6909 6910 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6911 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6912 } 6913 6914 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6915 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6916 } 6917 6918 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6919 // it has src2 register operand that is tied to dst operand 6920 // we don't allow modifiers for this operand in assembler so src2_modifiers 6921 // should be 0. 
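// (Informal example: for "v_mac_f32_e64 v0, v1, v2" the tied src2 is the dst
// register v0; the code below inserts a zero src2_modifiers immediate and then
// copies operand 0 as the tied src2.)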
6922 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6923 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6924 Opc == AMDGPU::V_MAC_F32_e64_vi || 6925 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 6926 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 6927 Opc == AMDGPU::V_MAC_F16_e64_vi || 6928 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6929 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6930 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 6931 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6932 auto it = Inst.begin(); 6933 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6934 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6935 ++it; 6936 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6937 } 6938 } 6939 6940 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6941 OptionalImmIndexMap OptionalIdx; 6942 cvtVOP3(Inst, Operands, OptionalIdx); 6943 } 6944 6945 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6946 const OperandVector &Operands) { 6947 OptionalImmIndexMap OptIdx; 6948 const int Opc = Inst.getOpcode(); 6949 const MCInstrDesc &Desc = MII.get(Opc); 6950 6951 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6952 6953 cvtVOP3(Inst, Operands, OptIdx); 6954 6955 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6956 assert(!IsPacked); 6957 Inst.addOperand(Inst.getOperand(0)); 6958 } 6959 6960 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6961 // instruction, and then figure out where to actually put the modifiers 6962 6963 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6964 6965 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6966 if (OpSelHiIdx != -1) { 6967 int DefaultVal = IsPacked ? 
-1 : 0; 6968 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6969 DefaultVal); 6970 } 6971 6972 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6973 if (NegLoIdx != -1) { 6974 assert(IsPacked); 6975 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6976 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6977 } 6978 6979 const int Ops[] = { AMDGPU::OpName::src0, 6980 AMDGPU::OpName::src1, 6981 AMDGPU::OpName::src2 }; 6982 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6983 AMDGPU::OpName::src1_modifiers, 6984 AMDGPU::OpName::src2_modifiers }; 6985 6986 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6987 6988 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6989 unsigned OpSelHi = 0; 6990 unsigned NegLo = 0; 6991 unsigned NegHi = 0; 6992 6993 if (OpSelHiIdx != -1) { 6994 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6995 } 6996 6997 if (NegLoIdx != -1) { 6998 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6999 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7000 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7001 } 7002 7003 for (int J = 0; J < 3; ++J) { 7004 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7005 if (OpIdx == -1) 7006 break; 7007 7008 uint32_t ModVal = 0; 7009 7010 if ((OpSel & (1 << J)) != 0) 7011 ModVal |= SISrcMods::OP_SEL_0; 7012 7013 if ((OpSelHi & (1 << J)) != 0) 7014 ModVal |= SISrcMods::OP_SEL_1; 7015 7016 if ((NegLo & (1 << J)) != 0) 7017 ModVal |= SISrcMods::NEG; 7018 7019 if ((NegHi & (1 << J)) != 0) 7020 ModVal |= SISrcMods::NEG_HI; 7021 7022 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7023 7024 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7025 } 7026 } 7027 7028 //===----------------------------------------------------------------------===// 7029 // dpp 7030 //===----------------------------------------------------------------------===// 7031 7032 bool AMDGPUOperand::isDPP8() const { 7033 return isImmTy(ImmTyDPP8); 7034 } 7035 7036 bool AMDGPUOperand::isDPPCtrl() const { 7037 using namespace AMDGPU::DPP; 7038 7039 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7040 if (result) { 7041 int64_t Imm = getImm(); 7042 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7043 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7044 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7045 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7046 (Imm == DppCtrl::WAVE_SHL1) || 7047 (Imm == DppCtrl::WAVE_ROL1) || 7048 (Imm == DppCtrl::WAVE_SHR1) || 7049 (Imm == DppCtrl::WAVE_ROR1) || 7050 (Imm == DppCtrl::ROW_MIRROR) || 7051 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7052 (Imm == DppCtrl::BCAST15) || 7053 (Imm == DppCtrl::BCAST31) || 7054 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7055 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7056 } 7057 return false; 7058 } 7059 7060 //===----------------------------------------------------------------------===// 7061 // mAI 7062 //===----------------------------------------------------------------------===// 7063 7064 bool AMDGPUOperand::isBLGP() const { 7065 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7066 } 7067 7068 bool AMDGPUOperand::isCBSZ() const { 7069 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7070 } 7071 7072 bool AMDGPUOperand::isABID() 
const { 7073 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7074 } 7075 7076 bool AMDGPUOperand::isS16Imm() const { 7077 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7078 } 7079 7080 bool AMDGPUOperand::isU16Imm() const { 7081 return isImm() && isUInt<16>(getImm()); 7082 } 7083 7084 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7085 if (!isGFX10()) 7086 return MatchOperand_NoMatch; 7087 7088 SMLoc S = Parser.getTok().getLoc(); 7089 7090 if (getLexer().isNot(AsmToken::Identifier)) 7091 return MatchOperand_NoMatch; 7092 if (getLexer().getTok().getString() != "dim") 7093 return MatchOperand_NoMatch; 7094 7095 Parser.Lex(); 7096 if (getLexer().isNot(AsmToken::Colon)) 7097 return MatchOperand_ParseFail; 7098 7099 Parser.Lex(); 7100 7101 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 7102 // integer. 7103 std::string Token; 7104 if (getLexer().is(AsmToken::Integer)) { 7105 SMLoc Loc = getLexer().getTok().getEndLoc(); 7106 Token = std::string(getLexer().getTok().getString()); 7107 Parser.Lex(); 7108 if (getLexer().getTok().getLoc() != Loc) 7109 return MatchOperand_ParseFail; 7110 } 7111 if (getLexer().isNot(AsmToken::Identifier)) 7112 return MatchOperand_ParseFail; 7113 Token += getLexer().getTok().getString(); 7114 7115 StringRef DimId = Token; 7116 if (DimId.startswith("SQ_RSRC_IMG_")) 7117 DimId = DimId.substr(12); 7118 7119 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7120 if (!DimInfo) 7121 return MatchOperand_ParseFail; 7122 7123 Parser.Lex(); 7124 7125 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 7126 AMDGPUOperand::ImmTyDim)); 7127 return MatchOperand_Success; 7128 } 7129 7130 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7131 SMLoc S = Parser.getTok().getLoc(); 7132 StringRef Prefix; 7133 7134 if (getLexer().getKind() == AsmToken::Identifier) { 7135 Prefix = Parser.getTok().getString(); 7136 } else { 7137 return MatchOperand_NoMatch; 7138 } 7139 7140 if (Prefix != "dpp8") 7141 return parseDPPCtrl(Operands); 7142 if (!isGFX10()) 7143 return MatchOperand_NoMatch; 7144 7145 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7146 7147 int64_t Sels[8]; 7148 7149 Parser.Lex(); 7150 if (getLexer().isNot(AsmToken::Colon)) 7151 return MatchOperand_ParseFail; 7152 7153 Parser.Lex(); 7154 if (getLexer().isNot(AsmToken::LBrac)) 7155 return MatchOperand_ParseFail; 7156 7157 Parser.Lex(); 7158 if (getParser().parseAbsoluteExpression(Sels[0])) 7159 return MatchOperand_ParseFail; 7160 if (0 > Sels[0] || 7 < Sels[0]) 7161 return MatchOperand_ParseFail; 7162 7163 for (size_t i = 1; i < 8; ++i) { 7164 if (getLexer().isNot(AsmToken::Comma)) 7165 return MatchOperand_ParseFail; 7166 7167 Parser.Lex(); 7168 if (getParser().parseAbsoluteExpression(Sels[i])) 7169 return MatchOperand_ParseFail; 7170 if (0 > Sels[i] || 7 < Sels[i]) 7171 return MatchOperand_ParseFail; 7172 } 7173 7174 if (getLexer().isNot(AsmToken::RBrac)) 7175 return MatchOperand_ParseFail; 7176 Parser.Lex(); 7177 7178 unsigned DPP8 = 0; 7179 for (size_t i = 0; i < 8; ++i) 7180 DPP8 |= (Sels[i] << (i * 3)); 7181 7182 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7183 return MatchOperand_Success; 7184 } 7185 7186 OperandMatchResultTy 7187 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7188 using namespace AMDGPU::DPP; 7189 7190 SMLoc S = Parser.getTok().getLoc(); 7191 StringRef Prefix; 7192 int64_t Int; 7193 7194 if 

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrl from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast"
        && Prefix != "row_share"
        && Prefix != "row_xmask") {
      return MatchOperand_NoMatch;
    }

    if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
      return MatchOperand_NoMatch;

    if (!isVI() && !isGFX9() &&
        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
         Prefix == "wave_rol" || Prefix == "wave_ror" ||
         Prefix == "row_bcast"))
      return MatchOperand_NoMatch;

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i * 2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHARE_FIRST;
      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_XMASK_FIRST;
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
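
// Worked example of the quad_perm encoding produced above (illustrative):
//   quad_perm:[3,2,1,0] reverses the four lanes of each quad and encodes as
//   3 | (2 << 2) | (1 << 4) | (0 << 6) = 0x1B, which falls inside
//   [QUAD_PERM_FIRST, QUAD_PERM_LAST] and is therefore accepted by isDPPCtrl().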

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }
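
  // Finalize the instruction: for DPP8 the fi operand is the only trailing
  // immediate and is appended explicitly; for classic DPP, any row_mask,
  // bank_mask, bound_ctrl or fi the user omitted is filled in with its
  // default below.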
  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
            .Case("BYTE_0", SdwaSel::BYTE_0)
            .Case("BYTE_1", SdwaSel::BYTE_1)
            .Case("BYTE_2", SdwaSel::BYTE_2)
            .Case("BYTE_3", SdwaSel::BYTE_3)
            .Case("WORD_0", SdwaSel::WORD_0)
            .Case("WORD_1", SdwaSel::WORD_1)
            .Case("DWORD", SdwaSel::DWORD)
            .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
            .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
            .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
            .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
            .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
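
// For reference, SDWA selectors typically look like this in assembly
// (illustrative operand values):
//   v_add_f32_sdwa v0, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD
//       src0_sel:BYTE_0 src1_sel:DWORD
// Any selector the user omits is given its default (DWORD for the sel
// operands, UNUSED_PRESERVE for dst_unused) by cvtSDWA() below.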

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}
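
// What follows wires the parser into LLVM's MC infrastructure: a static
// initialization hook that registers AMDGPUAsmParser for the targets exposed
// by this backend, the TableGen-generated matcher tables, and a target hook
// that reconciles custom operand kinds with the generated matcher.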

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // where the matcher expects the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }