1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/Error.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 #include "llvm/Support/SMLoc.h" 52 #include "llvm/Support/TargetParser.h" 53 #include "llvm/Support/TargetRegistry.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <cassert> 57 #include <cstdint> 58 #include <cstring> 59 #include <iterator> 60 #include <map> 61 #include <memory> 62 #include <string> 63 64 using namespace llvm; 65 using namespace llvm::AMDGPU; 66 using namespace llvm::amdhsa; 67 68 namespace { 69 70 class AMDGPUAsmParser; 71 72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 73 74 //===----------------------------------------------------------------------===// 75 // Operand 76 //===----------------------------------------------------------------------===// 77 78 class AMDGPUOperand : public MCParsedAsmOperand { 79 enum KindTy { 80 Token, 81 Immediate, 82 Register, 83 Expression 84 } Kind; 85 86 SMLoc StartLoc, EndLoc; 87 const AMDGPUAsmParser *AsmParser; 88 89 public: 90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 92 93 using Ptr = std::unique_ptr<AMDGPUOperand>; 94 95 struct Modifiers { 96 bool Abs = false; 97 bool Neg = false; 98 bool Sext = false; 99 100 bool hasFPModifiers() const { return Abs || Neg; } 101 bool hasIntModifiers() const { return Sext; } 102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 103 104 int64_t getFPModifiersOperand() const { 105 int64_t Operand = 0; 106 Operand |= Abs ? SISrcMods::ABS : 0u; 107 Operand |= Neg ? 
                       SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
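    // Illustrative example (not from the original source): in
    // "ds_write_b32 v1, v2 gds" the trailing "gds" may first be parsed as a
    // symbol-reference expression; matching it against the 'gds' modifier
    // operand then falls back to using the referenced symbol's name as the
    // token.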
225 return isSymbolRefExpr(); 226 } 227 228 bool isSymbolRefExpr() const { 229 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 230 } 231 232 bool isImm() const override { 233 return Kind == Immediate; 234 } 235 236 bool isInlinableImm(MVT type) const; 237 bool isLiteralImm(MVT type) const; 238 239 bool isRegKind() const { 240 return Kind == Register; 241 } 242 243 bool isReg() const override { 244 return isRegKind() && !hasModifiers(); 245 } 246 247 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 248 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 249 } 250 251 bool isRegOrImmWithInt16InputMods() const { 252 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 253 } 254 255 bool isRegOrImmWithInt32InputMods() const { 256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 257 } 258 259 bool isRegOrImmWithInt64InputMods() const { 260 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 261 } 262 263 bool isRegOrImmWithFP16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 265 } 266 267 bool isRegOrImmWithFP32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 269 } 270 271 bool isRegOrImmWithFP64InputMods() const { 272 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 273 } 274 275 bool isVReg() const { 276 return isRegClass(AMDGPU::VGPR_32RegClassID) || 277 isRegClass(AMDGPU::VReg_64RegClassID) || 278 isRegClass(AMDGPU::VReg_96RegClassID) || 279 isRegClass(AMDGPU::VReg_128RegClassID) || 280 isRegClass(AMDGPU::VReg_160RegClassID) || 281 isRegClass(AMDGPU::VReg_192RegClassID) || 282 isRegClass(AMDGPU::VReg_256RegClassID) || 283 isRegClass(AMDGPU::VReg_512RegClassID) || 284 isRegClass(AMDGPU::VReg_1024RegClassID); 285 } 286 287 bool isVReg32() const { 288 return isRegClass(AMDGPU::VGPR_32RegClassID); 289 } 290 291 bool isVReg32OrOff() const { 292 return isOff() || isVReg32(); 293 } 294 295 bool isNull() const { 296 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 297 } 298 299 bool isSDWAOperand(MVT type) const; 300 bool isSDWAFP16Operand() const; 301 bool isSDWAFP32Operand() const; 302 bool isSDWAInt16Operand() const; 303 bool isSDWAInt32Operand() const; 304 305 bool isImmTy(ImmTy ImmT) const { 306 return isImm() && Imm.Type == ImmT; 307 } 308 309 bool isImmModifier() const { 310 return isImm() && Imm.Type != ImmTyNone; 311 } 312 313 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 314 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 315 bool isDMask() const { return isImmTy(ImmTyDMask); } 316 bool isDim() const { return isImmTy(ImmTyDim); } 317 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 318 bool isDA() const { return isImmTy(ImmTyDA); } 319 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 320 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 321 bool isLWE() const { return isImmTy(ImmTyLWE); } 322 bool isOff() const { return isImmTy(ImmTyOff); } 323 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 324 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 325 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 326 bool isOffen() const { return isImmTy(ImmTyOffen); } 327 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 328 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 329 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 330 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 331 bool 
isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 332 333 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 334 bool isGDS() const { return isImmTy(ImmTyGDS); } 335 bool isLDS() const { return isImmTy(ImmTyLDS); } 336 bool isDLC() const { return isImmTy(ImmTyDLC); } 337 bool isGLC() const { return isImmTy(ImmTyGLC); } 338 bool isSLC() const { return isImmTy(ImmTySLC); } 339 bool isSWZ() const { return isImmTy(ImmTySWZ); } 340 bool isTFE() const { return isImmTy(ImmTyTFE); } 341 bool isD16() const { return isImmTy(ImmTyD16); } 342 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 343 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 344 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 345 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 346 bool isFI() const { return isImmTy(ImmTyDppFi); } 347 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 348 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 349 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 350 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 351 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 352 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 353 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 354 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 355 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 356 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 357 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 358 bool isHigh() const { return isImmTy(ImmTyHigh); } 359 360 bool isMod() const { 361 return isClampSI() || isOModSI(); 362 } 363 364 bool isRegOrImm() const { 365 return isReg() || isImm(); 366 } 367 368 bool isRegClass(unsigned RCID) const; 369 370 bool isInlineValue() const; 371 372 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 373 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 374 } 375 376 bool isSCSrcB16() const { 377 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 378 } 379 380 bool isSCSrcV2B16() const { 381 return isSCSrcB16(); 382 } 383 384 bool isSCSrcB32() const { 385 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 386 } 387 388 bool isSCSrcB64() const { 389 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 390 } 391 392 bool isBoolReg() const; 393 394 bool isSCSrcF16() const { 395 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 396 } 397 398 bool isSCSrcV2F16() const { 399 return isSCSrcF16(); 400 } 401 402 bool isSCSrcF32() const { 403 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 404 } 405 406 bool isSCSrcF64() const { 407 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 408 } 409 410 bool isSSrcB32() const { 411 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 412 } 413 414 bool isSSrcB16() const { 415 return isSCSrcB16() || isLiteralImm(MVT::i16); 416 } 417 418 bool isSSrcV2B16() const { 419 llvm_unreachable("cannot happen"); 420 return isSSrcB16(); 421 } 422 423 bool isSSrcB64() const { 424 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 425 // See isVSrc64(). 
426 return isSCSrcB64() || isLiteralImm(MVT::i64); 427 } 428 429 bool isSSrcF32() const { 430 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 431 } 432 433 bool isSSrcF64() const { 434 return isSCSrcB64() || isLiteralImm(MVT::f64); 435 } 436 437 bool isSSrcF16() const { 438 return isSCSrcB16() || isLiteralImm(MVT::f16); 439 } 440 441 bool isSSrcV2F16() const { 442 llvm_unreachable("cannot happen"); 443 return isSSrcF16(); 444 } 445 446 bool isSSrcOrLdsB32() const { 447 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 448 isLiteralImm(MVT::i32) || isExpr(); 449 } 450 451 bool isVCSrcB32() const { 452 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 453 } 454 455 bool isVCSrcB64() const { 456 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 457 } 458 459 bool isVCSrcB16() const { 460 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 461 } 462 463 bool isVCSrcV2B16() const { 464 return isVCSrcB16(); 465 } 466 467 bool isVCSrcF32() const { 468 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 469 } 470 471 bool isVCSrcF64() const { 472 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 473 } 474 475 bool isVCSrcF16() const { 476 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 477 } 478 479 bool isVCSrcV2F16() const { 480 return isVCSrcF16(); 481 } 482 483 bool isVSrcB32() const { 484 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 485 } 486 487 bool isVSrcB64() const { 488 return isVCSrcF64() || isLiteralImm(MVT::i64); 489 } 490 491 bool isVSrcB16() const { 492 return isVCSrcB16() || isLiteralImm(MVT::i16); 493 } 494 495 bool isVSrcV2B16() const { 496 return isVSrcB16() || isLiteralImm(MVT::v2i16); 497 } 498 499 bool isVSrcF32() const { 500 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 501 } 502 503 bool isVSrcF64() const { 504 return isVCSrcF64() || isLiteralImm(MVT::f64); 505 } 506 507 bool isVSrcF16() const { 508 return isVCSrcF16() || isLiteralImm(MVT::f16); 509 } 510 511 bool isVSrcV2F16() const { 512 return isVSrcF16() || isLiteralImm(MVT::v2f16); 513 } 514 515 bool isVISrcB32() const { 516 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 517 } 518 519 bool isVISrcB16() const { 520 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 521 } 522 523 bool isVISrcV2B16() const { 524 return isVISrcB16(); 525 } 526 527 bool isVISrcF32() const { 528 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 529 } 530 531 bool isVISrcF16() const { 532 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 533 } 534 535 bool isVISrcV2F16() const { 536 return isVISrcF16() || isVISrcB32(); 537 } 538 539 bool isAISrcB32() const { 540 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 541 } 542 543 bool isAISrcB16() const { 544 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 545 } 546 547 bool isAISrcV2B16() const { 548 return isAISrcB16(); 549 } 550 551 bool isAISrcF32() const { 552 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 553 } 554 555 bool isAISrcF16() const { 556 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 557 } 558 559 bool isAISrcV2F16() const { 560 return isAISrcF16() || isAISrcB32(); 561 } 562 563 bool isAISrc_128B32() const { 564 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 565 } 566 567 bool isAISrc_128B16() const { 568 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 569 } 570 571 bool 
isAISrc_128V2B16() const { 572 return isAISrc_128B16(); 573 } 574 575 bool isAISrc_128F32() const { 576 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 577 } 578 579 bool isAISrc_128F16() const { 580 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 581 } 582 583 bool isAISrc_128V2F16() const { 584 return isAISrc_128F16() || isAISrc_128B32(); 585 } 586 587 bool isAISrc_512B32() const { 588 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 589 } 590 591 bool isAISrc_512B16() const { 592 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 593 } 594 595 bool isAISrc_512V2B16() const { 596 return isAISrc_512B16(); 597 } 598 599 bool isAISrc_512F32() const { 600 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 601 } 602 603 bool isAISrc_512F16() const { 604 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 605 } 606 607 bool isAISrc_512V2F16() const { 608 return isAISrc_512F16() || isAISrc_512B32(); 609 } 610 611 bool isAISrc_1024B32() const { 612 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 613 } 614 615 bool isAISrc_1024B16() const { 616 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 617 } 618 619 bool isAISrc_1024V2B16() const { 620 return isAISrc_1024B16(); 621 } 622 623 bool isAISrc_1024F32() const { 624 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 625 } 626 627 bool isAISrc_1024F16() const { 628 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 629 } 630 631 bool isAISrc_1024V2F16() const { 632 return isAISrc_1024F16() || isAISrc_1024B32(); 633 } 634 635 bool isKImmFP32() const { 636 return isLiteralImm(MVT::f32); 637 } 638 639 bool isKImmFP16() const { 640 return isLiteralImm(MVT::f16); 641 } 642 643 bool isMem() const override { 644 return false; 645 } 646 647 bool isExpr() const { 648 return Kind == Expression; 649 } 650 651 bool isSoppBrTarget() const { 652 return isExpr() || isImm(); 653 } 654 655 bool isSWaitCnt() const; 656 bool isHwreg() const; 657 bool isSendMsg() const; 658 bool isSwizzle() const; 659 bool isSMRDOffset8() const; 660 bool isSMEMOffset() const; 661 bool isSMRDLiteralOffset() const; 662 bool isDPP8() const; 663 bool isDPPCtrl() const; 664 bool isBLGP() const; 665 bool isCBSZ() const; 666 bool isABID() const; 667 bool isGPRIdxMode() const; 668 bool isS16Imm() const; 669 bool isU16Imm() const; 670 bool isEndpgm() const; 671 672 StringRef getExpressionAsToken() const { 673 assert(isExpr()); 674 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 675 return S->getSymbol().getName(); 676 } 677 678 StringRef getToken() const { 679 assert(isToken()); 680 681 if (Kind == Expression) 682 return getExpressionAsToken(); 683 684 return StringRef(Tok.Data, Tok.Length); 685 } 686 687 int64_t getImm() const { 688 assert(isImm()); 689 return Imm.Val; 690 } 691 692 void setImm(int64_t Val) { 693 assert(isImm()); 694 Imm.Val = Val; 695 } 696 697 ImmTy getImmTy() const { 698 assert(isImm()); 699 return Imm.Type; 700 } 701 702 unsigned getReg() const override { 703 assert(isRegKind()); 704 return Reg.RegNo; 705 } 706 707 SMLoc getStartLoc() const override { 708 return StartLoc; 709 } 710 711 SMLoc getEndLoc() const override { 712 return EndLoc; 713 } 714 715 SMRange getLocRange() const { 716 return SMRange(StartLoc, EndLoc); 717 } 718 719 Modifiers getModifiers() const { 720 assert(isRegKind() || isImmTy(ImmTyNone)); 721 return isRegKind() ? 
Reg.Mods : Imm.Mods; 722 } 723 724 void setModifiers(Modifiers Mods) { 725 assert(isRegKind() || isImmTy(ImmTyNone)); 726 if (isRegKind()) 727 Reg.Mods = Mods; 728 else 729 Imm.Mods = Mods; 730 } 731 732 bool hasModifiers() const { 733 return getModifiers().hasModifiers(); 734 } 735 736 bool hasFPModifiers() const { 737 return getModifiers().hasFPModifiers(); 738 } 739 740 bool hasIntModifiers() const { 741 return getModifiers().hasIntModifiers(); 742 } 743 744 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 745 746 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 747 748 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 749 750 template <unsigned Bitwidth> 751 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 752 753 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 754 addKImmFPOperands<16>(Inst, N); 755 } 756 757 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 758 addKImmFPOperands<32>(Inst, N); 759 } 760 761 void addRegOperands(MCInst &Inst, unsigned N) const; 762 763 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 764 addRegOperands(Inst, N); 765 } 766 767 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 768 if (isRegKind()) 769 addRegOperands(Inst, N); 770 else if (isExpr()) 771 Inst.addOperand(MCOperand::createExpr(Expr)); 772 else 773 addImmOperands(Inst, N); 774 } 775 776 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 777 Modifiers Mods = getModifiers(); 778 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 779 if (isRegKind()) { 780 addRegOperands(Inst, N); 781 } else { 782 addImmOperands(Inst, N, false); 783 } 784 } 785 786 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 787 assert(!hasIntModifiers()); 788 addRegOrImmWithInputModsOperands(Inst, N); 789 } 790 791 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 792 assert(!hasFPModifiers()); 793 addRegOrImmWithInputModsOperands(Inst, N); 794 } 795 796 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 797 Modifiers Mods = getModifiers(); 798 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 799 assert(isRegKind()); 800 addRegOperands(Inst, N); 801 } 802 803 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 804 assert(!hasIntModifiers()); 805 addRegWithInputModsOperands(Inst, N); 806 } 807 808 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 809 assert(!hasFPModifiers()); 810 addRegWithInputModsOperands(Inst, N); 811 } 812 813 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 814 if (isImm()) 815 addImmOperands(Inst, N); 816 else { 817 assert(isExpr()); 818 Inst.addOperand(MCOperand::createExpr(Expr)); 819 } 820 } 821 822 static void printImmTy(raw_ostream& OS, ImmTy Type) { 823 switch (Type) { 824 case ImmTyNone: OS << "None"; break; 825 case ImmTyGDS: OS << "GDS"; break; 826 case ImmTyLDS: OS << "LDS"; break; 827 case ImmTyOffen: OS << "Offen"; break; 828 case ImmTyIdxen: OS << "Idxen"; break; 829 case ImmTyAddr64: OS << "Addr64"; break; 830 case ImmTyOffset: OS << "Offset"; break; 831 case ImmTyInstOffset: OS << "InstOffset"; break; 832 case ImmTyOffset0: OS << "Offset0"; break; 833 case ImmTyOffset1: OS << "Offset1"; break; 834 case ImmTyDLC: OS << "DLC"; break; 835 case ImmTyGLC: OS << "GLC"; break; 836 case ImmTySLC: OS << "SLC"; break; 837 case ImmTySWZ: OS << "SWZ"; break; 838 case ImmTyTFE: OS << "TFE"; break; 839 case 
ImmTyD16: OS << "D16"; break; 840 case ImmTyFORMAT: OS << "FORMAT"; break; 841 case ImmTyClampSI: OS << "ClampSI"; break; 842 case ImmTyOModSI: OS << "OModSI"; break; 843 case ImmTyDPP8: OS << "DPP8"; break; 844 case ImmTyDppCtrl: OS << "DppCtrl"; break; 845 case ImmTyDppRowMask: OS << "DppRowMask"; break; 846 case ImmTyDppBankMask: OS << "DppBankMask"; break; 847 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 848 case ImmTyDppFi: OS << "FI"; break; 849 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 850 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 851 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 852 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 853 case ImmTyDMask: OS << "DMask"; break; 854 case ImmTyDim: OS << "Dim"; break; 855 case ImmTyUNorm: OS << "UNorm"; break; 856 case ImmTyDA: OS << "DA"; break; 857 case ImmTyR128A16: OS << "R128A16"; break; 858 case ImmTyA16: OS << "A16"; break; 859 case ImmTyLWE: OS << "LWE"; break; 860 case ImmTyOff: OS << "Off"; break; 861 case ImmTyExpTgt: OS << "ExpTgt"; break; 862 case ImmTyExpCompr: OS << "ExpCompr"; break; 863 case ImmTyExpVM: OS << "ExpVM"; break; 864 case ImmTyHwreg: OS << "Hwreg"; break; 865 case ImmTySendMsg: OS << "SendMsg"; break; 866 case ImmTyInterpSlot: OS << "InterpSlot"; break; 867 case ImmTyInterpAttr: OS << "InterpAttr"; break; 868 case ImmTyAttrChan: OS << "AttrChan"; break; 869 case ImmTyOpSel: OS << "OpSel"; break; 870 case ImmTyOpSelHi: OS << "OpSelHi"; break; 871 case ImmTyNegLo: OS << "NegLo"; break; 872 case ImmTyNegHi: OS << "NegHi"; break; 873 case ImmTySwizzle: OS << "Swizzle"; break; 874 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 875 case ImmTyHigh: OS << "High"; break; 876 case ImmTyBLGP: OS << "BLGP"; break; 877 case ImmTyCBSZ: OS << "CBSZ"; break; 878 case ImmTyABID: OS << "ABID"; break; 879 case ImmTyEndpgm: OS << "Endpgm"; break; 880 } 881 } 882 883 void print(raw_ostream &OS) const override { 884 switch (Kind) { 885 case Register: 886 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 887 break; 888 case Immediate: 889 OS << '<' << getImm(); 890 if (getImmTy() != ImmTyNone) { 891 OS << " type: "; printImmTy(OS, getImmTy()); 892 } 893 OS << " mods: " << Imm.Mods << '>'; 894 break; 895 case Token: 896 OS << '\'' << getToken() << '\''; 897 break; 898 case Expression: 899 OS << "<expr " << *Expr << '>'; 900 break; 901 } 902 } 903 904 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 905 int64_t Val, SMLoc Loc, 906 ImmTy Type = ImmTyNone, 907 bool IsFPImm = false) { 908 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 909 Op->Imm.Val = Val; 910 Op->Imm.IsFPImm = IsFPImm; 911 Op->Imm.Type = Type; 912 Op->Imm.Mods = Modifiers(); 913 Op->StartLoc = Loc; 914 Op->EndLoc = Loc; 915 return Op; 916 } 917 918 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 919 StringRef Str, SMLoc Loc, 920 bool HasExplicitEncodingSize = true) { 921 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 922 Res->Tok.Data = Str.data(); 923 Res->Tok.Length = Str.size(); 924 Res->StartLoc = Loc; 925 Res->EndLoc = Loc; 926 return Res; 927 } 928 929 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 930 unsigned RegNo, SMLoc S, 931 SMLoc E) { 932 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 933 Op->Reg.RegNo = RegNo; 934 Op->Reg.Mods = Modifiers(); 935 Op->StartLoc = S; 936 Op->EndLoc = E; 937 return Op; 938 } 939 940 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 941 
const class MCExpr *Expr, SMLoc S) { 942 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 943 Op->Expr = Expr; 944 Op->StartLoc = S; 945 Op->EndLoc = S; 946 return Op; 947 } 948 }; 949 950 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 951 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 952 return OS; 953 } 954 955 //===----------------------------------------------------------------------===// 956 // AsmParser 957 //===----------------------------------------------------------------------===// 958 959 // Holds info related to the current kernel, e.g. count of SGPRs used. 960 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 961 // .amdgpu_hsa_kernel or at EOF. 962 class KernelScopeInfo { 963 int SgprIndexUnusedMin = -1; 964 int VgprIndexUnusedMin = -1; 965 MCContext *Ctx = nullptr; 966 967 void usesSgprAt(int i) { 968 if (i >= SgprIndexUnusedMin) { 969 SgprIndexUnusedMin = ++i; 970 if (Ctx) { 971 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 972 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 973 } 974 } 975 } 976 977 void usesVgprAt(int i) { 978 if (i >= VgprIndexUnusedMin) { 979 VgprIndexUnusedMin = ++i; 980 if (Ctx) { 981 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 982 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 983 } 984 } 985 } 986 987 public: 988 KernelScopeInfo() = default; 989 990 void initialize(MCContext &Context) { 991 Ctx = &Context; 992 usesSgprAt(SgprIndexUnusedMin = -1); 993 usesVgprAt(VgprIndexUnusedMin = -1); 994 } 995 996 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 997 switch (RegKind) { 998 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 999 case IS_AGPR: // fall through 1000 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1001 default: break; 1002 } 1003 } 1004 }; 1005 1006 class AMDGPUAsmParser : public MCTargetAsmParser { 1007 MCAsmParser &Parser; 1008 1009 // Number of extra operands parsed after the first optional operand. 1010 // This may be necessary to skip hardcoded mandatory operands. 1011 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1012 1013 unsigned ForcedEncodingSize = 0; 1014 bool ForcedDPP = false; 1015 bool ForcedSDWA = false; 1016 KernelScopeInfo KernelScope; 1017 1018 /// @name Auto-generated Match Functions 1019 /// { 1020 1021 #define GET_ASSEMBLER_HEADER 1022 #include "AMDGPUGenAsmMatcher.inc" 1023 1024 /// } 1025 1026 private: 1027 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1028 bool OutOfRangeError(SMRange Range); 1029 /// Calculate VGPR/SGPR blocks required for given target, reserved 1030 /// registers, and user-specified NextFreeXGPR values. 1031 /// 1032 /// \param Features [in] Target features, used for bug corrections. 1033 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1034 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1035 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1036 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1037 /// descriptor field, if valid. 1038 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1039 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1040 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1041 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 
1042 /// \param VGPRBlocks [out] Result VGPR block count. 1043 /// \param SGPRBlocks [out] Result SGPR block count. 1044 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1045 bool FlatScrUsed, bool XNACKUsed, 1046 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1047 SMRange VGPRRange, unsigned NextFreeSGPR, 1048 SMRange SGPRRange, unsigned &VGPRBlocks, 1049 unsigned &SGPRBlocks); 1050 bool ParseDirectiveAMDGCNTarget(); 1051 bool ParseDirectiveAMDHSAKernel(); 1052 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1053 bool ParseDirectiveHSACodeObjectVersion(); 1054 bool ParseDirectiveHSACodeObjectISA(); 1055 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1056 bool ParseDirectiveAMDKernelCodeT(); 1057 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1058 bool ParseDirectiveAMDGPUHsaKernel(); 1059 1060 bool ParseDirectiveISAVersion(); 1061 bool ParseDirectiveHSAMetadata(); 1062 bool ParseDirectivePALMetadataBegin(); 1063 bool ParseDirectivePALMetadata(); 1064 bool ParseDirectiveAMDGPULDS(); 1065 1066 /// Common code to parse out a block of text (typically YAML) between start and 1067 /// end directives. 1068 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1069 const char *AssemblerDirectiveEnd, 1070 std::string &CollectString); 1071 1072 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1073 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1074 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1075 unsigned &RegNum, unsigned &RegWidth, 1076 bool RestoreOnFailure = false); 1077 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1078 unsigned &RegNum, unsigned &RegWidth, 1079 SmallVectorImpl<AsmToken> &Tokens); 1080 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1081 unsigned &RegWidth, 1082 SmallVectorImpl<AsmToken> &Tokens); 1083 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1084 unsigned &RegWidth, 1085 SmallVectorImpl<AsmToken> &Tokens); 1086 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1087 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1088 bool ParseRegRange(unsigned& Num, unsigned& Width); 1089 unsigned getRegularReg(RegisterKind RegKind, 1090 unsigned RegNum, 1091 unsigned RegWidth, 1092 SMLoc Loc); 1093 1094 bool isRegister(); 1095 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1096 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1097 void initializeGprCountSymbol(RegisterKind RegKind); 1098 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1099 unsigned RegWidth); 1100 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1101 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 1102 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1103 bool IsGdsHardcoded); 1104 1105 public: 1106 enum AMDGPUMatchResultTy { 1107 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1108 }; 1109 enum OperandMode { 1110 OperandMode_Default, 1111 OperandMode_NSA, 1112 }; 1113 1114 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1115 1116 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1117 const MCInstrInfo &MII, 1118 const MCTargetOptions &Options) 1119 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1120 MCAsmParserExtension::Initialize(Parser); 1121 1122 if (getFeatureBits().none()) { 1123 // Set default features. 
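      // No explicit target features were given; default to Southern Islands
      // (gfx6), the first GCN generation.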
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
1233 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1234 } 1235 1236 const MCInstrInfo *getMII() const { 1237 return &MII; 1238 } 1239 1240 const FeatureBitset &getFeatureBits() const { 1241 return getSTI().getFeatureBits(); 1242 } 1243 1244 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1245 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1246 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1247 1248 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1249 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1250 bool isForcedDPP() const { return ForcedDPP; } 1251 bool isForcedSDWA() const { return ForcedSDWA; } 1252 ArrayRef<unsigned> getMatchedVariants() const; 1253 StringRef getMatchedVariantName() const; 1254 1255 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1256 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1257 bool RestoreOnFailure); 1258 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1259 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1260 SMLoc &EndLoc) override; 1261 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1262 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1263 unsigned Kind) override; 1264 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1265 OperandVector &Operands, MCStreamer &Out, 1266 uint64_t &ErrorInfo, 1267 bool MatchingInlineAsm) override; 1268 bool ParseDirective(AsmToken DirectiveID) override; 1269 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1270 OperandMode Mode = OperandMode_Default); 1271 StringRef parseMnemonicSuffix(StringRef Name); 1272 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1273 SMLoc NameLoc, OperandVector &Operands) override; 1274 //bool ProcessInstruction(MCInst &Inst); 1275 1276 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1277 1278 OperandMatchResultTy 1279 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1280 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1281 bool (*ConvertResult)(int64_t &) = nullptr); 1282 1283 OperandMatchResultTy 1284 parseOperandArrayWithPrefix(const char *Prefix, 1285 OperandVector &Operands, 1286 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1287 bool (*ConvertResult)(int64_t&) = nullptr); 1288 1289 OperandMatchResultTy 1290 parseNamedBit(const char *Name, OperandVector &Operands, 1291 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1292 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1293 StringRef &Value); 1294 1295 bool isModifier(); 1296 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1297 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1298 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1299 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1300 bool parseSP3NegModifier(); 1301 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1302 OperandMatchResultTy parseReg(OperandVector &Operands); 1303 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1304 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1305 OperandMatchResultTy 
parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1306 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1307 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1308 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1309 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1310 OperandMatchResultTy parseUfmt(int64_t &Format); 1311 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1312 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1313 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1314 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1315 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1316 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1317 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1318 1319 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1320 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1321 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1322 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1323 1324 bool parseCnt(int64_t &IntVal); 1325 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1326 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1327 1328 private: 1329 struct OperandInfoTy { 1330 int64_t Id; 1331 bool IsSymbolic = false; 1332 bool IsDefined = false; 1333 1334 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1335 }; 1336 1337 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1338 bool validateSendMsg(const OperandInfoTy &Msg, 1339 const OperandInfoTy &Op, 1340 const OperandInfoTy &Stream, 1341 const SMLoc Loc); 1342 1343 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1344 bool validateHwreg(const OperandInfoTy &HwReg, 1345 const int64_t Offset, 1346 const int64_t Width, 1347 const SMLoc Loc); 1348 1349 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1350 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1351 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1352 1353 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1354 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1355 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1356 bool validateSOPLiteral(const MCInst &Inst) const; 1357 bool validateConstantBusLimitations(const MCInst &Inst); 1358 bool validateEarlyClobberLimitations(const MCInst &Inst); 1359 bool validateIntClampSupported(const MCInst &Inst); 1360 bool validateMIMGAtomicDMask(const MCInst &Inst); 1361 bool validateMIMGGatherDMask(const MCInst &Inst); 1362 bool validateMovrels(const MCInst &Inst); 1363 bool validateMIMGDataSize(const MCInst &Inst); 1364 bool validateMIMGAddrSize(const MCInst &Inst); 1365 bool validateMIMGD16(const MCInst &Inst); 1366 bool validateMIMGDim(const MCInst &Inst); 1367 bool validateLdsDirect(const MCInst &Inst); 1368 bool validateOpSel(const MCInst &Inst); 1369 bool validateVccOperand(unsigned Reg) const; 1370 bool validateVOP3Literal(const MCInst &Inst) const; 1371 bool validateMAIAccWrite(const MCInst &Inst); 1372 unsigned getConstantBusLimit(unsigned Opcode) const; 1373 bool usesConstantBus(const MCInst &Inst, unsigned 
OpIdx); 1374 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1375 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1376 1377 bool isSupportedMnemo(StringRef Mnemo, 1378 const FeatureBitset &FBS); 1379 bool isSupportedMnemo(StringRef Mnemo, 1380 const FeatureBitset &FBS, 1381 ArrayRef<unsigned> Variants); 1382 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1383 1384 bool isId(const StringRef Id) const; 1385 bool isId(const AsmToken &Token, const StringRef Id) const; 1386 bool isToken(const AsmToken::TokenKind Kind) const; 1387 bool trySkipId(const StringRef Id); 1388 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1389 bool trySkipToken(const AsmToken::TokenKind Kind); 1390 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1391 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1392 bool parseId(StringRef &Val, const StringRef ErrMsg); 1393 1394 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1395 AsmToken::TokenKind getTokenKind() const; 1396 bool parseExpr(int64_t &Imm); 1397 bool parseExpr(OperandVector &Operands); 1398 StringRef getTokenStr() const; 1399 AsmToken peekToken(); 1400 AsmToken getToken() const; 1401 SMLoc getLoc() const; 1402 void lex(); 1403 1404 public: 1405 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1406 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1407 1408 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1409 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1410 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1411 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1412 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1413 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1414 1415 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1416 const unsigned MinVal, 1417 const unsigned MaxVal, 1418 const StringRef ErrMsg); 1419 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1420 bool parseSwizzleOffset(int64_t &Imm); 1421 bool parseSwizzleMacro(int64_t &Imm); 1422 bool parseSwizzleQuadPerm(int64_t &Imm); 1423 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1424 bool parseSwizzleBroadcast(int64_t &Imm); 1425 bool parseSwizzleSwap(int64_t &Imm); 1426 bool parseSwizzleReverse(int64_t &Imm); 1427 1428 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1429 int64_t parseGPRIdxMacro(); 1430 1431 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1432 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1433 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1434 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1435 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1436 1437 AMDGPUOperand::Ptr defaultDLC() const; 1438 AMDGPUOperand::Ptr defaultGLC() const; 1439 AMDGPUOperand::Ptr defaultSLC() const; 1440 1441 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1442 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1443 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1444 AMDGPUOperand::Ptr defaultFlatOffset() const; 1445 1446 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1447 1448 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1449 
OptionalImmIndexMap &OptionalIdx); 1450 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1451 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1452 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1453 1454 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1455 1456 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1457 bool IsAtomic = false); 1458 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1459 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1460 1461 OperandMatchResultTy parseDim(OperandVector &Operands); 1462 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1463 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1464 AMDGPUOperand::Ptr defaultRowMask() const; 1465 AMDGPUOperand::Ptr defaultBankMask() const; 1466 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1467 AMDGPUOperand::Ptr defaultFI() const; 1468 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1469 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1470 1471 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1472 AMDGPUOperand::ImmTy Type); 1473 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1474 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1475 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1476 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1477 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1478 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1479 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1480 uint64_t BasicInstType, 1481 bool SkipDstVcc = false, 1482 bool SkipSrcVcc = false); 1483 1484 AMDGPUOperand::Ptr defaultBLGP() const; 1485 AMDGPUOperand::Ptr defaultCBSZ() const; 1486 AMDGPUOperand::Ptr defaultABID() const; 1487 1488 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1489 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1490 }; 1491 1492 struct OptionalOperand { 1493 const char *Name; 1494 AMDGPUOperand::ImmTy Type; 1495 bool IsBit; 1496 bool (*ConvertResult)(int64_t&); 1497 }; 1498 1499 } // end anonymous namespace 1500 1501 // May be called with integer type with equivalent bitwidth. 
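// For example, a 4-byte operand selects IEEEsingle() below, so an i32 operand
// uses the same float semantics as f32 when a literal is converted for it.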
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to the semantics of the expected operand type.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 32 bits of the literal to zeroes, but we accept
    // such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)?
MVT::i16 : type; 1692 1693 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1694 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1695 } 1696 1697 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1698 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1699 } 1700 1701 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1702 if (AsmParser->isVI()) 1703 return isVReg32(); 1704 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1705 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1706 else 1707 return false; 1708 } 1709 1710 bool AMDGPUOperand::isSDWAFP16Operand() const { 1711 return isSDWAOperand(MVT::f16); 1712 } 1713 1714 bool AMDGPUOperand::isSDWAFP32Operand() const { 1715 return isSDWAOperand(MVT::f32); 1716 } 1717 1718 bool AMDGPUOperand::isSDWAInt16Operand() const { 1719 return isSDWAOperand(MVT::i16); 1720 } 1721 1722 bool AMDGPUOperand::isSDWAInt32Operand() const { 1723 return isSDWAOperand(MVT::i32); 1724 } 1725 1726 bool AMDGPUOperand::isBoolReg() const { 1727 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1728 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1729 } 1730 1731 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1732 { 1733 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1734 assert(Size == 2 || Size == 4 || Size == 8); 1735 1736 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1737 1738 if (Imm.Mods.Abs) { 1739 Val &= ~FpSignMask; 1740 } 1741 if (Imm.Mods.Neg) { 1742 Val ^= FpSignMask; 1743 } 1744 1745 return Val; 1746 } 1747 1748 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1749 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1750 Inst.getNumOperands())) { 1751 addLiteralImmOperand(Inst, Imm.Val, 1752 ApplyModifiers & 1753 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1754 } else { 1755 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1756 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1757 } 1758 } 1759 1760 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1761 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1762 auto OpNum = Inst.getNumOperands(); 1763 // Check that this operand accepts literals 1764 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1765 1766 if (ApplyModifiers) { 1767 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1768 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1769 Val = applyInputFPModifiers(Val, Size); 1770 } 1771 1772 APInt Literal(64, Val); 1773 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1774 1775 if (Imm.IsFPImm) { // We got fp literal token 1776 switch (OpTy) { 1777 case AMDGPU::OPERAND_REG_IMM_INT64: 1778 case AMDGPU::OPERAND_REG_IMM_FP64: 1779 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1780 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1781 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1782 AsmParser->hasInv2PiInlineImm())) { 1783 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1784 return; 1785 } 1786 1787 // Non-inlineable 1788 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1789 // For fp operands we check if low 32 bits are zeros 1790 if (Literal.getLoBits(32) != 0) { 1791 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1792 "Can't encode literal as exact 64-bit floating-point operand. " 1793 "Low 32-bits will be set to zero"); 1794 } 1795 1796 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1797 return; 1798 } 1799 1800 // We don't allow fp literals in 64-bit integer instructions. It is 1801 // unclear how we should encode them. This case should be checked earlier 1802 // in predicate methods (isLiteralImm()) 1803 llvm_unreachable("fp literal in 64-bit integer instruction."); 1804 1805 case AMDGPU::OPERAND_REG_IMM_INT32: 1806 case AMDGPU::OPERAND_REG_IMM_FP32: 1807 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1808 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1809 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1810 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1811 case AMDGPU::OPERAND_REG_IMM_INT16: 1812 case AMDGPU::OPERAND_REG_IMM_FP16: 1813 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1814 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1815 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1816 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1817 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1818 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1819 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1820 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1821 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1822 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1823 bool lost; 1824 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1825 // Convert the literal to the operand's floating-point semantics 1826 FPLiteral.convert(*getOpFltSemantics(OpTy), 1827 APFloat::rmNearestTiesToEven, &lost); 1828 // We allow precision loss but not overflow or underflow. This should be 1829 // checked earlier in isLiteralImm() 1830 1831 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1832 Inst.addOperand(MCOperand::createImm(ImmVal)); 1833 return; 1834 } 1835 default: 1836 llvm_unreachable("invalid operand size"); 1837 } 1838 1839 return; 1840 } 1841 1842 // We got int literal token. 1843 // Only sign extend inline immediates.
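// For example (illustrative): with a 32-bit operand, -1 is emitted directly as
// the inline constant -1, while a value like 0x12345 is not inlinable and is
// emitted as a 32-bit literal truncated to its low 32 bits.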
1844 switch (OpTy) { 1845 case AMDGPU::OPERAND_REG_IMM_INT32: 1846 case AMDGPU::OPERAND_REG_IMM_FP32: 1847 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1848 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1849 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1850 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1851 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1852 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1853 if (isSafeTruncation(Val, 32) && 1854 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1855 AsmParser->hasInv2PiInlineImm())) { 1856 Inst.addOperand(MCOperand::createImm(Val)); 1857 return; 1858 } 1859 1860 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1861 return; 1862 1863 case AMDGPU::OPERAND_REG_IMM_INT64: 1864 case AMDGPU::OPERAND_REG_IMM_FP64: 1865 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1866 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1867 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1868 Inst.addOperand(MCOperand::createImm(Val)); 1869 return; 1870 } 1871 1872 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1873 return; 1874 1875 case AMDGPU::OPERAND_REG_IMM_INT16: 1876 case AMDGPU::OPERAND_REG_IMM_FP16: 1877 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1878 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1879 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1880 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1881 if (isSafeTruncation(Val, 16) && 1882 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1883 AsmParser->hasInv2PiInlineImm())) { 1884 Inst.addOperand(MCOperand::createImm(Val)); 1885 return; 1886 } 1887 1888 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1889 return; 1890 1891 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1892 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1893 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1894 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1895 assert(isSafeTruncation(Val, 16)); 1896 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1897 AsmParser->hasInv2PiInlineImm())); 1898 1899 Inst.addOperand(MCOperand::createImm(Val)); 1900 return; 1901 } 1902 default: 1903 llvm_unreachable("invalid operand size"); 1904 } 1905 } 1906 1907 template <unsigned Bitwidth> 1908 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1909 APInt Literal(64, Imm.Val); 1910 1911 if (!Imm.IsFPImm) { 1912 // We got int literal token. 
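// Only the low Bitwidth bits of the value are encoded here; e.g. a 16-bit
// KImm operand keeps just the low 16 bits (illustrative example).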
1913 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1914 return; 1915 } 1916 1917 bool Lost; 1918 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1919 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1920 APFloat::rmNearestTiesToEven, &Lost); 1921 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1922 } 1923 1924 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1925 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1926 } 1927 1928 static bool isInlineValue(unsigned Reg) { 1929 switch (Reg) { 1930 case AMDGPU::SRC_SHARED_BASE: 1931 case AMDGPU::SRC_SHARED_LIMIT: 1932 case AMDGPU::SRC_PRIVATE_BASE: 1933 case AMDGPU::SRC_PRIVATE_LIMIT: 1934 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1935 return true; 1936 case AMDGPU::SRC_VCCZ: 1937 case AMDGPU::SRC_EXECZ: 1938 case AMDGPU::SRC_SCC: 1939 return true; 1940 case AMDGPU::SGPR_NULL: 1941 return true; 1942 default: 1943 return false; 1944 } 1945 } 1946 1947 bool AMDGPUOperand::isInlineValue() const { 1948 return isRegKind() && ::isInlineValue(getReg()); 1949 } 1950 1951 //===----------------------------------------------------------------------===// 1952 // AsmParser 1953 //===----------------------------------------------------------------------===// 1954 1955 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1956 if (Is == IS_VGPR) { 1957 switch (RegWidth) { 1958 default: return -1; 1959 case 1: return AMDGPU::VGPR_32RegClassID; 1960 case 2: return AMDGPU::VReg_64RegClassID; 1961 case 3: return AMDGPU::VReg_96RegClassID; 1962 case 4: return AMDGPU::VReg_128RegClassID; 1963 case 5: return AMDGPU::VReg_160RegClassID; 1964 case 6: return AMDGPU::VReg_192RegClassID; 1965 case 8: return AMDGPU::VReg_256RegClassID; 1966 case 16: return AMDGPU::VReg_512RegClassID; 1967 case 32: return AMDGPU::VReg_1024RegClassID; 1968 } 1969 } else if (Is == IS_TTMP) { 1970 switch (RegWidth) { 1971 default: return -1; 1972 case 1: return AMDGPU::TTMP_32RegClassID; 1973 case 2: return AMDGPU::TTMP_64RegClassID; 1974 case 4: return AMDGPU::TTMP_128RegClassID; 1975 case 8: return AMDGPU::TTMP_256RegClassID; 1976 case 16: return AMDGPU::TTMP_512RegClassID; 1977 } 1978 } else if (Is == IS_SGPR) { 1979 switch (RegWidth) { 1980 default: return -1; 1981 case 1: return AMDGPU::SGPR_32RegClassID; 1982 case 2: return AMDGPU::SGPR_64RegClassID; 1983 case 3: return AMDGPU::SGPR_96RegClassID; 1984 case 4: return AMDGPU::SGPR_128RegClassID; 1985 case 5: return AMDGPU::SGPR_160RegClassID; 1986 case 6: return AMDGPU::SGPR_192RegClassID; 1987 case 8: return AMDGPU::SGPR_256RegClassID; 1988 case 16: return AMDGPU::SGPR_512RegClassID; 1989 } 1990 } else if (Is == IS_AGPR) { 1991 switch (RegWidth) { 1992 default: return -1; 1993 case 1: return AMDGPU::AGPR_32RegClassID; 1994 case 2: return AMDGPU::AReg_64RegClassID; 1995 case 3: return AMDGPU::AReg_96RegClassID; 1996 case 4: return AMDGPU::AReg_128RegClassID; 1997 case 5: return AMDGPU::AReg_160RegClassID; 1998 case 6: return AMDGPU::AReg_192RegClassID; 1999 case 8: return AMDGPU::AReg_256RegClassID; 2000 case 16: return AMDGPU::AReg_512RegClassID; 2001 case 32: return AMDGPU::AReg_1024RegClassID; 2002 } 2003 } 2004 return -1; 2005 } 2006 2007 static unsigned getSpecialRegForName(StringRef RegName) { 2008 return StringSwitch<unsigned>(RegName) 2009 .Case("exec", AMDGPU::EXEC) 2010 .Case("vcc", AMDGPU::VCC) 2011 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2012 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2013 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2014 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2015 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2016 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2017 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2018 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2019 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2020 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2021 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2022 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2023 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2024 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2025 .Case("m0", AMDGPU::M0) 2026 .Case("vccz", AMDGPU::SRC_VCCZ) 2027 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2028 .Case("execz", AMDGPU::SRC_EXECZ) 2029 .Case("src_execz", AMDGPU::SRC_EXECZ) 2030 .Case("scc", AMDGPU::SRC_SCC) 2031 .Case("src_scc", AMDGPU::SRC_SCC) 2032 .Case("tba", AMDGPU::TBA) 2033 .Case("tma", AMDGPU::TMA) 2034 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2035 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2036 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2037 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2038 .Case("vcc_lo", AMDGPU::VCC_LO) 2039 .Case("vcc_hi", AMDGPU::VCC_HI) 2040 .Case("exec_lo", AMDGPU::EXEC_LO) 2041 .Case("exec_hi", AMDGPU::EXEC_HI) 2042 .Case("tma_lo", AMDGPU::TMA_LO) 2043 .Case("tma_hi", AMDGPU::TMA_HI) 2044 .Case("tba_lo", AMDGPU::TBA_LO) 2045 .Case("tba_hi", AMDGPU::TBA_HI) 2046 .Case("pc", AMDGPU::PC_REG) 2047 .Case("null", AMDGPU::SGPR_NULL) 2048 .Default(AMDGPU::NoRegister); 2049 } 2050 2051 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2052 SMLoc &EndLoc, bool RestoreOnFailure) { 2053 auto R = parseRegister(); 2054 if (!R) return true; 2055 assert(R->isReg()); 2056 RegNo = R->getReg(); 2057 StartLoc = R->getStartLoc(); 2058 EndLoc = R->getEndLoc(); 2059 return false; 2060 } 2061 2062 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2063 SMLoc &EndLoc) { 2064 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2065 } 2066 2067 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2068 SMLoc &StartLoc, 2069 SMLoc &EndLoc) { 2070 bool Result = 2071 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2072 bool PendingErrors = getParser().hasPendingError(); 2073 getParser().clearPendingErrors(); 2074 if (PendingErrors) 2075 return MatchOperand_ParseFail; 2076 if (Result) 2077 return MatchOperand_NoMatch; 2078 return MatchOperand_Success; 2079 } 2080 2081 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2082 RegisterKind RegKind, unsigned Reg1, 2083 SMLoc Loc) { 2084 switch (RegKind) { 2085 case IS_SPECIAL: 2086 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2087 Reg = AMDGPU::EXEC; 2088 RegWidth = 2; 2089 return true; 2090 } 2091 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2092 Reg = AMDGPU::FLAT_SCR; 2093 RegWidth = 2; 2094 return true; 2095 } 2096 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2097 Reg = AMDGPU::XNACK_MASK; 2098 RegWidth = 2; 2099 return true; 2100 } 2101 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2102 Reg = AMDGPU::VCC; 2103 RegWidth = 2; 2104 return true; 2105 } 2106 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2107 Reg = AMDGPU::TBA; 2108 RegWidth = 2; 2109 return true; 2110 } 2111 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2112 Reg = AMDGPU::TMA; 2113 
RegWidth = 2; 2114 return true; 2115 } 2116 Error(Loc, "register does not fit in the list"); 2117 return false; 2118 case IS_VGPR: 2119 case IS_SGPR: 2120 case IS_AGPR: 2121 case IS_TTMP: 2122 if (Reg1 != Reg + RegWidth) { 2123 Error(Loc, "registers in a list must have consecutive indices"); 2124 return false; 2125 } 2126 RegWidth++; 2127 return true; 2128 default: 2129 llvm_unreachable("unexpected register kind"); 2130 } 2131 } 2132 2133 struct RegInfo { 2134 StringLiteral Name; 2135 RegisterKind Kind; 2136 }; 2137 2138 static constexpr RegInfo RegularRegisters[] = { 2139 {{"v"}, IS_VGPR}, 2140 {{"s"}, IS_SGPR}, 2141 {{"ttmp"}, IS_TTMP}, 2142 {{"acc"}, IS_AGPR}, 2143 {{"a"}, IS_AGPR}, 2144 }; 2145 2146 static bool isRegularReg(RegisterKind Kind) { 2147 return Kind == IS_VGPR || 2148 Kind == IS_SGPR || 2149 Kind == IS_TTMP || 2150 Kind == IS_AGPR; 2151 } 2152 2153 static const RegInfo* getRegularRegInfo(StringRef Str) { 2154 for (const RegInfo &Reg : RegularRegisters) 2155 if (Str.startswith(Reg.Name)) 2156 return &Reg; 2157 return nullptr; 2158 } 2159 2160 static bool getRegNum(StringRef Str, unsigned& Num) { 2161 return !Str.getAsInteger(10, Num); 2162 } 2163 2164 bool 2165 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2166 const AsmToken &NextToken) const { 2167 2168 // A list of consecutive registers: [s0,s1,s2,s3] 2169 if (Token.is(AsmToken::LBrac)) 2170 return true; 2171 2172 if (!Token.is(AsmToken::Identifier)) 2173 return false; 2174 2175 // A single register like s0 or a range of registers like s[0:1] 2176 2177 StringRef Str = Token.getString(); 2178 const RegInfo *Reg = getRegularRegInfo(Str); 2179 if (Reg) { 2180 StringRef RegName = Reg->Name; 2181 StringRef RegSuffix = Str.substr(RegName.size()); 2182 if (!RegSuffix.empty()) { 2183 unsigned Num; 2184 // A single register with an index: rXX 2185 if (getRegNum(RegSuffix, Num)) 2186 return true; 2187 } else { 2188 // A range of registers: r[XX:YY]. 2189 if (NextToken.is(AsmToken::LBrac)) 2190 return true; 2191 } 2192 } 2193 2194 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2195 } 2196 2197 bool 2198 AMDGPUAsmParser::isRegister() 2199 { 2200 return isRegister(getToken(), peekToken()); 2201 } 2202 2203 unsigned 2204 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2205 unsigned RegNum, 2206 unsigned RegWidth, 2207 SMLoc Loc) { 2208 2209 assert(isRegularReg(RegKind)); 2210 2211 unsigned AlignSize = 1; 2212 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2213 // SGPR and TTMP registers must be aligned. 2214 // Max required alignment is 4 dwords. 
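// For example (illustrative): s[2:3] is a valid 64-bit pair because its first
// index is a multiple of 2, while s[3:4] is rejected; s[4:7] is a valid
// 128-bit tuple, while s[2:5] is rejected because 2 is not a multiple of 4.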
2215 AlignSize = std::min(RegWidth, 4u); 2216 } 2217 2218 if (RegNum % AlignSize != 0) { 2219 Error(Loc, "invalid register alignment"); 2220 return AMDGPU::NoRegister; 2221 } 2222 2223 unsigned RegIdx = RegNum / AlignSize; 2224 int RCID = getRegClass(RegKind, RegWidth); 2225 if (RCID == -1) { 2226 Error(Loc, "invalid or unsupported register size"); 2227 return AMDGPU::NoRegister; 2228 } 2229 2230 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2231 const MCRegisterClass RC = TRI->getRegClass(RCID); 2232 if (RegIdx >= RC.getNumRegs()) { 2233 Error(Loc, "register index is out of range"); 2234 return AMDGPU::NoRegister; 2235 } 2236 2237 return RC.getRegister(RegIdx); 2238 } 2239 2240 bool 2241 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2242 int64_t RegLo, RegHi; 2243 if (!skipToken(AsmToken::LBrac, "missing register index")) 2244 return false; 2245 2246 SMLoc FirstIdxLoc = getLoc(); 2247 SMLoc SecondIdxLoc; 2248 2249 if (!parseExpr(RegLo)) 2250 return false; 2251 2252 if (trySkipToken(AsmToken::Colon)) { 2253 SecondIdxLoc = getLoc(); 2254 if (!parseExpr(RegHi)) 2255 return false; 2256 } else { 2257 RegHi = RegLo; 2258 } 2259 2260 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2261 return false; 2262 2263 if (!isUInt<32>(RegLo)) { 2264 Error(FirstIdxLoc, "invalid register index"); 2265 return false; 2266 } 2267 2268 if (!isUInt<32>(RegHi)) { 2269 Error(SecondIdxLoc, "invalid register index"); 2270 return false; 2271 } 2272 2273 if (RegLo > RegHi) { 2274 Error(FirstIdxLoc, "first register index should not exceed second index"); 2275 return false; 2276 } 2277 2278 Num = static_cast<unsigned>(RegLo); 2279 Width = (RegHi - RegLo) + 1; 2280 return true; 2281 } 2282 2283 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2284 unsigned &RegNum, unsigned &RegWidth, 2285 SmallVectorImpl<AsmToken> &Tokens) { 2286 assert(isToken(AsmToken::Identifier)); 2287 unsigned Reg = getSpecialRegForName(getTokenStr()); 2288 if (Reg) { 2289 RegNum = 0; 2290 RegWidth = 1; 2291 RegKind = IS_SPECIAL; 2292 Tokens.push_back(getToken()); 2293 lex(); // skip register name 2294 } 2295 return Reg; 2296 } 2297 2298 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2299 unsigned &RegNum, unsigned &RegWidth, 2300 SmallVectorImpl<AsmToken> &Tokens) { 2301 assert(isToken(AsmToken::Identifier)); 2302 StringRef RegName = getTokenStr(); 2303 auto Loc = getLoc(); 2304 2305 const RegInfo *RI = getRegularRegInfo(RegName); 2306 if (!RI) { 2307 Error(Loc, "invalid register name"); 2308 return AMDGPU::NoRegister; 2309 } 2310 2311 Tokens.push_back(getToken()); 2312 lex(); // skip register name 2313 2314 RegKind = RI->Kind; 2315 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2316 if (!RegSuffix.empty()) { 2317 // Single 32-bit register: vXX. 2318 if (!getRegNum(RegSuffix, RegNum)) { 2319 Error(Loc, "invalid register index"); 2320 return AMDGPU::NoRegister; 2321 } 2322 RegWidth = 1; 2323 } else { 2324 // Range of registers: v[XX:YY]. ":YY" is optional. 
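// For example (illustrative): v[0:3] names a four-register tuple, while v[5]
// (a bracketed single index) is parsed as a single 32-bit register.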
2325 if (!ParseRegRange(RegNum, RegWidth)) 2326 return AMDGPU::NoRegister; 2327 } 2328 2329 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2330 } 2331 2332 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2333 unsigned &RegWidth, 2334 SmallVectorImpl<AsmToken> &Tokens) { 2335 unsigned Reg = AMDGPU::NoRegister; 2336 auto ListLoc = getLoc(); 2337 2338 if (!skipToken(AsmToken::LBrac, 2339 "expected a register or a list of registers")) { 2340 return AMDGPU::NoRegister; 2341 } 2342 2343 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2344 2345 auto Loc = getLoc(); 2346 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2347 return AMDGPU::NoRegister; 2348 if (RegWidth != 1) { 2349 Error(Loc, "expected a single 32-bit register"); 2350 return AMDGPU::NoRegister; 2351 } 2352 2353 for (; trySkipToken(AsmToken::Comma); ) { 2354 RegisterKind NextRegKind; 2355 unsigned NextReg, NextRegNum, NextRegWidth; 2356 Loc = getLoc(); 2357 2358 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2359 NextRegNum, NextRegWidth, 2360 Tokens)) { 2361 return AMDGPU::NoRegister; 2362 } 2363 if (NextRegWidth != 1) { 2364 Error(Loc, "expected a single 32-bit register"); 2365 return AMDGPU::NoRegister; 2366 } 2367 if (NextRegKind != RegKind) { 2368 Error(Loc, "registers in a list must be of the same kind"); 2369 return AMDGPU::NoRegister; 2370 } 2371 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2372 return AMDGPU::NoRegister; 2373 } 2374 2375 if (!skipToken(AsmToken::RBrac, 2376 "expected a comma or a closing square bracket")) { 2377 return AMDGPU::NoRegister; 2378 } 2379 2380 if (isRegularReg(RegKind)) 2381 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2382 2383 return Reg; 2384 } 2385 2386 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2387 unsigned &RegNum, unsigned &RegWidth, 2388 SmallVectorImpl<AsmToken> &Tokens) { 2389 auto Loc = getLoc(); 2390 Reg = AMDGPU::NoRegister; 2391 2392 if (isToken(AsmToken::Identifier)) { 2393 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2394 if (Reg == AMDGPU::NoRegister) 2395 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2396 } else { 2397 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2398 } 2399 2400 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2401 if (Reg == AMDGPU::NoRegister) { 2402 assert(Parser.hasPendingError()); 2403 return false; 2404 } 2405 2406 if (!subtargetHasRegister(*TRI, Reg)) { 2407 if (Reg == AMDGPU::SGPR_NULL) { 2408 Error(Loc, "'null' operand is not supported on this GPU"); 2409 } else { 2410 Error(Loc, "register not available on this GPU"); 2411 } 2412 return false; 2413 } 2414 2415 return true; 2416 } 2417 2418 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2419 unsigned &RegNum, unsigned &RegWidth, 2420 bool RestoreOnFailure /*=false*/) { 2421 Reg = AMDGPU::NoRegister; 2422 2423 SmallVector<AsmToken, 1> Tokens; 2424 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2425 if (RestoreOnFailure) { 2426 while (!Tokens.empty()) { 2427 getLexer().UnLex(Tokens.pop_back_val()); 2428 } 2429 } 2430 return true; 2431 } 2432 return false; 2433 } 2434 2435 Optional<StringRef> 2436 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2437 switch (RegKind) { 2438 case IS_VGPR: 2439 return StringRef(".amdgcn.next_free_vgpr"); 2440 case IS_SGPR: 2441 return StringRef(".amdgcn.next_free_sgpr"); 2442 default: 2443 return None; 2444 } 2445 } 2446 2447 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2448 auto SymbolName = getGprCountSymbolName(RegKind); 2449 assert(SymbolName && "initializing invalid register kind"); 2450 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2451 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2452 } 2453 2454 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2455 unsigned DwordRegIndex, 2456 unsigned RegWidth) { 2457 // Symbols are only defined for GCN targets 2458 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2459 return true; 2460 2461 auto SymbolName = getGprCountSymbolName(RegKind); 2462 if (!SymbolName) 2463 return true; 2464 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2465 2466 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2467 int64_t OldCount; 2468 2469 if (!Sym->isVariable()) 2470 return !Error(getParser().getTok().getLoc(), 2471 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2472 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2473 return !Error( 2474 getParser().getTok().getLoc(), 2475 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2476 2477 if (OldCount <= NewMax) 2478 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2479 2480 return true; 2481 } 2482 2483 std::unique_ptr<AMDGPUOperand> 2484 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2485 const auto &Tok = Parser.getTok(); 2486 SMLoc StartLoc = Tok.getLoc(); 2487 SMLoc EndLoc = Tok.getEndLoc(); 2488 RegisterKind RegKind; 2489 unsigned Reg, RegNum, RegWidth; 2490 2491 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2492 return nullptr; 2493 } 2494 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2495 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2496 return nullptr; 2497 } else 2498 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2499 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2500 } 2501 2502 OperandMatchResultTy 2503 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2504 // TODO: add syntactic sugar for 1/(2*PI) 2505 2506 assert(!isRegister()); 2507 assert(!isModifier()); 2508 2509 const auto& Tok = getToken(); 2510 const auto& NextTok = peekToken(); 2511 bool IsReal = Tok.is(AsmToken::Real); 2512 SMLoc S = getLoc(); 2513 bool Negate = false; 2514 2515 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2516 lex(); 2517 IsReal = true; 2518 Negate = true; 2519 } 2520 2521 if (IsReal) { 2522 // Floating-point expressions are not supported. 2523 // Can only allow floating-point literals with an 2524 // optional sign. 
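// For example (illustrative): "1.5" and "-0.25" are accepted here, while a
// compound expression such as "2.0*x" is not.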
2525 2526 StringRef Num = getTokenStr(); 2527 lex(); 2528 2529 APFloat RealVal(APFloat::IEEEdouble()); 2530 auto roundMode = APFloat::rmNearestTiesToEven; 2531 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2532 return MatchOperand_ParseFail; 2533 } 2534 if (Negate) 2535 RealVal.changeSign(); 2536 2537 Operands.push_back( 2538 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2539 AMDGPUOperand::ImmTyNone, true)); 2540 2541 return MatchOperand_Success; 2542 2543 } else { 2544 int64_t IntVal; 2545 const MCExpr *Expr; 2546 SMLoc S = getLoc(); 2547 2548 if (HasSP3AbsModifier) { 2549 // This is a workaround for handling expressions 2550 // as arguments of SP3 'abs' modifier, for example: 2551 // |1.0| 2552 // |-1| 2553 // |1+x| 2554 // This syntax is not compatible with syntax of standard 2555 // MC expressions (due to the trailing '|'). 2556 SMLoc EndLoc; 2557 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2558 return MatchOperand_ParseFail; 2559 } else { 2560 if (Parser.parseExpression(Expr)) 2561 return MatchOperand_ParseFail; 2562 } 2563 2564 if (Expr->evaluateAsAbsolute(IntVal)) { 2565 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2566 } else { 2567 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2568 } 2569 2570 return MatchOperand_Success; 2571 } 2572 2573 return MatchOperand_NoMatch; 2574 } 2575 2576 OperandMatchResultTy 2577 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2578 if (!isRegister()) 2579 return MatchOperand_NoMatch; 2580 2581 if (auto R = parseRegister()) { 2582 assert(R->isReg()); 2583 Operands.push_back(std::move(R)); 2584 return MatchOperand_Success; 2585 } 2586 return MatchOperand_ParseFail; 2587 } 2588 2589 OperandMatchResultTy 2590 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2591 auto res = parseReg(Operands); 2592 if (res != MatchOperand_NoMatch) { 2593 return res; 2594 } else if (isModifier()) { 2595 return MatchOperand_NoMatch; 2596 } else { 2597 return parseImm(Operands, HasSP3AbsMod); 2598 } 2599 } 2600 2601 bool 2602 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2603 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2604 const auto &str = Token.getString(); 2605 return str == "abs" || str == "neg" || str == "sext"; 2606 } 2607 return false; 2608 } 2609 2610 bool 2611 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2612 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2613 } 2614 2615 bool 2616 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2617 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2618 } 2619 2620 bool 2621 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2622 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2623 } 2624 2625 // Check if this is an operand modifier or an opcode modifier 2626 // which may look like an expression but it is not. We should 2627 // avoid parsing these modifiers as expressions. Currently 2628 // recognized sequences are: 2629 // |...| 2630 // abs(...) 2631 // neg(...) 2632 // sext(...) 2633 // -reg 2634 // -|...| 2635 // -abs(...) 2636 // name:... 2637 // Note that simple opcode modifiers like 'gds' may be parsed as 2638 // expressions; this is a special case. See getExpressionAsToken. 
2639 // 2640 bool 2641 AMDGPUAsmParser::isModifier() { 2642 2643 AsmToken Tok = getToken(); 2644 AsmToken NextToken[2]; 2645 peekTokens(NextToken); 2646 2647 return isOperandModifier(Tok, NextToken[0]) || 2648 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2649 isOpcodeModifierWithVal(Tok, NextToken[0]); 2650 } 2651 2652 // Check if the current token is an SP3 'neg' modifier. 2653 // Currently this modifier is allowed in the following context: 2654 // 2655 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2656 // 2. Before an 'abs' modifier: -abs(...) 2657 // 3. Before an SP3 'abs' modifier: -|...| 2658 // 2659 // In all other cases "-" is handled as a part 2660 // of an expression that follows the sign. 2661 // 2662 // Note: When "-" is followed by an integer literal, 2663 // this is interpreted as integer negation rather 2664 // than a floating-point NEG modifier applied to the literal. 2665 // Besides being counter-intuitive, such use of a floating-point 2666 // NEG modifier would have resulted in different meanings 2667 // of integer literals used with VOP1/2/C and VOP3, 2668 // for example: 2669 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2670 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2671 // Negative fp literals with a preceding "-" are 2672 // handled likewise for uniformity. 2673 // 2674 bool 2675 AMDGPUAsmParser::parseSP3NegModifier() { 2676 2677 AsmToken NextToken[2]; 2678 peekTokens(NextToken); 2679 2680 if (isToken(AsmToken::Minus) && 2681 (isRegister(NextToken[0], NextToken[1]) || 2682 NextToken[0].is(AsmToken::Pipe) || 2683 isId(NextToken[0], "abs"))) { 2684 lex(); 2685 return true; 2686 } 2687 2688 return false; 2689 } 2690 2691 OperandMatchResultTy 2692 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2693 bool AllowImm) { 2694 bool Neg, SP3Neg; 2695 bool Abs, SP3Abs; 2696 SMLoc Loc; 2697 2698 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2699 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2700 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2701 return MatchOperand_ParseFail; 2702 } 2703 2704 SP3Neg = parseSP3NegModifier(); 2705 2706 Loc = getLoc(); 2707 Neg = trySkipId("neg"); 2708 if (Neg && SP3Neg) { 2709 Error(Loc, "expected register or immediate"); 2710 return MatchOperand_ParseFail; 2711 } 2712 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2713 return MatchOperand_ParseFail; 2714 2715 Abs = trySkipId("abs"); 2716 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2717 return MatchOperand_ParseFail; 2718 2719 Loc = getLoc(); 2720 SP3Abs = trySkipToken(AsmToken::Pipe); 2721 if (Abs && SP3Abs) { 2722 Error(Loc, "expected register or immediate"); 2723 return MatchOperand_ParseFail; 2724 } 2725 2726 OperandMatchResultTy Res; 2727 if (AllowImm) { 2728 Res = parseRegOrImm(Operands, SP3Abs); 2729 } else { 2730 Res = parseReg(Operands); 2731 } 2732 if (Res != MatchOperand_Success) { 2733 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2734 } 2735 2736 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2737 return MatchOperand_ParseFail; 2738 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2739 return MatchOperand_ParseFail; 2740 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2741 return MatchOperand_ParseFail; 2742 2743 AMDGPUOperand::Modifiers Mods; 2744 Mods.Abs = Abs || SP3Abs; 2745 Mods.Neg = Neg || SP3Neg; 2746 2747 if (Mods.hasFPModifiers()) { 2748 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2749 if (Op.isExpr()) { 2750 Error(Op.getStartLoc(), "expected an absolute expression"); 2751 return MatchOperand_ParseFail; 2752 } 2753 Op.setModifiers(Mods); 2754 } 2755 return MatchOperand_Success; 2756 } 2757 2758 OperandMatchResultTy 2759 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2760 bool AllowImm) { 2761 bool Sext = trySkipId("sext"); 2762 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2763 return MatchOperand_ParseFail; 2764 2765 OperandMatchResultTy Res; 2766 if (AllowImm) { 2767 Res = parseRegOrImm(Operands); 2768 } else { 2769 Res = parseReg(Operands); 2770 } 2771 if (Res != MatchOperand_Success) { 2772 return Sext? MatchOperand_ParseFail : Res; 2773 } 2774 2775 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2776 return MatchOperand_ParseFail; 2777 2778 AMDGPUOperand::Modifiers Mods; 2779 Mods.Sext = Sext; 2780 2781 if (Mods.hasIntModifiers()) { 2782 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2783 if (Op.isExpr()) { 2784 Error(Op.getStartLoc(), "expected an absolute expression"); 2785 return MatchOperand_ParseFail; 2786 } 2787 Op.setModifiers(Mods); 2788 } 2789 2790 return MatchOperand_Success; 2791 } 2792 2793 OperandMatchResultTy 2794 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2795 return parseRegOrImmWithFPInputMods(Operands, false); 2796 } 2797 2798 OperandMatchResultTy 2799 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2800 return parseRegOrImmWithIntInputMods(Operands, false); 2801 } 2802 2803 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2804 auto Loc = getLoc(); 2805 if (trySkipId("off")) { 2806 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2807 AMDGPUOperand::ImmTyOff, false)); 2808 return MatchOperand_Success; 2809 } 2810 2811 if (!isRegister()) 2812 return MatchOperand_NoMatch; 2813 2814 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2815 if (Reg) { 2816 Operands.push_back(std::move(Reg)); 2817 return MatchOperand_Success; 2818 } 2819 2820 return MatchOperand_ParseFail; 2821 2822 } 2823 2824 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2825 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2826 2827 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2828 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2829 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2830 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2831 return Match_InvalidOperand; 2832 2833 if ((TSFlags & SIInstrFlags::VOP3) && 2834 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2835 getForcedEncodingSize() != 64) 2836 return Match_PreferE32; 2837 2838 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2839 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2840 // v_mac_f32/16 allow only dst_sel == DWORD; 2841 auto OpNum = 2842 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2843 const auto &Op = Inst.getOperand(OpNum); 2844 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2845 return Match_InvalidOperand; 2846 } 2847 } 2848 2849 return Match_Success; 2850 } 2851 2852 static ArrayRef<unsigned> getAllVariants() { 2853 static const unsigned Variants[] = { 2854 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2855 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2856 }; 2857 2858 return makeArrayRef(Variants); 2859 } 2860 2861 // Which asm variants we should check 2862 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2863 if (getForcedEncodingSize() == 32) { 2864 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2865 return makeArrayRef(Variants); 2866 } 2867 2868 if (isForcedVOP3()) { 2869 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2870 return makeArrayRef(Variants); 2871 } 2872 2873 if (isForcedSDWA()) { 2874 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2875 AMDGPUAsmVariants::SDWA9}; 2876 return makeArrayRef(Variants); 2877 } 2878 2879 if (isForcedDPP()) { 2880 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2881 return makeArrayRef(Variants); 2882 } 2883 2884 return getAllVariants(); 2885 } 2886 2887 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 2888 if (getForcedEncodingSize() == 32) 2889 return "e32"; 2890 2891 if (isForcedVOP3()) 2892 return "e64"; 2893 2894 if (isForcedSDWA()) 2895 return "sdwa"; 2896 2897 if (isForcedDPP()) 2898 return "dpp"; 2899 2900 return ""; 2901 } 2902 2903 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2904 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2905 const unsigned Num = Desc.getNumImplicitUses(); 2906 for (unsigned i = 0; i < Num; ++i) { 2907 unsigned Reg = Desc.ImplicitUses[i]; 2908 switch (Reg) { 2909 case AMDGPU::FLAT_SCR: 2910 case AMDGPU::VCC: 2911 case AMDGPU::VCC_LO: 2912 case AMDGPU::VCC_HI: 2913 case AMDGPU::M0: 2914 return Reg; 2915 default: 2916 break; 2917 } 2918 } 2919 return AMDGPU::NoRegister; 2920 } 2921 2922 // NB: This code is correct only when used to check constant 2923 // bus limitations because GFX7 does not support f16 inline constants. 2924 // Note that there are no cases when a GFX7 opcode violates 2925 // constant bus limitations due to the use of an f16 constant.
2926 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2927 unsigned OpIdx) const { 2928 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2929 2930 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2931 return false; 2932 } 2933 2934 const MCOperand &MO = Inst.getOperand(OpIdx); 2935 2936 int64_t Val = MO.getImm(); 2937 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2938 2939 switch (OpSize) { // expected operand size 2940 case 8: 2941 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2942 case 4: 2943 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2944 case 2: { 2945 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2946 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 2947 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 2948 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 2949 return AMDGPU::isInlinableIntLiteral(Val); 2950 2951 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2952 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2953 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 2954 return AMDGPU::isInlinableIntLiteralV216(Val); 2955 2956 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2957 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2958 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 2959 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2960 2961 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2962 } 2963 default: 2964 llvm_unreachable("invalid operand size"); 2965 } 2966 } 2967 2968 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2969 if (!isGFX10()) 2970 return 1; 2971 2972 switch (Opcode) { 2973 // 64-bit shift instructions can use only one scalar value input 2974 case AMDGPU::V_LSHLREV_B64: 2975 case AMDGPU::V_LSHLREV_B64_gfx10: 2976 case AMDGPU::V_LSHL_B64: 2977 case AMDGPU::V_LSHRREV_B64: 2978 case AMDGPU::V_LSHRREV_B64_gfx10: 2979 case AMDGPU::V_LSHR_B64: 2980 case AMDGPU::V_ASHRREV_I64: 2981 case AMDGPU::V_ASHRREV_I64_gfx10: 2982 case AMDGPU::V_ASHR_I64: 2983 return 1; 2984 default: 2985 return 2; 2986 } 2987 } 2988 2989 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2990 const MCOperand &MO = Inst.getOperand(OpIdx); 2991 if (MO.isImm()) { 2992 return !isInlineConstant(Inst, OpIdx); 2993 } else if (MO.isReg()) { 2994 auto Reg = MO.getReg(); 2995 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2996 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2997 } else { 2998 return true; 2999 } 3000 } 3001 3002 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 3003 const unsigned Opcode = Inst.getOpcode(); 3004 const MCInstrDesc &Desc = MII.get(Opcode); 3005 unsigned ConstantBusUseCount = 0; 3006 unsigned NumLiterals = 0; 3007 unsigned LiteralSize; 3008 3009 if (Desc.TSFlags & 3010 (SIInstrFlags::VOPC | 3011 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3012 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3013 SIInstrFlags::SDWA)) { 3014 // Check special imm operands (used by madmk, etc) 3015 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3016 ++ConstantBusUseCount; 3017 } 3018 3019 SmallDenseSet<unsigned> SGPRsUsed; 3020 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3021 if (SGPRUsed != AMDGPU::NoRegister) { 3022 SGPRsUsed.insert(SGPRUsed); 3023 ++ConstantBusUseCount; 3024 } 3025 3026 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3027 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, 
AMDGPU::OpName::src1); 3028 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3029 3030 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3031 3032 for (int OpIdx : OpIndices) { 3033 if (OpIdx == -1) break; 3034 3035 const MCOperand &MO = Inst.getOperand(OpIdx); 3036 if (usesConstantBus(Inst, OpIdx)) { 3037 if (MO.isReg()) { 3038 const unsigned Reg = mc2PseudoReg(MO.getReg()); 3039 // Pairs of registers with a partial intersection like these 3040 // s0, s[0:1] 3041 // flat_scratch_lo, flat_scratch 3042 // flat_scratch_lo, flat_scratch_hi 3043 // are theoretically valid but they are disabled anyway. 3044 // Note that this code mimics SIInstrInfo::verifyInstruction 3045 if (!SGPRsUsed.count(Reg)) { 3046 SGPRsUsed.insert(Reg); 3047 ++ConstantBusUseCount; 3048 } 3049 } else { // Expression or a literal 3050 3051 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3052 continue; // special operand like VINTERP attr_chan 3053 3054 // An instruction may use only one literal. 3055 // This has been validated on the previous step. 3056 // See validateVOP3Literal. 3057 // This literal may be used as more than one operand. 3058 // If all these operands are of the same size, 3059 // this literal counts as one scalar value. 3060 // Otherwise it counts as 2 scalar values. 3061 // See "GFX10 Shader Programming", section 3.6.2.3. 3062 3063 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3064 if (Size < 4) Size = 4; 3065 3066 if (NumLiterals == 0) { 3067 NumLiterals = 1; 3068 LiteralSize = Size; 3069 } else if (LiteralSize != Size) { 3070 NumLiterals = 2; 3071 } 3072 } 3073 } 3074 } 3075 } 3076 ConstantBusUseCount += NumLiterals; 3077 3078 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 3079 } 3080 3081 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 3082 const unsigned Opcode = Inst.getOpcode(); 3083 const MCInstrDesc &Desc = MII.get(Opcode); 3084 3085 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3086 if (DstIdx == -1 || 3087 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3088 return true; 3089 } 3090 3091 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3092 3093 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3094 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3095 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3096 3097 assert(DstIdx != -1); 3098 const MCOperand &Dst = Inst.getOperand(DstIdx); 3099 assert(Dst.isReg()); 3100 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3101 3102 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3103 3104 for (int SrcIdx : SrcIndices) { 3105 if (SrcIdx == -1) break; 3106 const MCOperand &Src = Inst.getOperand(SrcIdx); 3107 if (Src.isReg()) { 3108 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3109 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3110 return false; 3111 } 3112 } 3113 } 3114 3115 return true; 3116 } 3117 3118 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3119 3120 const unsigned Opc = Inst.getOpcode(); 3121 const MCInstrDesc &Desc = MII.get(Opc); 3122 3123 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3124 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3125 assert(ClampIdx != -1); 3126 return Inst.getOperand(ClampIdx).getImm() == 0; 3127 } 3128 3129 return true; 3130 } 3131 3132 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst
&Inst) { 3133 3134 const unsigned Opc = Inst.getOpcode(); 3135 const MCInstrDesc &Desc = MII.get(Opc); 3136 3137 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3138 return true; 3139 3140 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3141 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3142 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3143 3144 assert(VDataIdx != -1); 3145 3146 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3147 return true; 3148 3149 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3150 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3151 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3152 if (DMask == 0) 3153 DMask = 1; 3154 3155 unsigned DataSize = 3156 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3157 if (hasPackedD16()) { 3158 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3159 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3160 DataSize = (DataSize + 1) / 2; 3161 } 3162 3163 return (VDataSize / 4) == DataSize + TFESize; 3164 } 3165 3166 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3167 const unsigned Opc = Inst.getOpcode(); 3168 const MCInstrDesc &Desc = MII.get(Opc); 3169 3170 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3171 return true; 3172 3173 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3174 3175 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3176 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3177 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3178 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3179 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3180 3181 assert(VAddr0Idx != -1); 3182 assert(SrsrcIdx != -1); 3183 assert(SrsrcIdx > VAddr0Idx); 3184 3185 if (DimIdx == -1) 3186 return true; // intersect_ray 3187 3188 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3189 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3190 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3191 unsigned VAddrSize = 3192 IsNSA ? SrsrcIdx - VAddr0Idx 3193 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3194 3195 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3196 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3197 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3198 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3199 if (!IsNSA) { 3200 if (AddrSize > 8) 3201 AddrSize = 16; 3202 else if (AddrSize > 4) 3203 AddrSize = 8; 3204 } 3205 3206 return VAddrSize == AddrSize; 3207 } 3208 3209 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3210 3211 const unsigned Opc = Inst.getOpcode(); 3212 const MCInstrDesc &Desc = MII.get(Opc); 3213 3214 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3215 return true; 3216 if (!Desc.mayLoad() || !Desc.mayStore()) 3217 return true; // Not atomic 3218 3219 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3220 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3221 3222 // This is an incomplete check because image_atomic_cmpswap 3223 // may only use 0x3 and 0xf while other atomic operations 3224 // may use 0x1 and 0x3. However these limitations are 3225 // verified when we check that dmask matches dst size. 
3226 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3227 } 3228 3229 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3230 3231 const unsigned Opc = Inst.getOpcode(); 3232 const MCInstrDesc &Desc = MII.get(Opc); 3233 3234 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3235 return true; 3236 3237 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3238 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3239 3240 // GATHER4 instructions use dmask in a different fashion compared to 3241 // other MIMG instructions. The only useful DMASK values are 3242 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3243 // (red,red,red,red) etc.) The ISA document doesn't mention 3244 // this. 3245 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3246 } 3247 3248 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3249 { 3250 switch (Opcode) { 3251 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3252 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3253 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3254 return true; 3255 default: 3256 return false; 3257 } 3258 } 3259 3260 // movrels* opcodes should only allow VGPRS as src0. 3261 // This is specified in .td description for vop1/vop3, 3262 // but sdwa is handled differently. See isSDWAOperand. 3263 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3264 3265 const unsigned Opc = Inst.getOpcode(); 3266 const MCInstrDesc &Desc = MII.get(Opc); 3267 3268 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3269 return true; 3270 3271 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3272 assert(Src0Idx != -1); 3273 3274 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3275 if (!Src0.isReg()) 3276 return false; 3277 3278 auto Reg = Src0.getReg(); 3279 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3280 return !isSGPR(mc2PseudoReg(Reg), TRI); 3281 } 3282 3283 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) { 3284 3285 const unsigned Opc = Inst.getOpcode(); 3286 3287 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3288 return true; 3289 3290 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3291 assert(Src0Idx != -1); 3292 3293 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3294 if (!Src0.isReg()) 3295 return true; 3296 3297 auto Reg = Src0.getReg(); 3298 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3299 if (isSGPR(mc2PseudoReg(Reg), TRI)) { 3300 Error(getLoc(), "source operand must be either a VGPR or an inline constant"); 3301 return false; 3302 } 3303 3304 return true; 3305 } 3306 3307 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3308 3309 const unsigned Opc = Inst.getOpcode(); 3310 const MCInstrDesc &Desc = MII.get(Opc); 3311 3312 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3313 return true; 3314 3315 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3316 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3317 if (isCI() || isSI()) 3318 return false; 3319 } 3320 3321 return true; 3322 } 3323 3324 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3325 const unsigned Opc = Inst.getOpcode(); 3326 const MCInstrDesc &Desc = MII.get(Opc); 3327 3328 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3329 return true; 3330 3331 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3332 if (DimIdx < 0) 3333 return true; 3334 3335 long Imm = Inst.getOperand(DimIdx).getImm(); 3336 if (Imm < 0 || Imm >= 8) 3337 return false; 3338 3339 return 
true; 3340 } 3341 3342 static bool IsRevOpcode(const unsigned Opcode) 3343 { 3344 switch (Opcode) { 3345 case AMDGPU::V_SUBREV_F32_e32: 3346 case AMDGPU::V_SUBREV_F32_e64: 3347 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3348 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3349 case AMDGPU::V_SUBREV_F32_e32_vi: 3350 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3351 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3352 case AMDGPU::V_SUBREV_F32_e64_vi: 3353 3354 case AMDGPU::V_SUBREV_CO_U32_e32: 3355 case AMDGPU::V_SUBREV_CO_U32_e64: 3356 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3357 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3358 3359 case AMDGPU::V_SUBBREV_U32_e32: 3360 case AMDGPU::V_SUBBREV_U32_e64: 3361 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3362 case AMDGPU::V_SUBBREV_U32_e32_vi: 3363 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3364 case AMDGPU::V_SUBBREV_U32_e64_vi: 3365 3366 case AMDGPU::V_SUBREV_U32_e32: 3367 case AMDGPU::V_SUBREV_U32_e64: 3368 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3369 case AMDGPU::V_SUBREV_U32_e32_vi: 3370 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3371 case AMDGPU::V_SUBREV_U32_e64_vi: 3372 3373 case AMDGPU::V_SUBREV_F16_e32: 3374 case AMDGPU::V_SUBREV_F16_e64: 3375 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3376 case AMDGPU::V_SUBREV_F16_e32_vi: 3377 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3378 case AMDGPU::V_SUBREV_F16_e64_vi: 3379 3380 case AMDGPU::V_SUBREV_U16_e32: 3381 case AMDGPU::V_SUBREV_U16_e64: 3382 case AMDGPU::V_SUBREV_U16_e32_vi: 3383 case AMDGPU::V_SUBREV_U16_e64_vi: 3384 3385 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3386 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3387 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3388 3389 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3390 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3391 3392 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3393 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3394 3395 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3396 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3397 3398 case AMDGPU::V_LSHRREV_B32_e32: 3399 case AMDGPU::V_LSHRREV_B32_e64: 3400 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3401 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3402 case AMDGPU::V_LSHRREV_B32_e32_vi: 3403 case AMDGPU::V_LSHRREV_B32_e64_vi: 3404 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3405 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3406 3407 case AMDGPU::V_ASHRREV_I32_e32: 3408 case AMDGPU::V_ASHRREV_I32_e64: 3409 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3410 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3411 case AMDGPU::V_ASHRREV_I32_e32_vi: 3412 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3413 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3414 case AMDGPU::V_ASHRREV_I32_e64_vi: 3415 3416 case AMDGPU::V_LSHLREV_B32_e32: 3417 case AMDGPU::V_LSHLREV_B32_e64: 3418 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3419 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3420 case AMDGPU::V_LSHLREV_B32_e32_vi: 3421 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3422 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3423 case AMDGPU::V_LSHLREV_B32_e64_vi: 3424 3425 case AMDGPU::V_LSHLREV_B16_e32: 3426 case AMDGPU::V_LSHLREV_B16_e64: 3427 case AMDGPU::V_LSHLREV_B16_e32_vi: 3428 case AMDGPU::V_LSHLREV_B16_e64_vi: 3429 case AMDGPU::V_LSHLREV_B16_gfx10: 3430 3431 case AMDGPU::V_LSHRREV_B16_e32: 3432 case AMDGPU::V_LSHRREV_B16_e64: 3433 case AMDGPU::V_LSHRREV_B16_e32_vi: 3434 case AMDGPU::V_LSHRREV_B16_e64_vi: 3435 case AMDGPU::V_LSHRREV_B16_gfx10: 3436 3437 case AMDGPU::V_ASHRREV_I16_e32: 3438 case AMDGPU::V_ASHRREV_I16_e64: 3439 case AMDGPU::V_ASHRREV_I16_e32_vi: 3440 case AMDGPU::V_ASHRREV_I16_e64_vi: 3441 case AMDGPU::V_ASHRREV_I16_gfx10: 3442 3443 case 
AMDGPU::V_LSHLREV_B64: 3444 case AMDGPU::V_LSHLREV_B64_gfx10: 3445 case AMDGPU::V_LSHLREV_B64_vi: 3446 3447 case AMDGPU::V_LSHRREV_B64: 3448 case AMDGPU::V_LSHRREV_B64_gfx10: 3449 case AMDGPU::V_LSHRREV_B64_vi: 3450 3451 case AMDGPU::V_ASHRREV_I64: 3452 case AMDGPU::V_ASHRREV_I64_gfx10: 3453 case AMDGPU::V_ASHRREV_I64_vi: 3454 3455 case AMDGPU::V_PK_LSHLREV_B16: 3456 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3457 case AMDGPU::V_PK_LSHLREV_B16_vi: 3458 3459 case AMDGPU::V_PK_LSHRREV_B16: 3460 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3461 case AMDGPU::V_PK_LSHRREV_B16_vi: 3462 case AMDGPU::V_PK_ASHRREV_I16: 3463 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3464 case AMDGPU::V_PK_ASHRREV_I16_vi: 3465 return true; 3466 default: 3467 return false; 3468 } 3469 } 3470 3471 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3472 3473 using namespace SIInstrFlags; 3474 const unsigned Opcode = Inst.getOpcode(); 3475 const MCInstrDesc &Desc = MII.get(Opcode); 3476 3477 // lds_direct register is defined so that it can be used 3478 // with 9-bit operands only. Ignore encodings which do not accept these. 3479 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3480 return true; 3481 3482 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3483 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3484 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3485 3486 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3487 3488 // lds_direct cannot be specified as either src1 or src2. 3489 for (int SrcIdx : SrcIndices) { 3490 if (SrcIdx == -1) break; 3491 const MCOperand &Src = Inst.getOperand(SrcIdx); 3492 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3493 return false; 3494 } 3495 } 3496 3497 if (Src0Idx == -1) 3498 return true; 3499 3500 const MCOperand &Src = Inst.getOperand(Src0Idx); 3501 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3502 return true; 3503 3504 // lds_direct is specified as src0. Check additional limitations. 3505 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3506 } 3507 3508 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3509 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3510 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3511 if (Op.isFlatOffset()) 3512 return Op.getStartLoc(); 3513 } 3514 return getLoc(); 3515 } 3516 3517 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3518 const OperandVector &Operands) { 3519 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3520 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3521 return true; 3522 3523 auto Opcode = Inst.getOpcode(); 3524 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3525 assert(OpNum != -1); 3526 3527 const auto &Op = Inst.getOperand(OpNum); 3528 if (!hasFlatOffsets() && Op.getImm() != 0) { 3529 Error(getFlatOffsetLoc(Operands), 3530 "flat offset modifier is not supported on this GPU"); 3531 return false; 3532 } 3533 3534 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3535 // For FLAT segment the offset must be positive; 3536 // MSB is ignored and forced to zero. 3537 unsigned OffsetSize = isGFX9() ? 13 : 12; 3538 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3539 if (!isIntN(OffsetSize, Op.getImm())) { 3540 Error(getFlatOffsetLoc(Operands), 3541 isGFX9() ? 
"expected a 13-bit signed offset" : 3542 "expected a 12-bit signed offset"); 3543 return false; 3544 } 3545 } else { 3546 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3547 Error(getFlatOffsetLoc(Operands), 3548 isGFX9() ? "expected a 12-bit unsigned offset" : 3549 "expected an 11-bit unsigned offset"); 3550 return false; 3551 } 3552 } 3553 3554 return true; 3555 } 3556 3557 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3558 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3559 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3560 if (Op.isSMEMOffset()) 3561 return Op.getStartLoc(); 3562 } 3563 return getLoc(); 3564 } 3565 3566 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3567 const OperandVector &Operands) { 3568 if (isCI() || isSI()) 3569 return true; 3570 3571 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3572 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3573 return true; 3574 3575 auto Opcode = Inst.getOpcode(); 3576 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3577 if (OpNum == -1) 3578 return true; 3579 3580 const auto &Op = Inst.getOperand(OpNum); 3581 if (!Op.isImm()) 3582 return true; 3583 3584 uint64_t Offset = Op.getImm(); 3585 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3586 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3587 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3588 return true; 3589 3590 Error(getSMEMOffsetLoc(Operands), 3591 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3592 "expected a 21-bit signed offset"); 3593 3594 return false; 3595 } 3596 3597 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3598 unsigned Opcode = Inst.getOpcode(); 3599 const MCInstrDesc &Desc = MII.get(Opcode); 3600 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3601 return true; 3602 3603 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3604 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3605 3606 const int OpIndices[] = { Src0Idx, Src1Idx }; 3607 3608 unsigned NumExprs = 0; 3609 unsigned NumLiterals = 0; 3610 uint32_t LiteralValue; 3611 3612 for (int OpIdx : OpIndices) { 3613 if (OpIdx == -1) break; 3614 3615 const MCOperand &MO = Inst.getOperand(OpIdx); 3616 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3617 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3618 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3619 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3620 if (NumLiterals == 0 || LiteralValue != Value) { 3621 LiteralValue = Value; 3622 ++NumLiterals; 3623 } 3624 } else if (MO.isExpr()) { 3625 ++NumExprs; 3626 } 3627 } 3628 } 3629 3630 return NumLiterals + NumExprs <= 1; 3631 } 3632 3633 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3634 const unsigned Opc = Inst.getOpcode(); 3635 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3636 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3637 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3638 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3639 3640 if (OpSel & ~3) 3641 return false; 3642 } 3643 return true; 3644 } 3645 3646 // Check if VCC register matches wavefront size 3647 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3648 auto FB = getFeatureBits(); 3649 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3650 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3651 } 3652 3653 // 
VOP3 literal is only allowed in GFX10+ and only one can be used 3654 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3655 unsigned Opcode = Inst.getOpcode(); 3656 const MCInstrDesc &Desc = MII.get(Opcode); 3657 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3658 return true; 3659 3660 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3661 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3662 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3663 3664 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3665 3666 unsigned NumExprs = 0; 3667 unsigned NumLiterals = 0; 3668 uint32_t LiteralValue; 3669 3670 for (int OpIdx : OpIndices) { 3671 if (OpIdx == -1) break; 3672 3673 const MCOperand &MO = Inst.getOperand(OpIdx); 3674 if (!MO.isImm() && !MO.isExpr()) 3675 continue; 3676 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3677 continue; 3678 3679 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3680 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3681 return false; 3682 3683 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3684 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3685 if (NumLiterals == 0 || LiteralValue != Value) { 3686 LiteralValue = Value; 3687 ++NumLiterals; 3688 } 3689 } else if (MO.isExpr()) { 3690 ++NumExprs; 3691 } 3692 } 3693 NumLiterals += NumExprs; 3694 3695 return !NumLiterals || 3696 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3697 } 3698 3699 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3700 const SMLoc &IDLoc, 3701 const OperandVector &Operands) { 3702 if (!validateLdsDirect(Inst)) { 3703 Error(IDLoc, 3704 "invalid use of lds_direct"); 3705 return false; 3706 } 3707 if (!validateSOPLiteral(Inst)) { 3708 Error(IDLoc, 3709 "only one literal operand is allowed"); 3710 return false; 3711 } 3712 if (!validateVOP3Literal(Inst)) { 3713 Error(IDLoc, 3714 "invalid literal operand"); 3715 return false; 3716 } 3717 if (!validateConstantBusLimitations(Inst)) { 3718 Error(IDLoc, 3719 "invalid operand (violates constant bus restrictions)"); 3720 return false; 3721 } 3722 if (!validateEarlyClobberLimitations(Inst)) { 3723 Error(IDLoc, 3724 "destination must be different than all sources"); 3725 return false; 3726 } 3727 if (!validateIntClampSupported(Inst)) { 3728 Error(IDLoc, 3729 "integer clamping is not supported on this GPU"); 3730 return false; 3731 } 3732 if (!validateOpSel(Inst)) { 3733 Error(IDLoc, 3734 "invalid op_sel operand"); 3735 return false; 3736 } 3737 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
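// For MIMG, however, d16 is a distinct modifier operand and must be checked
// against the subtarget's capabilities.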
3738 if (!validateMIMGD16(Inst)) { 3739 Error(IDLoc, 3740 "d16 modifier is not supported on this GPU"); 3741 return false; 3742 } 3743 if (!validateMIMGDim(Inst)) { 3744 Error(IDLoc, "dim modifier is required on this GPU"); 3745 return false; 3746 } 3747 if (!validateMIMGDataSize(Inst)) { 3748 Error(IDLoc, 3749 "image data size does not match dmask and tfe"); 3750 return false; 3751 } 3752 if (!validateMIMGAddrSize(Inst)) { 3753 Error(IDLoc, 3754 "image address size does not match dim and a16"); 3755 return false; 3756 } 3757 if (!validateMIMGAtomicDMask(Inst)) { 3758 Error(IDLoc, 3759 "invalid atomic image dmask"); 3760 return false; 3761 } 3762 if (!validateMIMGGatherDMask(Inst)) { 3763 Error(IDLoc, 3764 "invalid image_gather dmask: only one bit must be set"); 3765 return false; 3766 } 3767 if (!validateMovrels(Inst)) { 3768 Error(IDLoc, "source operand must be a VGPR"); 3769 return false; 3770 } 3771 if (!validateFlatOffset(Inst, Operands)) { 3772 return false; 3773 } 3774 if (!validateSMEMOffset(Inst, Operands)) { 3775 return false; 3776 } 3777 if (!validateMAIAccWrite(Inst)) { 3778 return false; 3779 } 3780 3781 return true; 3782 } 3783 3784 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3785 const FeatureBitset &FBS, 3786 unsigned VariantID = 0); 3787 3788 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 3789 const FeatureBitset &AvailableFeatures, 3790 unsigned VariantID); 3791 3792 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3793 const FeatureBitset &FBS) { 3794 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 3795 } 3796 3797 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3798 const FeatureBitset &FBS, 3799 ArrayRef<unsigned> Variants) { 3800 for (auto Variant : Variants) { 3801 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 3802 return true; 3803 } 3804 3805 return false; 3806 } 3807 3808 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 3809 const SMLoc &IDLoc) { 3810 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3811 3812 // Check if requested instruction variant is supported. 3813 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 3814 return false; 3815 3816 // This instruction is not supported. 3817 // Clear any other pending errors because they are no longer relevant. 3818 getParser().clearPendingErrors(); 3819 3820 // Requested instruction variant is not supported. 3821 // Check if any other variants are supported. 3822 StringRef VariantName = getMatchedVariantName(); 3823 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 3824 return Error(IDLoc, 3825 Twine(VariantName, 3826 " variant of this instruction is not supported")); 3827 } 3828 3829 // Finally check if this instruction is supported on any other GPU. 3830 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 3831 return Error(IDLoc, "instruction not supported on this GPU"); 3832 } 3833 3834 // Instruction not supported on any GPU. Probably a typo. 
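// Offer a spelling suggestion computed from the set of known mnemonics.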
3835 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
3836 return Error(IDLoc, "invalid instruction" + Suggestion);
3837 }
3838
3839 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3840 OperandVector &Operands,
3841 MCStreamer &Out,
3842 uint64_t &ErrorInfo,
3843 bool MatchingInlineAsm) {
3844 MCInst Inst;
3845 unsigned Result = Match_Success;
3846 for (auto Variant : getMatchedVariants()) {
3847 uint64_t EI;
3848 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3849 Variant);
3850 // We order match statuses from least to most specific and keep the most
3851 // specific status as the result:
3852 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3853 if ((R == Match_Success) ||
3854 (R == Match_PreferE32) ||
3855 (R == Match_MissingFeature && Result != Match_PreferE32) ||
3856 (R == Match_InvalidOperand && Result != Match_MissingFeature
3857 && Result != Match_PreferE32) ||
3858 (R == Match_MnemonicFail && Result != Match_InvalidOperand
3859 && Result != Match_MissingFeature
3860 && Result != Match_PreferE32)) {
3861 Result = R;
3862 ErrorInfo = EI;
3863 }
3864 if (R == Match_Success)
3865 break;
3866 }
3867
3868 if (Result == Match_Success) {
3869 if (!validateInstruction(Inst, IDLoc, Operands)) {
3870 return true;
3871 }
3872 Inst.setLoc(IDLoc);
3873 Out.emitInstruction(Inst, getSTI());
3874 return false;
3875 }
3876
3877 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
3878 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
3879 return true;
3880 }
3881
3882 switch (Result) {
3883 default: break;
3884 case Match_MissingFeature:
3885 // It has been verified that the specified instruction
3886 // mnemonic is valid. A match was found but it requires
3887 // features which are not supported on this GPU.
3888 return Error(IDLoc, "operands are not valid for this GPU or mode"); 3889 3890 case Match_InvalidOperand: { 3891 SMLoc ErrorLoc = IDLoc; 3892 if (ErrorInfo != ~0ULL) { 3893 if (ErrorInfo >= Operands.size()) { 3894 return Error(IDLoc, "too few operands for instruction"); 3895 } 3896 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3897 if (ErrorLoc == SMLoc()) 3898 ErrorLoc = IDLoc; 3899 } 3900 return Error(ErrorLoc, "invalid operand for instruction"); 3901 } 3902 3903 case Match_PreferE32: 3904 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3905 "should be encoded as e32"); 3906 case Match_MnemonicFail: 3907 llvm_unreachable("Invalid instructions should have been handled already"); 3908 } 3909 llvm_unreachable("Implement any new match types added!"); 3910 } 3911 3912 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3913 int64_t Tmp = -1; 3914 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3915 return true; 3916 } 3917 if (getParser().parseAbsoluteExpression(Tmp)) { 3918 return true; 3919 } 3920 Ret = static_cast<uint32_t>(Tmp); 3921 return false; 3922 } 3923 3924 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3925 uint32_t &Minor) { 3926 if (ParseAsAbsoluteExpression(Major)) 3927 return TokError("invalid major version"); 3928 3929 if (getLexer().isNot(AsmToken::Comma)) 3930 return TokError("minor version number required, comma expected"); 3931 Lex(); 3932 3933 if (ParseAsAbsoluteExpression(Minor)) 3934 return TokError("invalid minor version"); 3935 3936 return false; 3937 } 3938 3939 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3940 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3941 return TokError("directive only supported for amdgcn architecture"); 3942 3943 std::string Target; 3944 3945 SMLoc TargetStart = getTok().getLoc(); 3946 if (getParser().parseEscapedString(Target)) 3947 return true; 3948 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3949 3950 std::string ExpectedTarget; 3951 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3952 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3953 3954 if (Target != ExpectedTargetOS.str()) 3955 return getParser().Error(TargetRange.Start, "target must match options", 3956 TargetRange); 3957 3958 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3959 return false; 3960 } 3961 3962 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3963 return getParser().Error(Range.Start, "value out of range", Range); 3964 } 3965 3966 bool AMDGPUAsmParser::calculateGPRBlocks( 3967 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3968 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3969 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3970 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3971 // TODO(scott.linder): These calculations are duplicated from 3972 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
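// VGPRs and SGPRs are encoded as allocation granules ("blocks") in
// COMPUTE_PGM_RSRC1; this converts the raw register counts accordingly.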
3973 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3974 3975 unsigned NumVGPRs = NextFreeVGPR; 3976 unsigned NumSGPRs = NextFreeSGPR; 3977 3978 if (Version.Major >= 10) 3979 NumSGPRs = 0; 3980 else { 3981 unsigned MaxAddressableNumSGPRs = 3982 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3983 3984 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3985 NumSGPRs > MaxAddressableNumSGPRs) 3986 return OutOfRangeError(SGPRRange); 3987 3988 NumSGPRs += 3989 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3990 3991 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3992 NumSGPRs > MaxAddressableNumSGPRs) 3993 return OutOfRangeError(SGPRRange); 3994 3995 if (Features.test(FeatureSGPRInitBug)) 3996 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3997 } 3998 3999 VGPRBlocks = 4000 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4001 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4002 4003 return false; 4004 } 4005 4006 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4007 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4008 return TokError("directive only supported for amdgcn architecture"); 4009 4010 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4011 return TokError("directive only supported for amdhsa OS"); 4012 4013 StringRef KernelName; 4014 if (getParser().parseIdentifier(KernelName)) 4015 return true; 4016 4017 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4018 4019 StringSet<> Seen; 4020 4021 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4022 4023 SMRange VGPRRange; 4024 uint64_t NextFreeVGPR = 0; 4025 SMRange SGPRRange; 4026 uint64_t NextFreeSGPR = 0; 4027 unsigned UserSGPRCount = 0; 4028 bool ReserveVCC = true; 4029 bool ReserveFlatScr = true; 4030 bool ReserveXNACK = hasXNACK(); 4031 Optional<bool> EnableWavefrontSize32; 4032 4033 while (true) { 4034 while (getLexer().is(AsmToken::EndOfStatement)) 4035 Lex(); 4036 4037 if (getLexer().isNot(AsmToken::Identifier)) 4038 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 4039 4040 StringRef ID = getTok().getIdentifier(); 4041 SMRange IDRange = getTok().getLocRange(); 4042 Lex(); 4043 4044 if (ID == ".end_amdhsa_kernel") 4045 break; 4046 4047 if (Seen.find(ID) != Seen.end()) 4048 return TokError(".amdhsa_ directives cannot be repeated"); 4049 Seen.insert(ID); 4050 4051 SMLoc ValStart = getTok().getLoc(); 4052 int64_t IVal; 4053 if (getParser().parseAbsoluteExpression(IVal)) 4054 return true; 4055 SMLoc ValEnd = getTok().getLoc(); 4056 SMRange ValRange = SMRange(ValStart, ValEnd); 4057 4058 if (IVal < 0) 4059 return OutOfRangeError(ValRange); 4060 4061 uint64_t Val = IVal; 4062 4063 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4064 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4065 return OutOfRangeError(RANGE); \ 4066 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4067 4068 if (ID == ".amdhsa_group_segment_fixed_size") { 4069 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4070 return OutOfRangeError(ValRange); 4071 KD.group_segment_fixed_size = Val; 4072 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4073 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4074 return OutOfRangeError(ValRange); 4075 KD.private_segment_fixed_size = Val; 4076 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4077 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4078 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4079 Val, ValRange); 
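// The private segment buffer is a 128-bit resource descriptor and occupies
// four user SGPRs when enabled.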
4080 if (Val) 4081 UserSGPRCount += 4; 4082 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4083 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4084 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4085 ValRange); 4086 if (Val) 4087 UserSGPRCount += 2; 4088 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4089 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4090 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4091 ValRange); 4092 if (Val) 4093 UserSGPRCount += 2; 4094 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4095 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4096 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4097 Val, ValRange); 4098 if (Val) 4099 UserSGPRCount += 2; 4100 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4101 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4102 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4103 ValRange); 4104 if (Val) 4105 UserSGPRCount += 2; 4106 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4107 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4108 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4109 ValRange); 4110 if (Val) 4111 UserSGPRCount += 2; 4112 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4113 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4114 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4115 Val, ValRange); 4116 if (Val) 4117 UserSGPRCount += 1; 4118 } else if (ID == ".amdhsa_wavefront_size32") { 4119 if (IVersion.Major < 10) 4120 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4121 IDRange); 4122 EnableWavefrontSize32 = Val; 4123 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4124 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4125 Val, ValRange); 4126 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4127 PARSE_BITS_ENTRY( 4128 KD.compute_pgm_rsrc2, 4129 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 4130 ValRange); 4131 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4132 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4133 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4134 ValRange); 4135 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4136 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4137 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4138 ValRange); 4139 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4140 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4141 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4142 ValRange); 4143 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4144 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4145 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4146 ValRange); 4147 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4148 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4149 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4150 ValRange); 4151 } else if (ID == ".amdhsa_next_free_vgpr") { 4152 VGPRRange = ValRange; 4153 NextFreeVGPR = Val; 4154 } else if (ID == ".amdhsa_next_free_sgpr") { 4155 SGPRRange = ValRange; 4156 NextFreeSGPR = Val; 4157 } else if (ID == ".amdhsa_reserve_vcc") { 4158 if (!isUInt<1>(Val)) 4159 return OutOfRangeError(ValRange); 4160 ReserveVCC = Val; 4161 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4162 if (IVersion.Major < 7) 4163 return getParser().Error(IDRange.Start, "directive requires gfx7+", 4164 IDRange); 4165 if (!isUInt<1>(Val)) 4166 return OutOfRangeError(ValRange); 4167 ReserveFlatScr = Val; 4168 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4169 if (IVersion.Major < 8) 4170 return getParser().Error(IDRange.Start, 
"directive requires gfx8+", 4171 IDRange); 4172 if (!isUInt<1>(Val)) 4173 return OutOfRangeError(ValRange); 4174 ReserveXNACK = Val; 4175 } else if (ID == ".amdhsa_float_round_mode_32") { 4176 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4177 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4178 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4179 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4180 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4181 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4182 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4183 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4184 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4185 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4186 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4187 ValRange); 4188 } else if (ID == ".amdhsa_dx10_clamp") { 4189 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4190 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4191 } else if (ID == ".amdhsa_ieee_mode") { 4192 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4193 Val, ValRange); 4194 } else if (ID == ".amdhsa_fp16_overflow") { 4195 if (IVersion.Major < 9) 4196 return getParser().Error(IDRange.Start, "directive requires gfx9+", 4197 IDRange); 4198 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4199 ValRange); 4200 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4201 if (IVersion.Major < 10) 4202 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4203 IDRange); 4204 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4205 ValRange); 4206 } else if (ID == ".amdhsa_memory_ordered") { 4207 if (IVersion.Major < 10) 4208 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4209 IDRange); 4210 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4211 ValRange); 4212 } else if (ID == ".amdhsa_forward_progress") { 4213 if (IVersion.Major < 10) 4214 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4215 IDRange); 4216 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4217 ValRange); 4218 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4219 PARSE_BITS_ENTRY( 4220 KD.compute_pgm_rsrc2, 4221 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4222 ValRange); 4223 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4224 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4225 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4226 Val, ValRange); 4227 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4228 PARSE_BITS_ENTRY( 4229 KD.compute_pgm_rsrc2, 4230 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4231 ValRange); 4232 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4233 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4234 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4235 Val, ValRange); 4236 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4237 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4238 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4239 Val, ValRange); 4240 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4241 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4242 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4243 Val, ValRange); 4244 } else if (ID == ".amdhsa_exception_int_div_zero") { 4245 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4246 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4247 Val, ValRange); 4248 } else { 4249 return getParser().Error(IDRange.Start, 
4250 "unknown .amdhsa_kernel directive", IDRange); 4251 } 4252 4253 #undef PARSE_BITS_ENTRY 4254 } 4255 4256 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4257 return TokError(".amdhsa_next_free_vgpr directive is required"); 4258 4259 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4260 return TokError(".amdhsa_next_free_sgpr directive is required"); 4261 4262 unsigned VGPRBlocks; 4263 unsigned SGPRBlocks; 4264 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4265 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4266 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4267 SGPRBlocks)) 4268 return true; 4269 4270 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4271 VGPRBlocks)) 4272 return OutOfRangeError(VGPRRange); 4273 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4274 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4275 4276 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4277 SGPRBlocks)) 4278 return OutOfRangeError(SGPRRange); 4279 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4280 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4281 SGPRBlocks); 4282 4283 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4284 return TokError("too many user SGPRs enabled"); 4285 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4286 UserSGPRCount); 4287 4288 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4289 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4290 ReserveFlatScr, ReserveXNACK); 4291 return false; 4292 } 4293 4294 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4295 uint32_t Major; 4296 uint32_t Minor; 4297 4298 if (ParseDirectiveMajorMinor(Major, Minor)) 4299 return true; 4300 4301 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4302 return false; 4303 } 4304 4305 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4306 uint32_t Major; 4307 uint32_t Minor; 4308 uint32_t Stepping; 4309 StringRef VendorName; 4310 StringRef ArchName; 4311 4312 // If this directive has no arguments, then use the ISA version for the 4313 // targeted GPU. 
4314 if (getLexer().is(AsmToken::EndOfStatement)) { 4315 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4316 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4317 ISA.Stepping, 4318 "AMD", "AMDGPU"); 4319 return false; 4320 } 4321 4322 if (ParseDirectiveMajorMinor(Major, Minor)) 4323 return true; 4324 4325 if (getLexer().isNot(AsmToken::Comma)) 4326 return TokError("stepping version number required, comma expected"); 4327 Lex(); 4328 4329 if (ParseAsAbsoluteExpression(Stepping)) 4330 return TokError("invalid stepping version"); 4331 4332 if (getLexer().isNot(AsmToken::Comma)) 4333 return TokError("vendor name required, comma expected"); 4334 Lex(); 4335 4336 if (getLexer().isNot(AsmToken::String)) 4337 return TokError("invalid vendor name"); 4338 4339 VendorName = getLexer().getTok().getStringContents(); 4340 Lex(); 4341 4342 if (getLexer().isNot(AsmToken::Comma)) 4343 return TokError("arch name required, comma expected"); 4344 Lex(); 4345 4346 if (getLexer().isNot(AsmToken::String)) 4347 return TokError("invalid arch name"); 4348 4349 ArchName = getLexer().getTok().getStringContents(); 4350 Lex(); 4351 4352 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4353 VendorName, ArchName); 4354 return false; 4355 } 4356 4357 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4358 amd_kernel_code_t &Header) { 4359 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4360 // assembly for backwards compatibility. 4361 if (ID == "max_scratch_backing_memory_byte_size") { 4362 Parser.eatToEndOfStatement(); 4363 return false; 4364 } 4365 4366 SmallString<40> ErrStr; 4367 raw_svector_ostream Err(ErrStr); 4368 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4369 return TokError(Err.str()); 4370 } 4371 Lex(); 4372 4373 if (ID == "enable_wavefront_size32") { 4374 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4375 if (!isGFX10()) 4376 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4377 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4378 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4379 } else { 4380 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4381 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4382 } 4383 } 4384 4385 if (ID == "wavefront_size") { 4386 if (Header.wavefront_size == 5) { 4387 if (!isGFX10()) 4388 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4389 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4390 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4391 } else if (Header.wavefront_size == 6) { 4392 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4393 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4394 } 4395 } 4396 4397 if (ID == "enable_wgp_mode") { 4398 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4399 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4400 } 4401 4402 if (ID == "enable_mem_ordered") { 4403 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4404 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4405 } 4406 4407 if (ID == "enable_fwd_progress") { 4408 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4409 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4410 } 4411 4412 return false; 4413 } 4414 4415 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4416 amd_kernel_code_t Header; 4417 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4418 4419 while (true) { 4420 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4421 // will set the current token to EndOfStatement. 4422 while(getLexer().is(AsmToken::EndOfStatement)) 4423 Lex(); 4424 4425 if (getLexer().isNot(AsmToken::Identifier)) 4426 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4427 4428 StringRef ID = getLexer().getTok().getIdentifier(); 4429 Lex(); 4430 4431 if (ID == ".end_amd_kernel_code_t") 4432 break; 4433 4434 if (ParseAMDKernelCodeTValue(ID, Header)) 4435 return true; 4436 } 4437 4438 getTargetStreamer().EmitAMDKernelCodeT(Header); 4439 4440 return false; 4441 } 4442 4443 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4444 if (getLexer().isNot(AsmToken::Identifier)) 4445 return TokError("expected symbol name"); 4446 4447 StringRef KernelName = Parser.getTok().getString(); 4448 4449 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4450 ELF::STT_AMDGPU_HSA_KERNEL); 4451 Lex(); 4452 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4453 KernelScope.initialize(getContext()); 4454 return false; 4455 } 4456 4457 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4458 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4459 return Error(getParser().getTok().getLoc(), 4460 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4461 "architectures"); 4462 } 4463 4464 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4465 4466 std::string ISAVersionStringFromSTI; 4467 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4468 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4469 4470 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4471 return Error(getParser().getTok().getLoc(), 4472 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4473 "arguments specified through the command line"); 4474 } 4475 4476 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4477 Lex(); 4478 4479 return false; 4480 } 4481 4482 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4483 const char *AssemblerDirectiveBegin; 4484 const char *AssemblerDirectiveEnd; 4485 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4486 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4487 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4488 HSAMD::V3::AssemblerDirectiveEnd) 4489 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4490 HSAMD::AssemblerDirectiveEnd); 4491 4492 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4493 return Error(getParser().getTok().getLoc(), 4494 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4495 "not available on non-amdhsa OSes")).str()); 4496 } 4497 4498 std::string HSAMetadataString; 4499 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4500 HSAMetadataString)) 4501 return true; 4502 4503 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4504 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4505 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4506 } else { 4507 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4508 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4509 } 4510 4511 return false; 4512 } 4513 4514 /// Common code to parse out a block of text (typically YAML) between start and 4515 /// end directives. 
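/// Collected statements are appended to \p CollectString, separated by the
/// target's statement separator string. Reports an error if the end directive
/// is never found.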
4516 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4517 const char *AssemblerDirectiveEnd, 4518 std::string &CollectString) { 4519 4520 raw_string_ostream CollectStream(CollectString); 4521 4522 getLexer().setSkipSpace(false); 4523 4524 bool FoundEnd = false; 4525 while (!getLexer().is(AsmToken::Eof)) { 4526 while (getLexer().is(AsmToken::Space)) { 4527 CollectStream << getLexer().getTok().getString(); 4528 Lex(); 4529 } 4530 4531 if (getLexer().is(AsmToken::Identifier)) { 4532 StringRef ID = getLexer().getTok().getIdentifier(); 4533 if (ID == AssemblerDirectiveEnd) { 4534 Lex(); 4535 FoundEnd = true; 4536 break; 4537 } 4538 } 4539 4540 CollectStream << Parser.parseStringToEndOfStatement() 4541 << getContext().getAsmInfo()->getSeparatorString(); 4542 4543 Parser.eatToEndOfStatement(); 4544 } 4545 4546 getLexer().setSkipSpace(true); 4547 4548 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4549 return TokError(Twine("expected directive ") + 4550 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4551 } 4552 4553 CollectStream.flush(); 4554 return false; 4555 } 4556 4557 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4558 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4559 std::string String; 4560 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4561 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4562 return true; 4563 4564 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4565 if (!PALMetadata->setFromString(String)) 4566 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4567 return false; 4568 } 4569 4570 /// Parse the assembler directive for old linear-format PAL metadata. 4571 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4572 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4573 return Error(getParser().getTok().getLoc(), 4574 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4575 "not available on non-amdpal OSes")).str()); 4576 } 4577 4578 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4579 PALMetadata->setLegacy(); 4580 for (;;) { 4581 uint32_t Key, Value; 4582 if (ParseAsAbsoluteExpression(Key)) { 4583 return TokError(Twine("invalid value in ") + 4584 Twine(PALMD::AssemblerDirective)); 4585 } 4586 if (getLexer().isNot(AsmToken::Comma)) { 4587 return TokError(Twine("expected an even number of values in ") + 4588 Twine(PALMD::AssemblerDirective)); 4589 } 4590 Lex(); 4591 if (ParseAsAbsoluteExpression(Value)) { 4592 return TokError(Twine("invalid value in ") + 4593 Twine(PALMD::AssemblerDirective)); 4594 } 4595 PALMetadata->setRegister(Key, Value); 4596 if (getLexer().isNot(AsmToken::Comma)) 4597 break; 4598 Lex(); 4599 } 4600 return false; 4601 } 4602 4603 /// ParseDirectiveAMDGPULDS 4604 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4605 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4606 if (getParser().checkForValidSection()) 4607 return true; 4608 4609 StringRef Name; 4610 SMLoc NameLoc = getLexer().getLoc(); 4611 if (getParser().parseIdentifier(Name)) 4612 return TokError("expected identifier in directive"); 4613 4614 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4615 if (parseToken(AsmToken::Comma, "expected ','")) 4616 return true; 4617 4618 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4619 4620 int64_t Size; 4621 SMLoc SizeLoc = getLexer().getLoc(); 4622 if (getParser().parseAbsoluteExpression(Size)) 4623 return true; 4624 if (Size < 0) 4625 return 
Error(SizeLoc, "size must be non-negative"); 4626 if (Size > LocalMemorySize) 4627 return Error(SizeLoc, "size is too large"); 4628 4629 int64_t Alignment = 4; 4630 if (getLexer().is(AsmToken::Comma)) { 4631 Lex(); 4632 SMLoc AlignLoc = getLexer().getLoc(); 4633 if (getParser().parseAbsoluteExpression(Alignment)) 4634 return true; 4635 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 4636 return Error(AlignLoc, "alignment must be a power of two"); 4637 4638 // Alignment larger than the size of LDS is possible in theory, as long 4639 // as the linker manages to place to symbol at address 0, but we do want 4640 // to make sure the alignment fits nicely into a 32-bit integer. 4641 if (Alignment >= 1u << 31) 4642 return Error(AlignLoc, "alignment is too large"); 4643 } 4644 4645 if (parseToken(AsmToken::EndOfStatement, 4646 "unexpected token in '.amdgpu_lds' directive")) 4647 return true; 4648 4649 Symbol->redefineIfPossible(); 4650 if (!Symbol->isUndefined()) 4651 return Error(NameLoc, "invalid symbol redefinition"); 4652 4653 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 4654 return false; 4655 } 4656 4657 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4658 StringRef IDVal = DirectiveID.getString(); 4659 4660 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4661 if (IDVal == ".amdgcn_target") 4662 return ParseDirectiveAMDGCNTarget(); 4663 4664 if (IDVal == ".amdhsa_kernel") 4665 return ParseDirectiveAMDHSAKernel(); 4666 4667 // TODO: Restructure/combine with PAL metadata directive. 4668 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4669 return ParseDirectiveHSAMetadata(); 4670 } else { 4671 if (IDVal == ".hsa_code_object_version") 4672 return ParseDirectiveHSACodeObjectVersion(); 4673 4674 if (IDVal == ".hsa_code_object_isa") 4675 return ParseDirectiveHSACodeObjectISA(); 4676 4677 if (IDVal == ".amd_kernel_code_t") 4678 return ParseDirectiveAMDKernelCodeT(); 4679 4680 if (IDVal == ".amdgpu_hsa_kernel") 4681 return ParseDirectiveAMDGPUHsaKernel(); 4682 4683 if (IDVal == ".amd_amdgpu_isa") 4684 return ParseDirectiveISAVersion(); 4685 4686 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4687 return ParseDirectiveHSAMetadata(); 4688 } 4689 4690 if (IDVal == ".amdgpu_lds") 4691 return ParseDirectiveAMDGPULDS(); 4692 4693 if (IDVal == PALMD::AssemblerDirectiveBegin) 4694 return ParseDirectivePALMetadataBegin(); 4695 4696 if (IDVal == PALMD::AssemblerDirective) 4697 return ParseDirectivePALMetadata(); 4698 4699 return true; 4700 } 4701 4702 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4703 unsigned RegNo) const { 4704 4705 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4706 R.isValid(); ++R) { 4707 if (*R == RegNo) 4708 return isGFX9Plus(); 4709 } 4710 4711 // GFX10 has 2 more SGPRs 104 and 105. 
4712 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4713 R.isValid(); ++R) {
4714 if (*R == RegNo)
4715 return hasSGPR104_SGPR105();
4716 }
4717
4718 switch (RegNo) {
4719 case AMDGPU::SRC_SHARED_BASE:
4720 case AMDGPU::SRC_SHARED_LIMIT:
4721 case AMDGPU::SRC_PRIVATE_BASE:
4722 case AMDGPU::SRC_PRIVATE_LIMIT:
4723 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4724 return !isCI() && !isSI() && !isVI();
4725 case AMDGPU::TBA:
4726 case AMDGPU::TBA_LO:
4727 case AMDGPU::TBA_HI:
4728 case AMDGPU::TMA:
4729 case AMDGPU::TMA_LO:
4730 case AMDGPU::TMA_HI:
4731 return !isGFX9() && !isGFX10();
4732 case AMDGPU::XNACK_MASK:
4733 case AMDGPU::XNACK_MASK_LO:
4734 case AMDGPU::XNACK_MASK_HI:
4735 return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4736 case AMDGPU::SGPR_NULL:
4737 return isGFX10();
4738 default:
4739 break;
4740 }
4741
4742 if (isCI())
4743 return true;
4744
4745 if (isSI() || isGFX10()) {
4746 // No flat_scr on SI.
4747 // On GFX10 flat scratch is not a valid register operand and can only be
4748 // accessed with s_setreg/s_getreg.
4749 switch (RegNo) {
4750 case AMDGPU::FLAT_SCR:
4751 case AMDGPU::FLAT_SCR_LO:
4752 case AMDGPU::FLAT_SCR_HI:
4753 return false;
4754 default:
4755 return true;
4756 }
4757 }
4758
4759 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4760 // SI/CI have.
4761 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4762 R.isValid(); ++R) {
4763 if (*R == RegNo)
4764 return hasSGPR102_SGPR103();
4765 }
4766
4767 return true;
4768 }
4769
4770 OperandMatchResultTy
4771 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4772 OperandMode Mode) {
4773 // Try to parse with a custom parser
4774 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4775
4776 // If we successfully parsed the operand or if there was an error parsing,
4777 // we are done.
4778 //
4779 // If we are parsing after we reach EndOfStatement then this means we
4780 // are appending default values to the Operands list. This is only done
4781 // by custom parsers, so we shouldn't continue on to the generic parsing.
4782 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4783 getLexer().is(AsmToken::EndOfStatement))
4784 return ResTy;
4785
4786 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4787 unsigned Prefix = Operands.size();
4788 SMLoc LBraceLoc = getTok().getLoc();
4789 Parser.Lex(); // eat the '['
4790
4791 for (;;) {
4792 ResTy = parseReg(Operands);
4793 if (ResTy != MatchOperand_Success)
4794 return ResTy;
4795
4796 if (getLexer().is(AsmToken::RBrac))
4797 break;
4798
4799 if (getLexer().isNot(AsmToken::Comma))
4800 return MatchOperand_ParseFail;
4801 Parser.Lex();
4802 }
4803
4804 if (Operands.size() - Prefix > 1) {
4805 Operands.insert(Operands.begin() + Prefix,
4806 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4807 Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4808 getTok().getLoc()));
4809 }
4810
4811 Parser.Lex(); // eat the ']'
4812 return MatchOperand_Success;
4813 }
4814
4815 return parseRegOrImm(Operands);
4816 }
4817
4818 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4819 // Clear any forced encodings from the previous instruction.
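// For example, "v_add_f32_e64" forces the 64-bit (VOP3) encoding and is
// matched as "v_add_f32" after the suffix is stripped.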
4820 setForcedEncodingSize(0); 4821 setForcedDPP(false); 4822 setForcedSDWA(false); 4823 4824 if (Name.endswith("_e64")) { 4825 setForcedEncodingSize(64); 4826 return Name.substr(0, Name.size() - 4); 4827 } else if (Name.endswith("_e32")) { 4828 setForcedEncodingSize(32); 4829 return Name.substr(0, Name.size() - 4); 4830 } else if (Name.endswith("_dpp")) { 4831 setForcedDPP(true); 4832 return Name.substr(0, Name.size() - 4); 4833 } else if (Name.endswith("_sdwa")) { 4834 setForcedSDWA(true); 4835 return Name.substr(0, Name.size() - 5); 4836 } 4837 return Name; 4838 } 4839 4840 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4841 StringRef Name, 4842 SMLoc NameLoc, OperandVector &Operands) { 4843 // Add the instruction mnemonic 4844 Name = parseMnemonicSuffix(Name); 4845 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4846 4847 bool IsMIMG = Name.startswith("image_"); 4848 4849 while (!getLexer().is(AsmToken::EndOfStatement)) { 4850 OperandMode Mode = OperandMode_Default; 4851 if (IsMIMG && isGFX10() && Operands.size() == 2) 4852 Mode = OperandMode_NSA; 4853 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4854 4855 // Eat the comma or space if there is one. 4856 if (getLexer().is(AsmToken::Comma)) 4857 Parser.Lex(); 4858 4859 if (Res != MatchOperand_Success) { 4860 checkUnsupportedInstruction(Name, NameLoc); 4861 if (!Parser.hasPendingError()) { 4862 // FIXME: use real operand location rather than the current location. 4863 StringRef Msg = 4864 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 4865 "not a valid operand."; 4866 Error(getLexer().getLoc(), Msg); 4867 } 4868 while (!getLexer().is(AsmToken::EndOfStatement)) { 4869 Parser.Lex(); 4870 } 4871 return true; 4872 } 4873 } 4874 4875 return false; 4876 } 4877 4878 //===----------------------------------------------------------------------===// 4879 // Utility functions 4880 //===----------------------------------------------------------------------===// 4881 4882 OperandMatchResultTy 4883 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4884 4885 if (!trySkipId(Prefix, AsmToken::Colon)) 4886 return MatchOperand_NoMatch; 4887 4888 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 4889 } 4890 4891 OperandMatchResultTy 4892 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4893 AMDGPUOperand::ImmTy ImmTy, 4894 bool (*ConvertResult)(int64_t&)) { 4895 SMLoc S = getLoc(); 4896 int64_t Value = 0; 4897 4898 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4899 if (Res != MatchOperand_Success) 4900 return Res; 4901 4902 if (ConvertResult && !ConvertResult(Value)) { 4903 Error(S, "invalid " + StringRef(Prefix) + " value."); 4904 } 4905 4906 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4907 return MatchOperand_Success; 4908 } 4909 4910 OperandMatchResultTy 4911 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4912 OperandVector &Operands, 4913 AMDGPUOperand::ImmTy ImmTy, 4914 bool (*ConvertResult)(int64_t&)) { 4915 SMLoc S = getLoc(); 4916 if (!trySkipId(Prefix, AsmToken::Colon)) 4917 return MatchOperand_NoMatch; 4918 4919 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4920 return MatchOperand_ParseFail; 4921 4922 unsigned Val = 0; 4923 const unsigned MaxSize = 4; 4924 4925 // FIXME: How to verify the number of elements matches the number of src 4926 // operands? 
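// Parse a bit list such as [0,1,1,0]; each element must be 0 or 1 and is
// packed into Val starting from the least significant bit.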
4927 for (int I = 0; ; ++I) { 4928 int64_t Op; 4929 SMLoc Loc = getLoc(); 4930 if (!parseExpr(Op)) 4931 return MatchOperand_ParseFail; 4932 4933 if (Op != 0 && Op != 1) { 4934 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4935 return MatchOperand_ParseFail; 4936 } 4937 4938 Val |= (Op << I); 4939 4940 if (trySkipToken(AsmToken::RBrac)) 4941 break; 4942 4943 if (I + 1 == MaxSize) { 4944 Error(getLoc(), "expected a closing square bracket"); 4945 return MatchOperand_ParseFail; 4946 } 4947 4948 if (!skipToken(AsmToken::Comma, "expected a comma")) 4949 return MatchOperand_ParseFail; 4950 } 4951 4952 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4953 return MatchOperand_Success; 4954 } 4955 4956 OperandMatchResultTy 4957 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4958 AMDGPUOperand::ImmTy ImmTy) { 4959 int64_t Bit = 0; 4960 SMLoc S = Parser.getTok().getLoc(); 4961 4962 // We are at the end of the statement, and this is a default argument, so 4963 // use a default value. 4964 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4965 switch(getLexer().getKind()) { 4966 case AsmToken::Identifier: { 4967 StringRef Tok = Parser.getTok().getString(); 4968 if (Tok == Name) { 4969 if (Tok == "r128" && !hasMIMG_R128()) 4970 Error(S, "r128 modifier is not supported on this GPU"); 4971 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 4972 Error(S, "a16 modifier is not supported on this GPU"); 4973 Bit = 1; 4974 Parser.Lex(); 4975 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4976 Bit = 0; 4977 Parser.Lex(); 4978 } else { 4979 return MatchOperand_NoMatch; 4980 } 4981 break; 4982 } 4983 default: 4984 return MatchOperand_NoMatch; 4985 } 4986 } 4987 4988 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4989 return MatchOperand_ParseFail; 4990 4991 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 4992 ImmTy = AMDGPUOperand::ImmTyR128A16; 4993 4994 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4995 return MatchOperand_Success; 4996 } 4997 4998 static void addOptionalImmOperand( 4999 MCInst& Inst, const OperandVector& Operands, 5000 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5001 AMDGPUOperand::ImmTy ImmT, 5002 int64_t Default = 0) { 5003 auto i = OptionalIdx.find(ImmT); 5004 if (i != OptionalIdx.end()) { 5005 unsigned Idx = i->second; 5006 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5007 } else { 5008 Inst.addOperand(MCOperand::createImm(Default)); 5009 } 5010 } 5011 5012 OperandMatchResultTy 5013 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 5014 if (getLexer().isNot(AsmToken::Identifier)) { 5015 return MatchOperand_NoMatch; 5016 } 5017 StringRef Tok = Parser.getTok().getString(); 5018 if (Tok != Prefix) { 5019 return MatchOperand_NoMatch; 5020 } 5021 5022 Parser.Lex(); 5023 if (getLexer().isNot(AsmToken::Colon)) { 5024 return MatchOperand_ParseFail; 5025 } 5026 5027 Parser.Lex(); 5028 if (getLexer().isNot(AsmToken::Identifier)) { 5029 return MatchOperand_ParseFail; 5030 } 5031 5032 Value = Parser.getTok().getString(); 5033 return MatchOperand_Success; 5034 } 5035 5036 //===----------------------------------------------------------------------===// 5037 // MTBUF format 5038 //===----------------------------------------------------------------------===// 5039 5040 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5041 int64_t MaxVal, 5042 int64_t &Fmt) { 5043 int64_t Val; 5044 SMLoc Loc = getLoc(); 5045 5046 auto Res = parseIntWithPrefix(Pref, Val); 5047 if (Res 
== MatchOperand_ParseFail) 5048 return false; 5049 if (Res == MatchOperand_NoMatch) 5050 return true; 5051 5052 if (Val < 0 || Val > MaxVal) { 5053 Error(Loc, Twine("out of range ", StringRef(Pref))); 5054 return false; 5055 } 5056 5057 Fmt = Val; 5058 return true; 5059 } 5060 5061 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5062 // values to live in a joint format operand in the MCInst encoding. 5063 OperandMatchResultTy 5064 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5065 using namespace llvm::AMDGPU::MTBUFFormat; 5066 5067 int64_t Dfmt = DFMT_UNDEF; 5068 int64_t Nfmt = NFMT_UNDEF; 5069 5070 // dfmt and nfmt can appear in either order, and each is optional. 5071 for (int I = 0; I < 2; ++I) { 5072 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5073 return MatchOperand_ParseFail; 5074 5075 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5076 return MatchOperand_ParseFail; 5077 } 5078 // Skip optional comma between dfmt/nfmt 5079 // but guard against 2 commas following each other. 5080 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5081 !peekToken().is(AsmToken::Comma)) { 5082 trySkipToken(AsmToken::Comma); 5083 } 5084 } 5085 5086 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5087 return MatchOperand_NoMatch; 5088 5089 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5090 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 5091 5092 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5093 return MatchOperand_Success; 5094 } 5095 5096 OperandMatchResultTy 5097 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5098 using namespace llvm::AMDGPU::MTBUFFormat; 5099 5100 int64_t Fmt = UFMT_UNDEF; 5101 5102 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5103 return MatchOperand_ParseFail; 5104 5105 if (Fmt == UFMT_UNDEF) 5106 return MatchOperand_NoMatch; 5107 5108 Format = Fmt; 5109 return MatchOperand_Success; 5110 } 5111 5112 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5113 int64_t &Nfmt, 5114 StringRef FormatStr, 5115 SMLoc Loc) { 5116 using namespace llvm::AMDGPU::MTBUFFormat; 5117 int64_t Format; 5118 5119 Format = getDfmt(FormatStr); 5120 if (Format != DFMT_UNDEF) { 5121 Dfmt = Format; 5122 return true; 5123 } 5124 5125 Format = getNfmt(FormatStr, getSTI()); 5126 if (Format != NFMT_UNDEF) { 5127 Nfmt = Format; 5128 return true; 5129 } 5130 5131 Error(Loc, "unsupported format"); 5132 return false; 5133 } 5134 5135 OperandMatchResultTy 5136 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5137 SMLoc FormatLoc, 5138 int64_t &Format) { 5139 using namespace llvm::AMDGPU::MTBUFFormat; 5140 5141 int64_t Dfmt = DFMT_UNDEF; 5142 int64_t Nfmt = NFMT_UNDEF; 5143 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5144 return MatchOperand_ParseFail; 5145 5146 if (trySkipToken(AsmToken::Comma)) { 5147 StringRef Str; 5148 SMLoc Loc = getLoc(); 5149 if (!parseId(Str, "expected a format string") || 5150 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5151 return MatchOperand_ParseFail; 5152 } 5153 if (Dfmt == DFMT_UNDEF) { 5154 Error(Loc, "duplicate numeric format"); 5155 return MatchOperand_ParseFail; 5156 } else if (Nfmt == NFMT_UNDEF) { 5157 Error(Loc, "duplicate data format"); 5158 return MatchOperand_ParseFail; 5159 } 5160 } 5161 5162 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5163 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5164 5165 if (isGFX10()) { 5166 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5167 if (Ufmt == UFMT_UNDEF) { 5168 Error(FormatLoc, "unsupported format"); 5169 return MatchOperand_ParseFail; 5170 } 5171 Format = Ufmt; 5172 } else { 5173 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5174 } 5175 5176 return MatchOperand_Success; 5177 } 5178 5179 OperandMatchResultTy 5180 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5181 SMLoc Loc, 5182 int64_t &Format) { 5183 using namespace llvm::AMDGPU::MTBUFFormat; 5184 5185 auto Id = getUnifiedFormat(FormatStr); 5186 if (Id == UFMT_UNDEF) 5187 return MatchOperand_NoMatch; 5188 5189 if (!isGFX10()) { 5190 Error(Loc, "unified format is not supported on this GPU"); 5191 return MatchOperand_ParseFail; 5192 } 5193 5194 Format = Id; 5195 return MatchOperand_Success; 5196 } 5197 5198 OperandMatchResultTy 5199 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5200 using namespace llvm::AMDGPU::MTBUFFormat; 5201 SMLoc Loc = getLoc(); 5202 5203 if (!parseExpr(Format)) 5204 return MatchOperand_ParseFail; 5205 if (!isValidFormatEncoding(Format, getSTI())) { 5206 Error(Loc, "out of range format"); 5207 return MatchOperand_ParseFail; 5208 } 5209 5210 return MatchOperand_Success; 5211 } 5212 5213 OperandMatchResultTy 5214 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5215 using namespace llvm::AMDGPU::MTBUFFormat; 5216 5217 if (!trySkipId("format", AsmToken::Colon)) 5218 return MatchOperand_NoMatch; 5219 5220 if (trySkipToken(AsmToken::LBrac)) { 5221 StringRef FormatStr; 5222 SMLoc Loc = getLoc(); 5223 if (!parseId(FormatStr, "expected a format string")) 5224 return MatchOperand_ParseFail; 5225 5226 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5227 if (Res == MatchOperand_NoMatch) 5228 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5229 if (Res != MatchOperand_Success) 5230 return Res; 5231 5232 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5233 return MatchOperand_ParseFail; 5234 5235 return MatchOperand_Success; 5236 } 5237 5238 return parseNumericFormat(Format); 5239 } 5240 5241 OperandMatchResultTy 5242 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5243 using namespace llvm::AMDGPU::MTBUFFormat; 5244 5245 int64_t Format = getDefaultFormatEncoding(getSTI()); 5246 OperandMatchResultTy Res; 5247 SMLoc Loc = getLoc(); 5248 5249 // Parse legacy format syntax. 5250 Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5251 if (Res == MatchOperand_ParseFail) 5252 return Res; 5253 5254 bool FormatFound = (Res == MatchOperand_Success); 5255 5256 Operands.push_back( 5257 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5258 5259 if (FormatFound) 5260 trySkipToken(AsmToken::Comma); 5261 5262 if (isToken(AsmToken::EndOfStatement)) { 5263 // We are expecting an soffset operand, 5264 // but let matcher handle the error. 5265 return MatchOperand_Success; 5266 } 5267 5268 // Parse soffset. 
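// A symbolic format may also follow soffset; if one is found below, it
// overwrites the placeholder format operand added above.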
5269 Res = parseRegOrImm(Operands); 5270 if (Res != MatchOperand_Success) 5271 return Res; 5272 5273 trySkipToken(AsmToken::Comma); 5274 5275 if (!FormatFound) { 5276 Res = parseSymbolicOrNumericFormat(Format); 5277 if (Res == MatchOperand_ParseFail) 5278 return Res; 5279 if (Res == MatchOperand_Success) { 5280 auto Size = Operands.size(); 5281 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5282 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5283 Op.setImm(Format); 5284 } 5285 return MatchOperand_Success; 5286 } 5287 5288 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5289 Error(getLoc(), "duplicate format"); 5290 return MatchOperand_ParseFail; 5291 } 5292 return MatchOperand_Success; 5293 } 5294 5295 //===----------------------------------------------------------------------===// 5296 // ds 5297 //===----------------------------------------------------------------------===// 5298 5299 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5300 const OperandVector &Operands) { 5301 OptionalImmIndexMap OptionalIdx; 5302 5303 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5304 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5305 5306 // Add the register arguments 5307 if (Op.isReg()) { 5308 Op.addRegOperands(Inst, 1); 5309 continue; 5310 } 5311 5312 // Handle optional arguments 5313 OptionalIdx[Op.getImmTy()] = i; 5314 } 5315 5316 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5317 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5318 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5319 5320 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5321 } 5322 5323 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5324 bool IsGdsHardcoded) { 5325 OptionalImmIndexMap OptionalIdx; 5326 5327 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5328 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5329 5330 // Add the register arguments 5331 if (Op.isReg()) { 5332 Op.addRegOperands(Inst, 1); 5333 continue; 5334 } 5335 5336 if (Op.isToken() && Op.getToken() == "gds") { 5337 IsGdsHardcoded = true; 5338 continue; 5339 } 5340 5341 // Handle optional arguments 5342 OptionalIdx[Op.getImmTy()] = i; 5343 } 5344 5345 AMDGPUOperand::ImmTy OffsetType = 5346 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5347 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5348 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5349 AMDGPUOperand::ImmTyOffset; 5350 5351 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5352 5353 if (!IsGdsHardcoded) { 5354 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5355 } 5356 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5357 } 5358 5359 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5360 OptionalImmIndexMap OptionalIdx; 5361 5362 unsigned OperandIdx[4]; 5363 unsigned EnMask = 0; 5364 int SrcIdx = 0; 5365 5366 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5367 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5368 5369 // Add the register arguments 5370 if (Op.isReg()) { 5371 assert(SrcIdx < 4); 5372 OperandIdx[SrcIdx] = Inst.size(); 5373 Op.addRegOperands(Inst, 1); 5374 ++SrcIdx; 5375 continue; 5376 } 5377 5378 if (Op.isOff()) { 5379 assert(SrcIdx < 4); 5380 OperandIdx[SrcIdx] = Inst.size(); 5381 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5382 ++SrcIdx; 5383 continue; 5384 } 5385 5386 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5387 Op.addImmOperands(Inst, 1); 5388 continue; 5389 } 5390 5391 if (Op.isToken() && Op.getToken() == "done") 5392 continue; 5393 5394 // Handle optional arguments 5395 OptionalIdx[Op.getImmTy()] = i; 5396 } 5397 5398 assert(SrcIdx == 4); 5399 5400 bool Compr = false; 5401 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5402 Compr = true; 5403 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5404 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5405 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5406 } 5407 5408 for (auto i = 0; i < SrcIdx; ++i) { 5409 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5410 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5411 } 5412 } 5413 5414 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5415 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5416 5417 Inst.addOperand(MCOperand::createImm(EnMask)); 5418 } 5419 5420 //===----------------------------------------------------------------------===// 5421 // s_waitcnt 5422 //===----------------------------------------------------------------------===// 5423 5424 static bool 5425 encodeCnt( 5426 const AMDGPU::IsaVersion ISA, 5427 int64_t &IntVal, 5428 int64_t CntVal, 5429 bool Saturate, 5430 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5431 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5432 { 5433 bool Failed = false; 5434 5435 IntVal = encode(ISA, IntVal, CntVal); 5436 if (CntVal != decode(ISA, IntVal)) { 5437 if (Saturate) { 5438 IntVal = encode(ISA, IntVal, -1); 5439 } else { 5440 Failed = true; 5441 } 5442 } 5443 return Failed; 5444 } 5445 5446 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5447 5448 SMLoc CntLoc = getLoc(); 5449 StringRef CntName = getTokenStr(); 5450 5451 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5452 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5453 return false; 5454 5455 int64_t CntVal; 5456 SMLoc ValLoc = getLoc(); 5457 if (!parseExpr(CntVal)) 5458 return false; 5459 5460 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5461 5462 bool Failed = true; 5463 bool Sat = CntName.endswith("_sat"); 5464 5465 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5466 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5467 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5468 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5469 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5470 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5471 } else { 5472 Error(CntLoc, "invalid counter name " + CntName); 5473 return false; 5474 } 5475 5476 if (Failed) { 5477 Error(ValLoc, "too large value for " + CntName); 5478 return false; 5479 } 5480 5481 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5482 return false; 5483 5484 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5485 if (isToken(AsmToken::EndOfStatement)) { 5486 Error(getLoc(), "expected a counter name"); 5487 return false; 5488 } 5489 } 5490 5491 return true; 5492 } 5493 5494 OperandMatchResultTy 5495 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5496 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5497 int64_t Waitcnt = getWaitcntBitMask(ISA); 5498 SMLoc S = getLoc(); 5499 5500 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5501 while (!isToken(AsmToken::EndOfStatement)) { 5502 if (!parseCnt(Waitcnt)) 5503 return MatchOperand_ParseFail; 5504 } 5505 } else { 5506 if (!parseExpr(Waitcnt)) 5507 return MatchOperand_ParseFail; 5508 } 5509 5510 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5511 return MatchOperand_Success; 5512 } 5513 5514 bool 5515 AMDGPUOperand::isSWaitCnt() const { 5516 return isImm(); 5517 } 5518 5519 //===----------------------------------------------------------------------===// 5520 // hwreg 5521 //===----------------------------------------------------------------------===// 5522 5523 bool 5524 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5525 int64_t &Offset, 5526 int64_t 
&Width) { 5527 using namespace llvm::AMDGPU::Hwreg; 5528 5529 // The register may be specified by name or using a numeric code 5530 if (isToken(AsmToken::Identifier) && 5531 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5532 HwReg.IsSymbolic = true; 5533 lex(); // skip message name 5534 } else if (!parseExpr(HwReg.Id)) { 5535 return false; 5536 } 5537 5538 if (trySkipToken(AsmToken::RParen)) 5539 return true; 5540 5541 // parse optional params 5542 return 5543 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5544 parseExpr(Offset) && 5545 skipToken(AsmToken::Comma, "expected a comma") && 5546 parseExpr(Width) && 5547 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5548 } 5549 5550 bool 5551 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5552 const int64_t Offset, 5553 const int64_t Width, 5554 const SMLoc Loc) { 5555 5556 using namespace llvm::AMDGPU::Hwreg; 5557 5558 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5559 Error(Loc, "specified hardware register is not supported on this GPU"); 5560 return false; 5561 } else if (!isValidHwreg(HwReg.Id)) { 5562 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5563 return false; 5564 } else if (!isValidHwregOffset(Offset)) { 5565 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5566 return false; 5567 } else if (!isValidHwregWidth(Width)) { 5568 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5569 return false; 5570 } 5571 return true; 5572 } 5573 5574 OperandMatchResultTy 5575 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5576 using namespace llvm::AMDGPU::Hwreg; 5577 5578 int64_t ImmVal = 0; 5579 SMLoc Loc = getLoc(); 5580 5581 if (trySkipId("hwreg", AsmToken::LParen)) { 5582 OperandInfoTy HwReg(ID_UNKNOWN_); 5583 int64_t Offset = OFFSET_DEFAULT_; 5584 int64_t Width = WIDTH_DEFAULT_; 5585 if (parseHwregBody(HwReg, Offset, Width) && 5586 validateHwreg(HwReg, Offset, Width, Loc)) { 5587 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5588 } else { 5589 return MatchOperand_ParseFail; 5590 } 5591 } else if (parseExpr(ImmVal)) { 5592 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5593 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5594 return MatchOperand_ParseFail; 5595 } 5596 } else { 5597 return MatchOperand_ParseFail; 5598 } 5599 5600 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5601 return MatchOperand_Success; 5602 } 5603 5604 bool AMDGPUOperand::isHwreg() const { 5605 return isImmTy(ImmTyHwreg); 5606 } 5607 5608 //===----------------------------------------------------------------------===// 5609 // sendmsg 5610 //===----------------------------------------------------------------------===// 5611 5612 bool 5613 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5614 OperandInfoTy &Op, 5615 OperandInfoTy &Stream) { 5616 using namespace llvm::AMDGPU::SendMsg; 5617 5618 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5619 Msg.IsSymbolic = true; 5620 lex(); // skip message name 5621 } else if (!parseExpr(Msg.Id)) { 5622 return false; 5623 } 5624 5625 if (trySkipToken(AsmToken::Comma)) { 5626 Op.IsDefined = true; 5627 if (isToken(AsmToken::Identifier) && 5628 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5629 lex(); // skip operation name 5630 } else if (!parseExpr(Op.Id)) { 5631 return false; 5632 } 5633 5634 if (trySkipToken(AsmToken::Comma)) { 5635 Stream.IsDefined = true; 5636 if (!parseExpr(Stream.Id)) 
5637 return false; 5638 } 5639 } 5640 5641 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5642 } 5643 5644 bool 5645 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5646 const OperandInfoTy &Op, 5647 const OperandInfoTy &Stream, 5648 const SMLoc S) { 5649 using namespace llvm::AMDGPU::SendMsg; 5650 5651 // Validation strictness depends on whether message is specified 5652 // in a symbolc or in a numeric form. In the latter case 5653 // only encoding possibility is checked. 5654 bool Strict = Msg.IsSymbolic; 5655 5656 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5657 Error(S, "invalid message id"); 5658 return false; 5659 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5660 Error(S, Op.IsDefined ? 5661 "message does not support operations" : 5662 "missing message operation"); 5663 return false; 5664 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5665 Error(S, "invalid operation id"); 5666 return false; 5667 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5668 Error(S, "message operation does not support streams"); 5669 return false; 5670 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5671 Error(S, "invalid message stream id"); 5672 return false; 5673 } 5674 return true; 5675 } 5676 5677 OperandMatchResultTy 5678 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5679 using namespace llvm::AMDGPU::SendMsg; 5680 5681 int64_t ImmVal = 0; 5682 SMLoc Loc = getLoc(); 5683 5684 if (trySkipId("sendmsg", AsmToken::LParen)) { 5685 OperandInfoTy Msg(ID_UNKNOWN_); 5686 OperandInfoTy Op(OP_NONE_); 5687 OperandInfoTy Stream(STREAM_ID_NONE_); 5688 if (parseSendMsgBody(Msg, Op, Stream) && 5689 validateSendMsg(Msg, Op, Stream, Loc)) { 5690 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5691 } else { 5692 return MatchOperand_ParseFail; 5693 } 5694 } else if (parseExpr(ImmVal)) { 5695 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5696 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5697 return MatchOperand_ParseFail; 5698 } 5699 } else { 5700 return MatchOperand_ParseFail; 5701 } 5702 5703 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5704 return MatchOperand_Success; 5705 } 5706 5707 bool AMDGPUOperand::isSendMsg() const { 5708 return isImmTy(ImmTySendMsg); 5709 } 5710 5711 //===----------------------------------------------------------------------===// 5712 // v_interp 5713 //===----------------------------------------------------------------------===// 5714 5715 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5716 if (getLexer().getKind() != AsmToken::Identifier) 5717 return MatchOperand_NoMatch; 5718 5719 StringRef Str = Parser.getTok().getString(); 5720 int Slot = StringSwitch<int>(Str) 5721 .Case("p10", 0) 5722 .Case("p20", 1) 5723 .Case("p0", 2) 5724 .Default(-1); 5725 5726 SMLoc S = Parser.getTok().getLoc(); 5727 if (Slot == -1) 5728 return MatchOperand_ParseFail; 5729 5730 Parser.Lex(); 5731 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5732 AMDGPUOperand::ImmTyInterpSlot)); 5733 return MatchOperand_Success; 5734 } 5735 5736 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5737 if (getLexer().getKind() != AsmToken::Identifier) 5738 return MatchOperand_NoMatch; 5739 5740 StringRef Str = Parser.getTok().getString(); 5741 if (!Str.startswith("attr")) 5742 return MatchOperand_NoMatch; 5743 5744 StringRef Chan = Str.take_back(2); 5745 int AttrChan = 
StringSwitch<int>(Chan) 5746 .Case(".x", 0) 5747 .Case(".y", 1) 5748 .Case(".z", 2) 5749 .Case(".w", 3) 5750 .Default(-1); 5751 if (AttrChan == -1) 5752 return MatchOperand_ParseFail; 5753 5754 Str = Str.drop_back(2).drop_front(4); 5755 5756 uint8_t Attr; 5757 if (Str.getAsInteger(10, Attr)) 5758 return MatchOperand_ParseFail; 5759 5760 SMLoc S = Parser.getTok().getLoc(); 5761 Parser.Lex(); 5762 if (Attr > 63) { 5763 Error(S, "out of bounds attr"); 5764 return MatchOperand_ParseFail; 5765 } 5766 5767 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5768 5769 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5770 AMDGPUOperand::ImmTyInterpAttr)); 5771 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5772 AMDGPUOperand::ImmTyAttrChan)); 5773 return MatchOperand_Success; 5774 } 5775 5776 //===----------------------------------------------------------------------===// 5777 // exp 5778 //===----------------------------------------------------------------------===// 5779 5780 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5781 uint8_t &Val) { 5782 if (Str == "null") { 5783 Val = 9; 5784 return MatchOperand_Success; 5785 } 5786 5787 if (Str.startswith("mrt")) { 5788 Str = Str.drop_front(3); 5789 if (Str == "z") { // == mrtz 5790 Val = 8; 5791 return MatchOperand_Success; 5792 } 5793 5794 if (Str.getAsInteger(10, Val)) 5795 return MatchOperand_ParseFail; 5796 5797 if (Val > 7) { 5798 Error(getLoc(), "invalid exp target"); 5799 return MatchOperand_ParseFail; 5800 } 5801 5802 return MatchOperand_Success; 5803 } 5804 5805 if (Str.startswith("pos")) { 5806 Str = Str.drop_front(3); 5807 if (Str.getAsInteger(10, Val)) 5808 return MatchOperand_ParseFail; 5809 5810 if (Val > 4 || (Val == 4 && !isGFX10())) { 5811 Error(getLoc(), "invalid exp target"); 5812 return MatchOperand_ParseFail; 5813 } 5814 5815 Val += 12; 5816 return MatchOperand_Success; 5817 } 5818 5819 if (isGFX10() && Str == "prim") { 5820 Val = 20; 5821 return MatchOperand_Success; 5822 } 5823 5824 if (Str.startswith("param")) { 5825 Str = Str.drop_front(5); 5826 if (Str.getAsInteger(10, Val)) 5827 return MatchOperand_ParseFail; 5828 5829 if (Val >= 32) { 5830 Error(getLoc(), "invalid exp target"); 5831 return MatchOperand_ParseFail; 5832 } 5833 5834 Val += 32; 5835 return MatchOperand_Success; 5836 } 5837 5838 if (Str.startswith("invalid_target_")) { 5839 Str = Str.drop_front(15); 5840 if (Str.getAsInteger(10, Val)) 5841 return MatchOperand_ParseFail; 5842 5843 Error(getLoc(), "invalid exp target"); 5844 return MatchOperand_ParseFail; 5845 } 5846 5847 return MatchOperand_NoMatch; 5848 } 5849 5850 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5851 uint8_t Val; 5852 StringRef Str = Parser.getTok().getString(); 5853 5854 auto Res = parseExpTgtImpl(Str, Val); 5855 if (Res != MatchOperand_Success) 5856 return Res; 5857 5858 SMLoc S = Parser.getTok().getLoc(); 5859 Parser.Lex(); 5860 5861 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5862 AMDGPUOperand::ImmTyExpTgt)); 5863 return MatchOperand_Success; 5864 } 5865 5866 //===----------------------------------------------------------------------===// 5867 // parser helpers 5868 //===----------------------------------------------------------------------===// 5869 5870 bool 5871 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5872 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5873 } 5874 5875 bool 5876 AMDGPUAsmParser::isId(const StringRef Id) const { 5877 return 
isId(getToken(), Id); 5878 } 5879 5880 bool 5881 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5882 return getTokenKind() == Kind; 5883 } 5884 5885 bool 5886 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5887 if (isId(Id)) { 5888 lex(); 5889 return true; 5890 } 5891 return false; 5892 } 5893 5894 bool 5895 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5896 if (isId(Id) && peekToken().is(Kind)) { 5897 lex(); 5898 lex(); 5899 return true; 5900 } 5901 return false; 5902 } 5903 5904 bool 5905 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5906 if (isToken(Kind)) { 5907 lex(); 5908 return true; 5909 } 5910 return false; 5911 } 5912 5913 bool 5914 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5915 const StringRef ErrMsg) { 5916 if (!trySkipToken(Kind)) { 5917 Error(getLoc(), ErrMsg); 5918 return false; 5919 } 5920 return true; 5921 } 5922 5923 bool 5924 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5925 return !getParser().parseAbsoluteExpression(Imm); 5926 } 5927 5928 bool 5929 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5930 SMLoc S = getLoc(); 5931 5932 const MCExpr *Expr; 5933 if (Parser.parseExpression(Expr)) 5934 return false; 5935 5936 int64_t IntVal; 5937 if (Expr->evaluateAsAbsolute(IntVal)) { 5938 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5939 } else { 5940 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5941 } 5942 return true; 5943 } 5944 5945 bool 5946 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5947 if (isToken(AsmToken::String)) { 5948 Val = getToken().getStringContents(); 5949 lex(); 5950 return true; 5951 } else { 5952 Error(getLoc(), ErrMsg); 5953 return false; 5954 } 5955 } 5956 5957 bool 5958 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 5959 if (isToken(AsmToken::Identifier)) { 5960 Val = getTokenStr(); 5961 lex(); 5962 return true; 5963 } else { 5964 Error(getLoc(), ErrMsg); 5965 return false; 5966 } 5967 } 5968 5969 AsmToken 5970 AMDGPUAsmParser::getToken() const { 5971 return Parser.getTok(); 5972 } 5973 5974 AsmToken 5975 AMDGPUAsmParser::peekToken() { 5976 return isToken(AsmToken::EndOfStatement) ? 
getToken() : getLexer().peekTok(); 5977 } 5978 5979 void 5980 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5981 auto TokCount = getLexer().peekTokens(Tokens); 5982 5983 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5984 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5985 } 5986 5987 AsmToken::TokenKind 5988 AMDGPUAsmParser::getTokenKind() const { 5989 return getLexer().getKind(); 5990 } 5991 5992 SMLoc 5993 AMDGPUAsmParser::getLoc() const { 5994 return getToken().getLoc(); 5995 } 5996 5997 StringRef 5998 AMDGPUAsmParser::getTokenStr() const { 5999 return getToken().getString(); 6000 } 6001 6002 void 6003 AMDGPUAsmParser::lex() { 6004 Parser.Lex(); 6005 } 6006 6007 //===----------------------------------------------------------------------===// 6008 // swizzle 6009 //===----------------------------------------------------------------------===// 6010 6011 LLVM_READNONE 6012 static unsigned 6013 encodeBitmaskPerm(const unsigned AndMask, 6014 const unsigned OrMask, 6015 const unsigned XorMask) { 6016 using namespace llvm::AMDGPU::Swizzle; 6017 6018 return BITMASK_PERM_ENC | 6019 (AndMask << BITMASK_AND_SHIFT) | 6020 (OrMask << BITMASK_OR_SHIFT) | 6021 (XorMask << BITMASK_XOR_SHIFT); 6022 } 6023 6024 bool 6025 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6026 const unsigned MinVal, 6027 const unsigned MaxVal, 6028 const StringRef ErrMsg) { 6029 for (unsigned i = 0; i < OpNum; ++i) { 6030 if (!skipToken(AsmToken::Comma, "expected a comma")){ 6031 return false; 6032 } 6033 SMLoc ExprLoc = Parser.getTok().getLoc(); 6034 if (!parseExpr(Op[i])) { 6035 return false; 6036 } 6037 if (Op[i] < MinVal || Op[i] > MaxVal) { 6038 Error(ExprLoc, ErrMsg); 6039 return false; 6040 } 6041 } 6042 6043 return true; 6044 } 6045 6046 bool 6047 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 6048 using namespace llvm::AMDGPU::Swizzle; 6049 6050 int64_t Lane[LANE_NUM]; 6051 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6052 "expected a 2-bit lane id")) { 6053 Imm = QUAD_PERM_ENC; 6054 for (unsigned I = 0; I < LANE_NUM; ++I) { 6055 Imm |= Lane[I] << (LANE_SHIFT * I); 6056 } 6057 return true; 6058 } 6059 return false; 6060 } 6061 6062 bool 6063 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6064 using namespace llvm::AMDGPU::Swizzle; 6065 6066 SMLoc S = Parser.getTok().getLoc(); 6067 int64_t GroupSize; 6068 int64_t LaneIdx; 6069 6070 if (!parseSwizzleOperands(1, &GroupSize, 6071 2, 32, 6072 "group size must be in the interval [2,32]")) { 6073 return false; 6074 } 6075 if (!isPowerOf2_64(GroupSize)) { 6076 Error(S, "group size must be a power of two"); 6077 return false; 6078 } 6079 if (parseSwizzleOperands(1, &LaneIdx, 6080 0, GroupSize - 1, 6081 "lane id must be in the interval [0,group size - 1]")) { 6082 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6083 return true; 6084 } 6085 return false; 6086 } 6087 6088 bool 6089 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6090 using namespace llvm::AMDGPU::Swizzle; 6091 6092 SMLoc S = Parser.getTok().getLoc(); 6093 int64_t GroupSize; 6094 6095 if (!parseSwizzleOperands(1, &GroupSize, 6096 2, 32, "group size must be in the interval [2,32]")) { 6097 return false; 6098 } 6099 if (!isPowerOf2_64(GroupSize)) { 6100 Error(S, "group size must be a power of two"); 6101 return false; 6102 } 6103 6104 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6105 return true; 6106 } 6107 6108 bool 6109 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6110 using namespace 
llvm::AMDGPU::Swizzle; 6111 6112 SMLoc S = Parser.getTok().getLoc(); 6113 int64_t GroupSize; 6114 6115 if (!parseSwizzleOperands(1, &GroupSize, 6116 1, 16, "group size must be in the interval [1,16]")) { 6117 return false; 6118 } 6119 if (!isPowerOf2_64(GroupSize)) { 6120 Error(S, "group size must be a power of two"); 6121 return false; 6122 } 6123 6124 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6125 return true; 6126 } 6127 6128 bool 6129 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6130 using namespace llvm::AMDGPU::Swizzle; 6131 6132 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6133 return false; 6134 } 6135 6136 StringRef Ctl; 6137 SMLoc StrLoc = Parser.getTok().getLoc(); 6138 if (!parseString(Ctl)) { 6139 return false; 6140 } 6141 if (Ctl.size() != BITMASK_WIDTH) { 6142 Error(StrLoc, "expected a 5-character mask"); 6143 return false; 6144 } 6145 6146 unsigned AndMask = 0; 6147 unsigned OrMask = 0; 6148 unsigned XorMask = 0; 6149 6150 for (size_t i = 0; i < Ctl.size(); ++i) { 6151 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6152 switch(Ctl[i]) { 6153 default: 6154 Error(StrLoc, "invalid mask"); 6155 return false; 6156 case '0': 6157 break; 6158 case '1': 6159 OrMask |= Mask; 6160 break; 6161 case 'p': 6162 AndMask |= Mask; 6163 break; 6164 case 'i': 6165 AndMask |= Mask; 6166 XorMask |= Mask; 6167 break; 6168 } 6169 } 6170 6171 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6172 return true; 6173 } 6174 6175 bool 6176 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6177 6178 SMLoc OffsetLoc = Parser.getTok().getLoc(); 6179 6180 if (!parseExpr(Imm)) { 6181 return false; 6182 } 6183 if (!isUInt<16>(Imm)) { 6184 Error(OffsetLoc, "expected a 16-bit offset"); 6185 return false; 6186 } 6187 return true; 6188 } 6189 6190 bool 6191 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6192 using namespace llvm::AMDGPU::Swizzle; 6193 6194 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 6195 6196 SMLoc ModeLoc = Parser.getTok().getLoc(); 6197 bool Ok = false; 6198 6199 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6200 Ok = parseSwizzleQuadPerm(Imm); 6201 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6202 Ok = parseSwizzleBitmaskPerm(Imm); 6203 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6204 Ok = parseSwizzleBroadcast(Imm); 6205 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6206 Ok = parseSwizzleSwap(Imm); 6207 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6208 Ok = parseSwizzleReverse(Imm); 6209 } else { 6210 Error(ModeLoc, "expected a swizzle mode"); 6211 } 6212 6213 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6214 } 6215 6216 return false; 6217 } 6218 6219 OperandMatchResultTy 6220 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6221 SMLoc S = Parser.getTok().getLoc(); 6222 int64_t Imm = 0; 6223 6224 if (trySkipId("offset")) { 6225 6226 bool Ok = false; 6227 if (skipToken(AsmToken::Colon, "expected a colon")) { 6228 if (trySkipId("swizzle")) { 6229 Ok = parseSwizzleMacro(Imm); 6230 } else { 6231 Ok = parseSwizzleOffset(Imm); 6232 } 6233 } 6234 6235 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6236 6237 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6238 } else { 6239 // Swizzle "offset" operand is optional. 6240 // If it is omitted, try parsing other optional operands. 
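  // (Illustrative note, not from the original source: per the parsing code
  //  above, a ds_swizzle_b32 operand can be written either as a raw 16-bit
  //  value, e.g. "offset:0xFFFF", or as a macro form such as
  //  "offset:swizzle(QUAD_PERM, 0, 1, 2, 3)", with mode names taken from
  //  Swizzle::IdSymbolic. If neither form is present, control falls through
  //  to the generic optional-operand parser on the next line.)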
6241 return parseOptionalOpr(Operands); 6242 } 6243 } 6244 6245 bool 6246 AMDGPUOperand::isSwizzle() const { 6247 return isImmTy(ImmTySwizzle); 6248 } 6249 6250 //===----------------------------------------------------------------------===// 6251 // VGPR Index Mode 6252 //===----------------------------------------------------------------------===// 6253 6254 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6255 6256 using namespace llvm::AMDGPU::VGPRIndexMode; 6257 6258 if (trySkipToken(AsmToken::RParen)) { 6259 return OFF; 6260 } 6261 6262 int64_t Imm = 0; 6263 6264 while (true) { 6265 unsigned Mode = 0; 6266 SMLoc S = Parser.getTok().getLoc(); 6267 6268 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6269 if (trySkipId(IdSymbolic[ModeId])) { 6270 Mode = 1 << ModeId; 6271 break; 6272 } 6273 } 6274 6275 if (Mode == 0) { 6276 Error(S, (Imm == 0)? 6277 "expected a VGPR index mode or a closing parenthesis" : 6278 "expected a VGPR index mode"); 6279 return UNDEF; 6280 } 6281 6282 if (Imm & Mode) { 6283 Error(S, "duplicate VGPR index mode"); 6284 return UNDEF; 6285 } 6286 Imm |= Mode; 6287 6288 if (trySkipToken(AsmToken::RParen)) 6289 break; 6290 if (!skipToken(AsmToken::Comma, 6291 "expected a comma or a closing parenthesis")) 6292 return UNDEF; 6293 } 6294 6295 return Imm; 6296 } 6297 6298 OperandMatchResultTy 6299 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6300 6301 using namespace llvm::AMDGPU::VGPRIndexMode; 6302 6303 int64_t Imm = 0; 6304 SMLoc S = Parser.getTok().getLoc(); 6305 6306 if (getLexer().getKind() == AsmToken::Identifier && 6307 Parser.getTok().getString() == "gpr_idx" && 6308 getLexer().peekTok().is(AsmToken::LParen)) { 6309 6310 Parser.Lex(); 6311 Parser.Lex(); 6312 6313 Imm = parseGPRIdxMacro(); 6314 if (Imm == UNDEF) 6315 return MatchOperand_ParseFail; 6316 6317 } else { 6318 if (getParser().parseAbsoluteExpression(Imm)) 6319 return MatchOperand_ParseFail; 6320 if (Imm < 0 || !isUInt<4>(Imm)) { 6321 Error(S, "invalid immediate: only 4-bit values are legal"); 6322 return MatchOperand_ParseFail; 6323 } 6324 } 6325 6326 Operands.push_back( 6327 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6328 return MatchOperand_Success; 6329 } 6330 6331 bool AMDGPUOperand::isGPRIdxMode() const { 6332 return isImmTy(ImmTyGprIdxMode); 6333 } 6334 6335 //===----------------------------------------------------------------------===// 6336 // sopp branch targets 6337 //===----------------------------------------------------------------------===// 6338 6339 OperandMatchResultTy 6340 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6341 6342 // Make sure we are not parsing something 6343 // that looks like a label or an expression but is not. 6344 // This will improve error messages. 6345 if (isRegister() || isModifier()) 6346 return MatchOperand_NoMatch; 6347 6348 if (!parseExpr(Operands)) 6349 return MatchOperand_ParseFail; 6350 6351 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6352 assert(Opr.isImm() || Opr.isExpr()); 6353 SMLoc Loc = Opr.getStartLoc(); 6354 6355 // Currently we do not support arbitrary expressions as branch targets. 6356 // Only labels and absolute expressions are accepted. 
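  // (Illustrative note, not from the original source: "s_branch label" and
  //  "s_branch 4" are accepted here, whereas a composite expression such as
  //  "s_branch label+4" is neither an absolute value nor a plain symbol
  //  reference and is rejected by the checks below.)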
6357 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6358 Error(Loc, "expected an absolute expression or a label"); 6359 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6360 Error(Loc, "expected a 16-bit signed jump offset"); 6361 } 6362 6363 return MatchOperand_Success; 6364 } 6365 6366 //===----------------------------------------------------------------------===// 6367 // Boolean holding registers 6368 //===----------------------------------------------------------------------===// 6369 6370 OperandMatchResultTy 6371 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6372 return parseReg(Operands); 6373 } 6374 6375 //===----------------------------------------------------------------------===// 6376 // mubuf 6377 //===----------------------------------------------------------------------===// 6378 6379 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6380 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6381 } 6382 6383 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6384 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6385 } 6386 6387 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6388 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6389 } 6390 6391 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6392 const OperandVector &Operands, 6393 bool IsAtomic, 6394 bool IsAtomicReturn, 6395 bool IsLds) { 6396 bool IsLdsOpcode = IsLds; 6397 bool HasLdsModifier = false; 6398 OptionalImmIndexMap OptionalIdx; 6399 assert(IsAtomicReturn ? IsAtomic : true); 6400 unsigned FirstOperandIdx = 1; 6401 6402 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6403 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6404 6405 // Add the register arguments 6406 if (Op.isReg()) { 6407 Op.addRegOperands(Inst, 1); 6408 // Insert a tied src for atomic return dst. 6409 // This cannot be postponed as subsequent calls to 6410 // addImmOperands rely on correct number of MC operands. 6411 if (IsAtomicReturn && i == FirstOperandIdx) 6412 Op.addRegOperands(Inst, 1); 6413 continue; 6414 } 6415 6416 // Handle the case where soffset is an immediate 6417 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6418 Op.addImmOperands(Inst, 1); 6419 continue; 6420 } 6421 6422 HasLdsModifier |= Op.isLDS(); 6423 6424 // Handle tokens like 'offen' which are sometimes hard-coded into the 6425 // asm string. There are no MCInst operands for these. 6426 if (Op.isToken()) { 6427 continue; 6428 } 6429 assert(Op.isImm()); 6430 6431 // Handle optional arguments 6432 OptionalIdx[Op.getImmTy()] = i; 6433 } 6434 6435 // This is a workaround for an llvm quirk which may result in an 6436 // incorrect instruction selection. Lds and non-lds versions of 6437 // MUBUF instructions are identical except that lds versions 6438 // have mandatory 'lds' modifier. However this modifier follows 6439 // optional modifiers and llvm asm matcher regards this 'lds' 6440 // modifier as an optional one. As a result, an lds version 6441 // of opcode may be selected even if it has no 'lds' modifier. 6442 if (IsLdsOpcode && !HasLdsModifier) { 6443 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6444 if (NoLdsOpcode != -1) { // Got lds version - correct it. 6445 Inst.setOpcode(NoLdsOpcode); 6446 IsLdsOpcode = false; 6447 } 6448 } 6449 6450 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6451 if (!IsAtomic) { // glc is hard-coded. 
6452 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6453 } 6454 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6455 6456 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6457 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6458 } 6459 6460 if (isGFX10()) 6461 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6462 } 6463 6464 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6465 OptionalImmIndexMap OptionalIdx; 6466 6467 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6468 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6469 6470 // Add the register arguments 6471 if (Op.isReg()) { 6472 Op.addRegOperands(Inst, 1); 6473 continue; 6474 } 6475 6476 // Handle the case where soffset is an immediate 6477 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6478 Op.addImmOperands(Inst, 1); 6479 continue; 6480 } 6481 6482 // Handle tokens like 'offen' which are sometimes hard-coded into the 6483 // asm string. There are no MCInst operands for these. 6484 if (Op.isToken()) { 6485 continue; 6486 } 6487 assert(Op.isImm()); 6488 6489 // Handle optional arguments 6490 OptionalIdx[Op.getImmTy()] = i; 6491 } 6492 6493 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6494 AMDGPUOperand::ImmTyOffset); 6495 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6496 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6497 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6498 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6499 6500 if (isGFX10()) 6501 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6502 } 6503 6504 //===----------------------------------------------------------------------===// 6505 // mimg 6506 //===----------------------------------------------------------------------===// 6507 6508 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6509 bool IsAtomic) { 6510 unsigned I = 1; 6511 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6512 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6513 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6514 } 6515 6516 if (IsAtomic) { 6517 // Add src, same as dst 6518 assert(Desc.getNumDefs() == 1); 6519 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6520 } 6521 6522 OptionalImmIndexMap OptionalIdx; 6523 6524 for (unsigned E = Operands.size(); I != E; ++I) { 6525 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6526 6527 // Add the register arguments 6528 if (Op.isReg()) { 6529 Op.addRegOperands(Inst, 1); 6530 } else if (Op.isImmModifier()) { 6531 OptionalIdx[Op.getImmTy()] = I; 6532 } else if (!Op.isToken()) { 6533 llvm_unreachable("unexpected operand type"); 6534 } 6535 } 6536 6537 bool IsGFX10 = isGFX10(); 6538 6539 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6540 if (IsGFX10) 6541 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6542 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6543 if (IsGFX10) 6544 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6545 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6546 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6547 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6548 if (IsGFX10) 6549 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6550 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6551 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6552 if (!IsGFX10) 6553 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6554 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6555 } 6556 6557 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6558 cvtMIMG(Inst, Operands, true); 6559 } 6560 6561 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 6562 const OperandVector &Operands) { 6563 for (unsigned I = 1; I < Operands.size(); ++I) { 6564 auto &Operand = (AMDGPUOperand &)*Operands[I]; 6565 if (Operand.isReg()) 6566 Operand.addRegOperands(Inst, 1); 6567 } 6568 6569 Inst.addOperand(MCOperand::createImm(1)); // a16 6570 } 6571 6572 //===----------------------------------------------------------------------===// 6573 // smrd 6574 //===----------------------------------------------------------------------===// 6575 6576 bool AMDGPUOperand::isSMRDOffset8() const { 6577 return isImm() && isUInt<8>(getImm()); 6578 } 6579 6580 bool AMDGPUOperand::isSMEMOffset() const { 6581 return isImm(); // Offset range is checked later by validator. 6582 } 6583 6584 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6585 // 32-bit literals are only supported on CI and we only want to use them 6586 // when the offset is > 8-bits. 6587 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6588 } 6589 6590 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6591 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6592 } 6593 6594 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6595 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6596 } 6597 6598 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6599 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6600 } 6601 6602 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6603 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6604 } 6605 6606 //===----------------------------------------------------------------------===// 6607 // vop3 6608 //===----------------------------------------------------------------------===// 6609 6610 static bool ConvertOmodMul(int64_t &Mul) { 6611 if (Mul != 1 && Mul != 2 && Mul != 4) 6612 return false; 6613 6614 Mul >>= 1; 6615 return true; 6616 } 6617 6618 static bool ConvertOmodDiv(int64_t &Div) { 6619 if (Div == 1) { 6620 Div = 0; 6621 return true; 6622 } 6623 6624 if (Div == 2) { 6625 Div = 3; 6626 return true; 6627 } 6628 6629 return false; 6630 } 6631 6632 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6633 if (BoundCtrl == 0) { 6634 BoundCtrl = 1; 6635 return true; 6636 } 6637 6638 if (BoundCtrl == -1) { 6639 BoundCtrl = 0; 6640 return true; 6641 } 6642 6643 return false; 6644 } 6645 6646 // Note: the order in this table matches the order of operands in AsmString. 
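// (Illustrative note, not from the original source: each entry below is
//  {name, immediate type, is-a-bit-flag, optional converter}. Bit-flag
//  operands such as "glc" are parsed as bare tokens, while value operands
//  such as "offset:16" are parsed as a name:value pair; see parseOptionalOpr
//  for the dispatch on these fields.)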
6647 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6648 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6649 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6650 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6651 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6652 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6653 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6654 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6655 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6656 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6657 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6658 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6659 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6660 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6661 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6662 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6663 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6664 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6665 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6666 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6667 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6668 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6669 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6670 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6671 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6672 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6673 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6674 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6675 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6676 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6677 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6678 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6679 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6680 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6681 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6682 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6683 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6684 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6685 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6686 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6687 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6688 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6689 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6690 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6691 }; 6692 6693 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6694 6695 OperandMatchResultTy res = parseOptionalOpr(Operands); 6696 6697 // This is a hack to enable hardcoded mandatory operands which follow 6698 // optional operands. 6699 // 6700 // Current design assumes that all operands after the first optional operand 6701 // are also optional. However implementation of some instructions violates 6702 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6703 // 6704 // To alleviate this problem, we have to (implicitly) parse extra operands 6705 // to make sure autogenerated parser of custom operands never hit hardcoded 6706 // mandatory operands. 
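  // (Hypothetical example, not from the original source: for a flat/global
  //  atomic whose asm string hardcodes 'glc' after optional operands such as
  //  'offset:...', the extra parsing pass below keeps the autogenerated
  //  custom-operand parser from being invoked on that hardcoded 'glc'.)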
6707 6708 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6709 if (res != MatchOperand_Success || 6710 isToken(AsmToken::EndOfStatement)) 6711 break; 6712 6713 trySkipToken(AsmToken::Comma); 6714 res = parseOptionalOpr(Operands); 6715 } 6716 6717 return res; 6718 } 6719 6720 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6721 OperandMatchResultTy res; 6722 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6723 // try to parse any optional operand here 6724 if (Op.IsBit) { 6725 res = parseNamedBit(Op.Name, Operands, Op.Type); 6726 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6727 res = parseOModOperand(Operands); 6728 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6729 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6730 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6731 res = parseSDWASel(Operands, Op.Name, Op.Type); 6732 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6733 res = parseSDWADstUnused(Operands); 6734 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6735 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6736 Op.Type == AMDGPUOperand::ImmTyNegLo || 6737 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6738 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6739 Op.ConvertResult); 6740 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6741 res = parseDim(Operands); 6742 } else { 6743 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6744 } 6745 if (res != MatchOperand_NoMatch) { 6746 return res; 6747 } 6748 } 6749 return MatchOperand_NoMatch; 6750 } 6751 6752 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6753 StringRef Name = Parser.getTok().getString(); 6754 if (Name == "mul") { 6755 return parseIntWithPrefix("mul", Operands, 6756 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6757 } 6758 6759 if (Name == "div") { 6760 return parseIntWithPrefix("div", Operands, 6761 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6762 } 6763 6764 return MatchOperand_NoMatch; 6765 } 6766 6767 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6768 cvtVOP3P(Inst, Operands); 6769 6770 int Opc = Inst.getOpcode(); 6771 6772 int SrcNum; 6773 const int Ops[] = { AMDGPU::OpName::src0, 6774 AMDGPU::OpName::src1, 6775 AMDGPU::OpName::src2 }; 6776 for (SrcNum = 0; 6777 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6778 ++SrcNum); 6779 assert(SrcNum > 0); 6780 6781 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6782 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6783 6784 if ((OpSel & (1 << SrcNum)) != 0) { 6785 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6786 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6787 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6788 } 6789 } 6790 6791 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6792 // 1. This operand is input modifiers 6793 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6794 // 2. This is not last operand 6795 && Desc.NumOperands > (OpNum + 1) 6796 // 3. Next operand is register class 6797 && Desc.OpInfo[OpNum + 1].RegClass != -1 6798 // 4. 
Next register is not tied to any other operand 6799 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6800 } 6801 6802 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6803 { 6804 OptionalImmIndexMap OptionalIdx; 6805 unsigned Opc = Inst.getOpcode(); 6806 6807 unsigned I = 1; 6808 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6809 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6810 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6811 } 6812 6813 for (unsigned E = Operands.size(); I != E; ++I) { 6814 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6815 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6816 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6817 } else if (Op.isInterpSlot() || 6818 Op.isInterpAttr() || 6819 Op.isAttrChan()) { 6820 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6821 } else if (Op.isImmModifier()) { 6822 OptionalIdx[Op.getImmTy()] = I; 6823 } else { 6824 llvm_unreachable("unhandled operand type"); 6825 } 6826 } 6827 6828 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6829 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6830 } 6831 6832 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6833 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6834 } 6835 6836 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6837 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6838 } 6839 } 6840 6841 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6842 OptionalImmIndexMap &OptionalIdx) { 6843 unsigned Opc = Inst.getOpcode(); 6844 6845 unsigned I = 1; 6846 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6847 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6848 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6849 } 6850 6851 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6852 // This instruction has src modifiers 6853 for (unsigned E = Operands.size(); I != E; ++I) { 6854 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6855 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6856 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6857 } else if (Op.isImmModifier()) { 6858 OptionalIdx[Op.getImmTy()] = I; 6859 } else if (Op.isRegOrImm()) { 6860 Op.addRegOrImmOperands(Inst, 1); 6861 } else { 6862 llvm_unreachable("unhandled operand type"); 6863 } 6864 } 6865 } else { 6866 // No src modifiers 6867 for (unsigned E = Operands.size(); I != E; ++I) { 6868 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6869 if (Op.isMod()) { 6870 OptionalIdx[Op.getImmTy()] = I; 6871 } else { 6872 Op.addRegOrImmOperands(Inst, 1); 6873 } 6874 } 6875 } 6876 6877 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6878 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6879 } 6880 6881 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6882 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6883 } 6884 6885 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6886 // it has src2 register operand that is tied to dst operand 6887 // we don't allow modifiers for this operand in assembler so src2_modifiers 6888 // should be 0. 
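  // (Illustrative note, not from the original source: the block below inserts
  //  an explicit src2_modifiers immediate of 0 at its named-operand position
  //  and then re-inserts operand 0 (the dst register) as the tied src2, so
  //  e.g. "v_mac_f32_e64 v1, v2, v3" ends up with v1 as both dst and src2.)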
6889 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6890 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6891 Opc == AMDGPU::V_MAC_F32_e64_vi || 6892 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 6893 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 6894 Opc == AMDGPU::V_MAC_F16_e64_vi || 6895 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6896 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6897 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 6898 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6899 auto it = Inst.begin(); 6900 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6901 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6902 ++it; 6903 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6904 } 6905 } 6906 6907 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6908 OptionalImmIndexMap OptionalIdx; 6909 cvtVOP3(Inst, Operands, OptionalIdx); 6910 } 6911 6912 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6913 const OperandVector &Operands) { 6914 OptionalImmIndexMap OptIdx; 6915 const int Opc = Inst.getOpcode(); 6916 const MCInstrDesc &Desc = MII.get(Opc); 6917 6918 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6919 6920 cvtVOP3(Inst, Operands, OptIdx); 6921 6922 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6923 assert(!IsPacked); 6924 Inst.addOperand(Inst.getOperand(0)); 6925 } 6926 6927 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6928 // instruction, and then figure out where to actually put the modifiers 6929 6930 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6931 6932 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6933 if (OpSelHiIdx != -1) { 6934 int DefaultVal = IsPacked ? 
-1 : 0; 6935 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6936 DefaultVal); 6937 } 6938 6939 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6940 if (NegLoIdx != -1) { 6941 assert(IsPacked); 6942 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6943 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6944 } 6945 6946 const int Ops[] = { AMDGPU::OpName::src0, 6947 AMDGPU::OpName::src1, 6948 AMDGPU::OpName::src2 }; 6949 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6950 AMDGPU::OpName::src1_modifiers, 6951 AMDGPU::OpName::src2_modifiers }; 6952 6953 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6954 6955 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6956 unsigned OpSelHi = 0; 6957 unsigned NegLo = 0; 6958 unsigned NegHi = 0; 6959 6960 if (OpSelHiIdx != -1) { 6961 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6962 } 6963 6964 if (NegLoIdx != -1) { 6965 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6966 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6967 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6968 } 6969 6970 for (int J = 0; J < 3; ++J) { 6971 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6972 if (OpIdx == -1) 6973 break; 6974 6975 uint32_t ModVal = 0; 6976 6977 if ((OpSel & (1 << J)) != 0) 6978 ModVal |= SISrcMods::OP_SEL_0; 6979 6980 if ((OpSelHi & (1 << J)) != 0) 6981 ModVal |= SISrcMods::OP_SEL_1; 6982 6983 if ((NegLo & (1 << J)) != 0) 6984 ModVal |= SISrcMods::NEG; 6985 6986 if ((NegHi & (1 << J)) != 0) 6987 ModVal |= SISrcMods::NEG_HI; 6988 6989 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6990 6991 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6992 } 6993 } 6994 6995 //===----------------------------------------------------------------------===// 6996 // dpp 6997 //===----------------------------------------------------------------------===// 6998 6999 bool AMDGPUOperand::isDPP8() const { 7000 return isImmTy(ImmTyDPP8); 7001 } 7002 7003 bool AMDGPUOperand::isDPPCtrl() const { 7004 using namespace AMDGPU::DPP; 7005 7006 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7007 if (result) { 7008 int64_t Imm = getImm(); 7009 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7010 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7011 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7012 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7013 (Imm == DppCtrl::WAVE_SHL1) || 7014 (Imm == DppCtrl::WAVE_ROL1) || 7015 (Imm == DppCtrl::WAVE_SHR1) || 7016 (Imm == DppCtrl::WAVE_ROR1) || 7017 (Imm == DppCtrl::ROW_MIRROR) || 7018 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7019 (Imm == DppCtrl::BCAST15) || 7020 (Imm == DppCtrl::BCAST31) || 7021 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7022 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7023 } 7024 return false; 7025 } 7026 7027 //===----------------------------------------------------------------------===// 7028 // mAI 7029 //===----------------------------------------------------------------------===// 7030 7031 bool AMDGPUOperand::isBLGP() const { 7032 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7033 } 7034 7035 bool AMDGPUOperand::isCBSZ() const { 7036 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7037 } 7038 7039 bool AMDGPUOperand::isABID() 
const { 7040 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7041 } 7042 7043 bool AMDGPUOperand::isS16Imm() const { 7044 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7045 } 7046 7047 bool AMDGPUOperand::isU16Imm() const { 7048 return isImm() && isUInt<16>(getImm()); 7049 } 7050 7051 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7052 if (!isGFX10()) 7053 return MatchOperand_NoMatch; 7054 7055 SMLoc S = Parser.getTok().getLoc(); 7056 7057 if (getLexer().isNot(AsmToken::Identifier)) 7058 return MatchOperand_NoMatch; 7059 if (getLexer().getTok().getString() != "dim") 7060 return MatchOperand_NoMatch; 7061 7062 Parser.Lex(); 7063 if (getLexer().isNot(AsmToken::Colon)) 7064 return MatchOperand_ParseFail; 7065 7066 Parser.Lex(); 7067 7068 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 7069 // integer. 7070 std::string Token; 7071 if (getLexer().is(AsmToken::Integer)) { 7072 SMLoc Loc = getLexer().getTok().getEndLoc(); 7073 Token = std::string(getLexer().getTok().getString()); 7074 Parser.Lex(); 7075 if (getLexer().getTok().getLoc() != Loc) 7076 return MatchOperand_ParseFail; 7077 } 7078 if (getLexer().isNot(AsmToken::Identifier)) 7079 return MatchOperand_ParseFail; 7080 Token += getLexer().getTok().getString(); 7081 7082 StringRef DimId = Token; 7083 if (DimId.startswith("SQ_RSRC_IMG_")) 7084 DimId = DimId.substr(12); 7085 7086 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7087 if (!DimInfo) 7088 return MatchOperand_ParseFail; 7089 7090 Parser.Lex(); 7091 7092 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 7093 AMDGPUOperand::ImmTyDim)); 7094 return MatchOperand_Success; 7095 } 7096 7097 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7098 SMLoc S = Parser.getTok().getLoc(); 7099 StringRef Prefix; 7100 7101 if (getLexer().getKind() == AsmToken::Identifier) { 7102 Prefix = Parser.getTok().getString(); 7103 } else { 7104 return MatchOperand_NoMatch; 7105 } 7106 7107 if (Prefix != "dpp8") 7108 return parseDPPCtrl(Operands); 7109 if (!isGFX10()) 7110 return MatchOperand_NoMatch; 7111 7112 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7113 7114 int64_t Sels[8]; 7115 7116 Parser.Lex(); 7117 if (getLexer().isNot(AsmToken::Colon)) 7118 return MatchOperand_ParseFail; 7119 7120 Parser.Lex(); 7121 if (getLexer().isNot(AsmToken::LBrac)) 7122 return MatchOperand_ParseFail; 7123 7124 Parser.Lex(); 7125 if (getParser().parseAbsoluteExpression(Sels[0])) 7126 return MatchOperand_ParseFail; 7127 if (0 > Sels[0] || 7 < Sels[0]) 7128 return MatchOperand_ParseFail; 7129 7130 for (size_t i = 1; i < 8; ++i) { 7131 if (getLexer().isNot(AsmToken::Comma)) 7132 return MatchOperand_ParseFail; 7133 7134 Parser.Lex(); 7135 if (getParser().parseAbsoluteExpression(Sels[i])) 7136 return MatchOperand_ParseFail; 7137 if (0 > Sels[i] || 7 < Sels[i]) 7138 return MatchOperand_ParseFail; 7139 } 7140 7141 if (getLexer().isNot(AsmToken::RBrac)) 7142 return MatchOperand_ParseFail; 7143 Parser.Lex(); 7144 7145 unsigned DPP8 = 0; 7146 for (size_t i = 0; i < 8; ++i) 7147 DPP8 |= (Sels[i] << (i * 3)); 7148 7149 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7150 return MatchOperand_Success; 7151 } 7152 7153 OperandMatchResultTy 7154 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7155 using namespace AMDGPU::DPP; 7156 7157 SMLoc S = Parser.getTok().getLoc(); 7158 StringRef Prefix; 7159 int64_t Int; 7160 7161 if 
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrl from eating invalid tokens.
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast"
        && Prefix != "row_share"
        && Prefix != "row_xmask") {
      return MatchOperand_NoMatch;
    }

    if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
      return MatchOperand_NoMatch;

    if (!isVI() && !isGFX9() &&
        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
         Prefix == "wave_rol" || Prefix == "wave_ror" ||
         Prefix == "row_bcast"))
      return MatchOperand_NoMatch;

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i * 2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHARE_FIRST;
      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_XMASK_FIRST;
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

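// When a DPP modifier is omitted from the source, the defaults created here
// and by the helpers below are used: row_mask:0xf and bank_mask:0xf (all
// rows/banks enabled), bound_ctrl:0 and fi:0.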
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied old or src2 operand for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
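    // The dpp8 form carries the fi flag as a trailing immediate operand; map
    // the parsed fi value onto the DPP8_FI_* encoding.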
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

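// Convert parsed SDWA operands into an MCInst. The source syntax handled here
// looks roughly like the following (illustrative example, not taken from this
// file's tests; exact mnemonic spelling depends on the target):
//   v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1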
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

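  // All explicit operands are in place; any SDWA modifier the user omitted is
  // now materialized with its default value (dst_sel/src_sel DWORD,
  // dst_unused UNUSED_PRESERVE, clamp/omod 0).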
  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

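// The GET_* macros below pull the TableGen-generated register matcher,
// instruction matcher tables and mnemonic checker from AMDGPUGenAsmMatcher.inc
// into this file.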
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand where
  // the corresponding token is expected.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }