1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/Error.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

// Register categories distinguished by the parser. KernelScopeInfo below only
// accounts for IS_SGPR, IS_VGPR and IS_AGPR; the other kinds are ignored there.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

/// A single parsed assembly operand.
///
/// An operand is exactly one of a token, an immediate, a register or an
/// expression (see KindTy); the matching payload is stored in the anonymous
/// union below and must only be read through the accessor for that kind.
/// The many is*() predicates and add*Operands() methods are the hooks used
/// when matching operands and appending them to an MCInst.
class AMDGPUOperand : public MCParsedAsmOperand {
  // Discriminator for the payload union.
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  /// Creates an operand of the given kind. The payload, StartLoc and EndLoc
  /// are left for the caller to fill in; see the Create*() factories.
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  /// Source modifiers attached to a register or an untyped immediate.
  /// Abs/Neg are the floating-point modifiers, Sext the integer modifier.
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    /// Returns the SISrcMods::ABS / SISrcMods::NEG bits for the set flags.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    /// Returns SISrcMods::SEXT if Sext is set, otherwise 0.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    /// Returns the FP encoding if any FP modifier is set, else the integer
    /// encoding if Sext is set, else 0. Asserts that FP and integer modifiers
    /// are not both present.
    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  /// Tag stored with an Immediate operand. ImmTyNone marks a plain immediate;
  /// every other value marks a specific named immediate (isImmModifier()).
  /// printImmTy() must be kept in sync with this list.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  // Payload for Kind == Token: a non-owning view of the token text.
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  // Payload for Kind == Immediate.
  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  // Payload for Kind == Register.
  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Only the member selected by Kind is meaningful.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  /// True for a Token operand, and also for an expression that is a plain
  /// symbol reference (see the comment in the body).
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  /// True if this is an Expression operand holding a non-null
  /// MCSymbolRefExpr.
  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  /// True for any Register operand, with or without modifiers.
  bool isRegKind() const {
    return Kind == Register;
  }

  /// True only for a Register operand that carries no modifiers.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  /// Register of class RCID, or an inlinable or literal immediate of the
  /// given type. Unlike isRegOrInlineNoMods(), modifiers are not rejected.
  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  /// True if the operand belongs to any of the listed VGPR register classes
  /// (32 through 1024 bits).
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  /// True if the operand is the register AMDGPU::SGPR_NULL.
  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  /// True if this is an immediate tagged with exactly ImmT.
  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  /// True if this is an immediate with any tag other than ImmTyNone.
  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // Per-tag predicates. Most only test the ImmTy tag; isOffset, isOffset0,
  // isOffset1 and isFORMAT additionally require the value to fit in the
  // stated number of unsigned bits.
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  // Accepts either offset tag and, unlike isOffset(), applies no range check.
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  /// Modifier-free register (isReg()) or any immediate.
  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  /// Register of class RCID or an inlinable immediate of the given type, and
  /// in either case no source modifiers. hasModifiers() is only evaluated
  /// when the first condition holds.
  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  // isSCSrc*: SReg_32 / SReg_64 register or inlinable immediate of the named
  // type, without modifiers. The V2 variants forward to the 16-bit scalar
  // predicate.
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  // isSSrc*: the matching isSCSrc* operand or a literal immediate of the named
  // type; the 32-bit variants also accept expressions.
  // NOTE(review): isSSrcF16/F32/F64 call the B-typed isSCSrc* predicates
  // rather than the F-typed ones -- confirm this is intentional.
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  // Marked unreachable; the return statement after llvm_unreachable is dead.
  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  // Marked unreachable; the return statement after llvm_unreachable is dead.
  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  // isVCSrc*: VS_32 / VS_64 register or inlinable immediate of the named type,
  // without modifiers.
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  // isVSrc*: the matching isVCSrc* operand or a literal immediate of the named
  // type; the 32-bit variants also accept expressions.
  // NOTE(review): isVSrcB32/B64 call the F-typed isVCSrc* predicates rather
  // than the B-typed ones -- confirm this is intentional.
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  // isVISrc*: VGPR_32 register or inlinable immediate, without modifiers.
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  // isAISrc*: AGPR_32 / AReg_128 / AReg_512 / AReg_1024 register or inlinable
  // immediate, without modifiers. Each V2F16 variant also accepts what the
  // corresponding B32 predicate accepts.
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  // This operand class never represents a memory operand.
  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  /// Returns the name of the symbol referenced by an expression operand.
  /// Asserts isExpr(); Expr must be an MCSymbolRefExpr (enforced by cast<>).
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  /// Returns the token text. For an expression operand accepted by isToken()
  /// this is the referenced symbol's name.
  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  /// Returns the modifiers of a register or of an ImmTyNone immediate.
  /// Asserts for every other operand kind, so the has*Modifiers() wrappers
  /// below share that precondition.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  /// Stores Mods on a register or an ImmTyNone immediate; same precondition
  /// as getModifiers().
  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  /// Appends this operand as a register, an expression or an immediate,
  /// depending on its kind.
  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  /// Appends two MCInst operands: first the encoded modifiers as an
  /// immediate, then the register or the immediate itself. The immediate is
  /// added with ApplyModifiers = false.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  /// Register-only counterpart of addRegOrImmWithInputModsOperands(): appends
  /// the encoded modifiers, then the register. Asserts isRegKind().
  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  /// Appends a branch target, which is either an immediate or an expression.
  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  /// Writes a short name for Type to OS. There is no default case, so every
  /// ImmTy enumerator needs its own entry here.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  /// Writes a textual form of the operand to OS; the format depends on Kind.
  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  /// Creates an immediate operand with default (empty) modifiers. Both
  /// StartLoc and EndLoc are set to Loc.
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  /// Creates a token operand that points at Str's data without copying it,
  /// so the underlying text must outlive the operand. Both StartLoc and
  /// EndLoc are set to Loc. HasExplicitEncodingSize is not used by this body.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  /// Creates a register operand spanning [S, E] with default (empty)
  /// modifiers.
  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  /// Creates an expression operand. Both StartLoc and EndLoc are set to S.
  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

// Prints the three modifier flags; used by AMDGPUOperand::print().
raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
962 class KernelScopeInfo { 963 int SgprIndexUnusedMin = -1; 964 int VgprIndexUnusedMin = -1; 965 MCContext *Ctx = nullptr; 966 967 void usesSgprAt(int i) { 968 if (i >= SgprIndexUnusedMin) { 969 SgprIndexUnusedMin = ++i; 970 if (Ctx) { 971 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 972 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 973 } 974 } 975 } 976 977 void usesVgprAt(int i) { 978 if (i >= VgprIndexUnusedMin) { 979 VgprIndexUnusedMin = ++i; 980 if (Ctx) { 981 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 982 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 983 } 984 } 985 } 986 987 public: 988 KernelScopeInfo() = default; 989 990 void initialize(MCContext &Context) { 991 Ctx = &Context; 992 usesSgprAt(SgprIndexUnusedMin = -1); 993 usesVgprAt(VgprIndexUnusedMin = -1); 994 } 995 996 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 997 switch (RegKind) { 998 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 999 case IS_AGPR: // fall through 1000 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1001 default: break; 1002 } 1003 } 1004 }; 1005 1006 class AMDGPUAsmParser : public MCTargetAsmParser { 1007 MCAsmParser &Parser; 1008 1009 // Number of extra operands parsed after the first optional operand. 1010 // This may be necessary to skip hardcoded mandatory operands. 
1011 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1012 1013 unsigned ForcedEncodingSize = 0; 1014 bool ForcedDPP = false; 1015 bool ForcedSDWA = false; 1016 KernelScopeInfo KernelScope; 1017 1018 /// @name Auto-generated Match Functions 1019 /// { 1020 1021 #define GET_ASSEMBLER_HEADER 1022 #include "AMDGPUGenAsmMatcher.inc" 1023 1024 /// } 1025 1026 private: 1027 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1028 bool OutOfRangeError(SMRange Range); 1029 /// Calculate VGPR/SGPR blocks required for given target, reserved 1030 /// registers, and user-specified NextFreeXGPR values. 1031 /// 1032 /// \param Features [in] Target features, used for bug corrections. 1033 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1034 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1035 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1036 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1037 /// descriptor field, if valid. 1038 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1039 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1040 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1041 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1042 /// \param VGPRBlocks [out] Result VGPR block count. 1043 /// \param SGPRBlocks [out] Result SGPR block count. 
1044 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1045 bool FlatScrUsed, bool XNACKUsed, 1046 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1047 SMRange VGPRRange, unsigned NextFreeSGPR, 1048 SMRange SGPRRange, unsigned &VGPRBlocks, 1049 unsigned &SGPRBlocks); 1050 bool ParseDirectiveAMDGCNTarget(); 1051 bool ParseDirectiveAMDHSAKernel(); 1052 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1053 bool ParseDirectiveHSACodeObjectVersion(); 1054 bool ParseDirectiveHSACodeObjectISA(); 1055 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1056 bool ParseDirectiveAMDKernelCodeT(); 1057 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1058 bool ParseDirectiveAMDGPUHsaKernel(); 1059 1060 bool ParseDirectiveISAVersion(); 1061 bool ParseDirectiveHSAMetadata(); 1062 bool ParseDirectivePALMetadataBegin(); 1063 bool ParseDirectivePALMetadata(); 1064 bool ParseDirectiveAMDGPULDS(); 1065 1066 /// Common code to parse out a block of text (typically YAML) between start and 1067 /// end directives. 
1068 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1069 const char *AssemblerDirectiveEnd, 1070 std::string &CollectString); 1071 1072 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1073 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1074 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1075 unsigned &RegNum, unsigned &RegWidth, 1076 bool RestoreOnFailure = false); 1077 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1078 unsigned &RegNum, unsigned &RegWidth, 1079 SmallVectorImpl<AsmToken> &Tokens); 1080 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1081 unsigned &RegWidth, 1082 SmallVectorImpl<AsmToken> &Tokens); 1083 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1084 unsigned &RegWidth, 1085 SmallVectorImpl<AsmToken> &Tokens); 1086 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1087 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1088 bool ParseRegRange(unsigned& Num, unsigned& Width); 1089 unsigned getRegularReg(RegisterKind RegKind, 1090 unsigned RegNum, 1091 unsigned RegWidth, 1092 SMLoc Loc); 1093 1094 bool isRegister(); 1095 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1096 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1097 void initializeGprCountSymbol(RegisterKind RegKind); 1098 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1099 unsigned RegWidth); 1100 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1101 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 1102 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1103 bool IsGdsHardcoded); 1104 1105 public: 1106 enum AMDGPUMatchResultTy { 1107 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1108 }; 1109 enum OperandMode { 1110 OperandMode_Default, 1111 OperandMode_NSA, 1112 }; 1113 1114 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1115 1116 
AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1117 const MCInstrInfo &MII, 1118 const MCTargetOptions &Options) 1119 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1120 MCAsmParserExtension::Initialize(Parser); 1121 1122 if (getFeatureBits().none()) { 1123 // Set default features. 1124 copySTI().ToggleFeature("southern-islands"); 1125 } 1126 1127 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1128 1129 { 1130 // TODO: make those pre-defined variables read-only. 1131 // Currently there is none suitable machinery in the core llvm-mc for this. 1132 // MCSymbol::isRedefinable is intended for another purpose, and 1133 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 1134 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1135 MCContext &Ctx = getContext(); 1136 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1137 MCSymbol *Sym = 1138 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1139 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1140 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1141 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1142 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1143 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1144 } else { 1145 MCSymbol *Sym = 1146 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1147 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1148 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1149 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1150 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1151 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1152 } 1153 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1154 initializeGprCountSymbol(IS_VGPR); 1155 initializeGprCountSymbol(IS_SGPR); 
1156 } else 1157 KernelScope.initialize(getContext()); 1158 } 1159 } 1160 1161 bool hasXNACK() const { 1162 return AMDGPU::hasXNACK(getSTI()); 1163 } 1164 1165 bool hasMIMG_R128() const { 1166 return AMDGPU::hasMIMG_R128(getSTI()); 1167 } 1168 1169 bool hasPackedD16() const { 1170 return AMDGPU::hasPackedD16(getSTI()); 1171 } 1172 1173 bool hasGFX10A16() const { 1174 return AMDGPU::hasGFX10A16(getSTI()); 1175 } 1176 1177 bool isSI() const { 1178 return AMDGPU::isSI(getSTI()); 1179 } 1180 1181 bool isCI() const { 1182 return AMDGPU::isCI(getSTI()); 1183 } 1184 1185 bool isVI() const { 1186 return AMDGPU::isVI(getSTI()); 1187 } 1188 1189 bool isGFX9() const { 1190 return AMDGPU::isGFX9(getSTI()); 1191 } 1192 1193 bool isGFX9Plus() const { 1194 return AMDGPU::isGFX9Plus(getSTI()); 1195 } 1196 1197 bool isGFX10() const { 1198 return AMDGPU::isGFX10(getSTI()); 1199 } 1200 1201 bool isGFX10_BEncoding() const { 1202 return AMDGPU::isGFX10_BEncoding(getSTI()); 1203 } 1204 1205 bool hasInv2PiInlineImm() const { 1206 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1207 } 1208 1209 bool hasFlatOffsets() const { 1210 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1211 } 1212 1213 bool hasSGPR102_SGPR103() const { 1214 return !isVI() && !isGFX9(); 1215 } 1216 1217 bool hasSGPR104_SGPR105() const { 1218 return isGFX10(); 1219 } 1220 1221 bool hasIntClamp() const { 1222 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1223 } 1224 1225 AMDGPUTargetStreamer &getTargetStreamer() { 1226 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1227 return static_cast<AMDGPUTargetStreamer &>(TS); 1228 } 1229 1230 const MCRegisterInfo *getMRI() const { 1231 // We need this const_cast because for some reason getContext() is not const 1232 // in MCAsmParser. 
1233 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1234 } 1235 1236 const MCInstrInfo *getMII() const { 1237 return &MII; 1238 } 1239 1240 const FeatureBitset &getFeatureBits() const { 1241 return getSTI().getFeatureBits(); 1242 } 1243 1244 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1245 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1246 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1247 1248 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1249 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1250 bool isForcedDPP() const { return ForcedDPP; } 1251 bool isForcedSDWA() const { return ForcedSDWA; } 1252 ArrayRef<unsigned> getMatchedVariants() const; 1253 StringRef getMatchedVariantName() const; 1254 1255 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1256 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1257 bool RestoreOnFailure); 1258 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1259 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1260 SMLoc &EndLoc) override; 1261 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1262 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1263 unsigned Kind) override; 1264 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1265 OperandVector &Operands, MCStreamer &Out, 1266 uint64_t &ErrorInfo, 1267 bool MatchingInlineAsm) override; 1268 bool ParseDirective(AsmToken DirectiveID) override; 1269 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1270 OperandMode Mode = OperandMode_Default); 1271 StringRef parseMnemonicSuffix(StringRef Name); 1272 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1273 SMLoc NameLoc, OperandVector &Operands) override; 1274 //bool ProcessInstruction(MCInst &Inst); 1275 1276 OperandMatchResultTy 
parseIntWithPrefix(const char *Prefix, int64_t &Int); 1277 1278 OperandMatchResultTy 1279 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1280 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1281 bool (*ConvertResult)(int64_t &) = nullptr); 1282 1283 OperandMatchResultTy 1284 parseOperandArrayWithPrefix(const char *Prefix, 1285 OperandVector &Operands, 1286 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1287 bool (*ConvertResult)(int64_t&) = nullptr); 1288 1289 OperandMatchResultTy 1290 parseNamedBit(const char *Name, OperandVector &Operands, 1291 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1292 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1293 StringRef &Value); 1294 1295 bool isModifier(); 1296 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1297 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1298 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1299 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1300 bool parseSP3NegModifier(); 1301 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1302 OperandMatchResultTy parseReg(OperandVector &Operands); 1303 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1304 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1305 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1306 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1307 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1308 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1309 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1310 OperandMatchResultTy parseUfmt(int64_t &Format); 1311 OperandMatchResultTy parseSymbolicSplitFormat(StringRef 
FormatStr, SMLoc Loc, int64_t &Format); 1312 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1313 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1314 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1315 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1316 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1317 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1318 1319 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1320 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1321 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1322 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1323 1324 bool parseCnt(int64_t &IntVal); 1325 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1326 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1327 1328 private: 1329 struct OperandInfoTy { 1330 int64_t Id; 1331 bool IsSymbolic = false; 1332 bool IsDefined = false; 1333 1334 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1335 }; 1336 1337 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1338 bool validateSendMsg(const OperandInfoTy &Msg, 1339 const OperandInfoTy &Op, 1340 const OperandInfoTy &Stream, 1341 const SMLoc Loc); 1342 1343 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1344 bool validateHwreg(const OperandInfoTy &HwReg, 1345 const int64_t Offset, 1346 const int64_t Width, 1347 const SMLoc Loc); 1348 1349 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1350 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1351 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1352 1353 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1354 bool validateFlatOffset(const MCInst 
&Inst, const OperandVector &Operands); 1355 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1356 bool validateSOPLiteral(const MCInst &Inst) const; 1357 bool validateConstantBusLimitations(const MCInst &Inst); 1358 bool validateEarlyClobberLimitations(const MCInst &Inst); 1359 bool validateIntClampSupported(const MCInst &Inst); 1360 bool validateMIMGAtomicDMask(const MCInst &Inst); 1361 bool validateMIMGGatherDMask(const MCInst &Inst); 1362 bool validateMovrels(const MCInst &Inst); 1363 bool validateMIMGDataSize(const MCInst &Inst); 1364 bool validateMIMGAddrSize(const MCInst &Inst); 1365 bool validateMIMGD16(const MCInst &Inst); 1366 bool validateMIMGDim(const MCInst &Inst); 1367 bool validateLdsDirect(const MCInst &Inst); 1368 bool validateOpSel(const MCInst &Inst); 1369 bool validateVccOperand(unsigned Reg) const; 1370 bool validateVOP3Literal(const MCInst &Inst) const; 1371 bool validateMAIAccWrite(const MCInst &Inst); 1372 unsigned getConstantBusLimit(unsigned Opcode) const; 1373 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1374 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1375 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1376 1377 bool isSupportedMnemo(StringRef Mnemo, 1378 const FeatureBitset &FBS); 1379 bool isSupportedMnemo(StringRef Mnemo, 1380 const FeatureBitset &FBS, 1381 ArrayRef<unsigned> Variants); 1382 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1383 1384 bool isId(const StringRef Id) const; 1385 bool isId(const AsmToken &Token, const StringRef Id) const; 1386 bool isToken(const AsmToken::TokenKind Kind) const; 1387 bool trySkipId(const StringRef Id); 1388 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1389 bool trySkipToken(const AsmToken::TokenKind Kind); 1390 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1391 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1392 bool 
parseId(StringRef &Val, const StringRef ErrMsg); 1393 1394 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1395 AsmToken::TokenKind getTokenKind() const; 1396 bool parseExpr(int64_t &Imm); 1397 bool parseExpr(OperandVector &Operands); 1398 StringRef getTokenStr() const; 1399 AsmToken peekToken(); 1400 AsmToken getToken() const; 1401 SMLoc getLoc() const; 1402 void lex(); 1403 1404 public: 1405 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1406 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1407 1408 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1409 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1410 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1411 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1412 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1413 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1414 1415 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1416 const unsigned MinVal, 1417 const unsigned MaxVal, 1418 const StringRef ErrMsg); 1419 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1420 bool parseSwizzleOffset(int64_t &Imm); 1421 bool parseSwizzleMacro(int64_t &Imm); 1422 bool parseSwizzleQuadPerm(int64_t &Imm); 1423 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1424 bool parseSwizzleBroadcast(int64_t &Imm); 1425 bool parseSwizzleSwap(int64_t &Imm); 1426 bool parseSwizzleReverse(int64_t &Imm); 1427 1428 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1429 int64_t parseGPRIdxMacro(); 1430 1431 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1432 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1433 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1434 void cvtMubufLds(MCInst &Inst, const 
OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1435 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1436 1437 AMDGPUOperand::Ptr defaultDLC() const; 1438 AMDGPUOperand::Ptr defaultGLC() const; 1439 AMDGPUOperand::Ptr defaultSLC() const; 1440 1441 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1442 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1443 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1444 AMDGPUOperand::Ptr defaultFlatOffset() const; 1445 1446 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1447 1448 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1449 OptionalImmIndexMap &OptionalIdx); 1450 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1451 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1452 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1453 1454 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1455 1456 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1457 bool IsAtomic = false); 1458 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1459 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1460 1461 OperandMatchResultTy parseDim(OperandVector &Operands); 1462 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1463 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1464 AMDGPUOperand::Ptr defaultRowMask() const; 1465 AMDGPUOperand::Ptr defaultBankMask() const; 1466 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1467 AMDGPUOperand::Ptr defaultFI() const; 1468 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1469 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1470 1471 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1472 AMDGPUOperand::ImmTy Type); 1473 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1474 void cvtSdwaVOP1(MCInst &Inst, const 
OperandVector &Operands); 1475 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1476 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1477 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1478 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1479 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1480 uint64_t BasicInstType, 1481 bool SkipDstVcc = false, 1482 bool SkipSrcVcc = false); 1483 1484 AMDGPUOperand::Ptr defaultBLGP() const; 1485 AMDGPUOperand::Ptr defaultCBSZ() const; 1486 AMDGPUOperand::Ptr defaultABID() const; 1487 1488 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1489 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1490 }; 1491 1492 struct OptionalOperand { 1493 const char *Name; 1494 AMDGPUOperand::ImmTy Type; 1495 bool IsBit; 1496 bool (*ConvertResult)(int64_t&); 1497 }; 1498 1499 } // end anonymous namespace 1500 1501 // May be called with integer type with equivalent bitwidth. 1502 static const fltSemantics *getFltSemantics(unsigned Size) { 1503 switch (Size) { 1504 case 4: 1505 return &APFloat::IEEEsingle(); 1506 case 8: 1507 return &APFloat::IEEEdouble(); 1508 case 2: 1509 return &APFloat::IEEEhalf(); 1510 default: 1511 llvm_unreachable("unsupported fp type"); 1512 } 1513 } 1514 1515 static const fltSemantics *getFltSemantics(MVT VT) { 1516 return getFltSemantics(VT.getSizeInBits() / 8); 1517 } 1518 1519 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1520 switch (OperandType) { 1521 case AMDGPU::OPERAND_REG_IMM_INT32: 1522 case AMDGPU::OPERAND_REG_IMM_FP32: 1523 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1524 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1525 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1526 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1527 return &APFloat::IEEEsingle(); 1528 case AMDGPU::OPERAND_REG_IMM_INT64: 1529 case AMDGPU::OPERAND_REG_IMM_FP64: 1530 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1531 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 
1532 return &APFloat::IEEEdouble(); 1533 case AMDGPU::OPERAND_REG_IMM_INT16: 1534 case AMDGPU::OPERAND_REG_IMM_FP16: 1535 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1536 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1537 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1538 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1539 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1540 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1541 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1542 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1543 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1544 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1545 return &APFloat::IEEEhalf(); 1546 default: 1547 llvm_unreachable("unsupported fp type"); 1548 } 1549 } 1550 1551 //===----------------------------------------------------------------------===// 1552 // Operand 1553 //===----------------------------------------------------------------------===// 1554 1555 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1556 bool Lost; 1557 1558 // Convert literal to single precision 1559 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1560 APFloat::rmNearestTiesToEven, 1561 &Lost); 1562 // We allow precision lost but not overflow or underflow 1563 if (Status != APFloat::opOK && 1564 Lost && 1565 ((Status & APFloat::opOverflow) != 0 || 1566 (Status & APFloat::opUnderflow) != 0)) { 1567 return false; 1568 } 1569 1570 return true; 1571 } 1572 1573 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1574 return isUIntN(Size, Val) || isIntN(Size, Val); 1575 } 1576 1577 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1578 if (VT.getScalarType() == MVT::i16) { 1579 // FP immediate values are broken. 1580 return isInlinableIntLiteral(Val); 1581 } 1582 1583 // f16/v2f16 operands work correctly for all values. 
1584 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1585 } 1586 1587 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1588 1589 // This is a hack to enable named inline values like 1590 // shared_base with both 32-bit and 64-bit operands. 1591 // Note that these values are defined as 1592 // 32-bit operands only. 1593 if (isInlineValue()) { 1594 return true; 1595 } 1596 1597 if (!isImmTy(ImmTyNone)) { 1598 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1599 return false; 1600 } 1601 // TODO: We should avoid using host float here. It would be better to 1602 // check the float bit values which is what a few other places do. 1603 // We've had bot failures before due to weird NaN support on mips hosts. 1604 1605 APInt Literal(64, Imm.Val); 1606 1607 if (Imm.IsFPImm) { // We got fp literal token 1608 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1609 return AMDGPU::isInlinableLiteral64(Imm.Val, 1610 AsmParser->hasInv2PiInlineImm()); 1611 } 1612 1613 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1614 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1615 return false; 1616 1617 if (type.getScalarSizeInBits() == 16) { 1618 return isInlineableLiteralOp16( 1619 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1620 type, AsmParser->hasInv2PiInlineImm()); 1621 } 1622 1623 // Check if single precision literal is inlinable 1624 return AMDGPU::isInlinableLiteral32( 1625 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1626 AsmParser->hasInv2PiInlineImm()); 1627 } 1628 1629 // We got int literal token. 
1630 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1631 return AMDGPU::isInlinableLiteral64(Imm.Val, 1632 AsmParser->hasInv2PiInlineImm()); 1633 } 1634 1635 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1636 return false; 1637 } 1638 1639 if (type.getScalarSizeInBits() == 16) { 1640 return isInlineableLiteralOp16( 1641 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1642 type, AsmParser->hasInv2PiInlineImm()); 1643 } 1644 1645 return AMDGPU::isInlinableLiteral32( 1646 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1647 AsmParser->hasInv2PiInlineImm()); 1648 } 1649 1650 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1651 // Check that this immediate can be added as literal 1652 if (!isImmTy(ImmTyNone)) { 1653 return false; 1654 } 1655 1656 if (!Imm.IsFPImm) { 1657 // We got int literal token. 1658 1659 if (type == MVT::f64 && hasFPModifiers()) { 1660 // Cannot apply fp modifiers to int literals preserving the same semantics 1661 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1662 // disable these cases. 1663 return false; 1664 } 1665 1666 unsigned Size = type.getSizeInBits(); 1667 if (Size == 64) 1668 Size = 32; 1669 1670 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1671 // types. 1672 return isSafeTruncation(Imm.Val, Size); 1673 } 1674 1675 // We got fp literal token 1676 if (type == MVT::f64) { // Expected 64-bit fp operand 1677 // We would set low 64-bits of literal to zeroes but we accept this literals 1678 return true; 1679 } 1680 1681 if (type == MVT::i64) { // Expected 64-bit int operand 1682 // We don't allow fp literals in 64-bit integer instructions. It is 1683 // unclear how we should encode them. 1684 return false; 1685 } 1686 1687 // We allow fp literals with f16x2 operands assuming that the specified 1688 // literal goes into the lower half and the upper half is zero. 
We also 1689 // require that the literal may be losslesly converted to f16. 1690 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1691 (type == MVT::v2i16)? MVT::i16 : type; 1692 1693 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1694 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1695 } 1696 1697 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1698 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1699 } 1700 1701 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1702 if (AsmParser->isVI()) 1703 return isVReg32(); 1704 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1705 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1706 else 1707 return false; 1708 } 1709 1710 bool AMDGPUOperand::isSDWAFP16Operand() const { 1711 return isSDWAOperand(MVT::f16); 1712 } 1713 1714 bool AMDGPUOperand::isSDWAFP32Operand() const { 1715 return isSDWAOperand(MVT::f32); 1716 } 1717 1718 bool AMDGPUOperand::isSDWAInt16Operand() const { 1719 return isSDWAOperand(MVT::i16); 1720 } 1721 1722 bool AMDGPUOperand::isSDWAInt32Operand() const { 1723 return isSDWAOperand(MVT::i32); 1724 } 1725 1726 bool AMDGPUOperand::isBoolReg() const { 1727 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1728 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1729 } 1730 1731 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1732 { 1733 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1734 assert(Size == 2 || Size == 4 || Size == 8); 1735 1736 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1737 1738 if (Imm.Mods.Abs) { 1739 Val &= ~FpSignMask; 1740 } 1741 if (Imm.Mods.Neg) { 1742 Val ^= FpSignMask; 1743 } 1744 1745 return Val; 1746 } 1747 1748 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1749 if 
(AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1750 Inst.getNumOperands())) { 1751 addLiteralImmOperand(Inst, Imm.Val, 1752 ApplyModifiers & 1753 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1754 } else { 1755 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1756 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1757 } 1758 } 1759 1760 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1761 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1762 auto OpNum = Inst.getNumOperands(); 1763 // Check that this operand accepts literals 1764 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1765 1766 if (ApplyModifiers) { 1767 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1768 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 1769 Val = applyInputFPModifiers(Val, Size); 1770 } 1771 1772 APInt Literal(64, Val); 1773 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1774 1775 if (Imm.IsFPImm) { // We got fp literal token 1776 switch (OpTy) { 1777 case AMDGPU::OPERAND_REG_IMM_INT64: 1778 case AMDGPU::OPERAND_REG_IMM_FP64: 1779 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1780 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1781 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1782 AsmParser->hasInv2PiInlineImm())) { 1783 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1784 return; 1785 } 1786 1787 // Non-inlineable 1788 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1789 // For fp operands we check if low 32 bits are zeros 1790 if (Literal.getLoBits(32) != 0) { 1791 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1792 "Can't encode literal as exact 64-bit floating-point operand. 
" 1793 "Low 32-bits will be set to zero"); 1794 } 1795 1796 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1797 return; 1798 } 1799 1800 // We don't allow fp literals in 64-bit integer instructions. It is 1801 // unclear how we should encode them. This case should be checked earlier 1802 // in predicate methods (isLiteralImm()) 1803 llvm_unreachable("fp literal in 64-bit integer instruction."); 1804 1805 case AMDGPU::OPERAND_REG_IMM_INT32: 1806 case AMDGPU::OPERAND_REG_IMM_FP32: 1807 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1808 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1809 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1810 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1811 case AMDGPU::OPERAND_REG_IMM_INT16: 1812 case AMDGPU::OPERAND_REG_IMM_FP16: 1813 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1814 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1815 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1816 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1817 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1818 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1819 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1820 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1821 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1822 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1823 bool lost; 1824 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1825 // Convert literal to single precision 1826 FPLiteral.convert(*getOpFltSemantics(OpTy), 1827 APFloat::rmNearestTiesToEven, &lost); 1828 // We allow precision lost but not overflow or underflow. This should be 1829 // checked earlier in isLiteralImm() 1830 1831 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1832 Inst.addOperand(MCOperand::createImm(ImmVal)); 1833 return; 1834 } 1835 default: 1836 llvm_unreachable("invalid operand size"); 1837 } 1838 1839 return; 1840 } 1841 1842 // We got int literal token. 1843 // Only sign extend inline immediates. 
1844 switch (OpTy) { 1845 case AMDGPU::OPERAND_REG_IMM_INT32: 1846 case AMDGPU::OPERAND_REG_IMM_FP32: 1847 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1848 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1849 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1850 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1851 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1852 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1853 if (isSafeTruncation(Val, 32) && 1854 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1855 AsmParser->hasInv2PiInlineImm())) { 1856 Inst.addOperand(MCOperand::createImm(Val)); 1857 return; 1858 } 1859 1860 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1861 return; 1862 1863 case AMDGPU::OPERAND_REG_IMM_INT64: 1864 case AMDGPU::OPERAND_REG_IMM_FP64: 1865 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1866 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1867 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1868 Inst.addOperand(MCOperand::createImm(Val)); 1869 return; 1870 } 1871 1872 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1873 return; 1874 1875 case AMDGPU::OPERAND_REG_IMM_INT16: 1876 case AMDGPU::OPERAND_REG_IMM_FP16: 1877 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1878 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1879 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1880 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1881 if (isSafeTruncation(Val, 16) && 1882 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1883 AsmParser->hasInv2PiInlineImm())) { 1884 Inst.addOperand(MCOperand::createImm(Val)); 1885 return; 1886 } 1887 1888 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1889 return; 1890 1891 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1892 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1893 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1894 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1895 assert(isSafeTruncation(Val, 16)); 1896 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1897 AsmParser->hasInv2PiInlineImm())); 1898 1899 
Inst.addOperand(MCOperand::createImm(Val)); 1900 return; 1901 } 1902 default: 1903 llvm_unreachable("invalid operand size"); 1904 } 1905 } 1906 1907 template <unsigned Bitwidth> 1908 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1909 APInt Literal(64, Imm.Val); 1910 1911 if (!Imm.IsFPImm) { 1912 // We got int literal token. 1913 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1914 return; 1915 } 1916 1917 bool Lost; 1918 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1919 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1920 APFloat::rmNearestTiesToEven, &Lost); 1921 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1922 } 1923 1924 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1925 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1926 } 1927 1928 static bool isInlineValue(unsigned Reg) { 1929 switch (Reg) { 1930 case AMDGPU::SRC_SHARED_BASE: 1931 case AMDGPU::SRC_SHARED_LIMIT: 1932 case AMDGPU::SRC_PRIVATE_BASE: 1933 case AMDGPU::SRC_PRIVATE_LIMIT: 1934 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1935 return true; 1936 case AMDGPU::SRC_VCCZ: 1937 case AMDGPU::SRC_EXECZ: 1938 case AMDGPU::SRC_SCC: 1939 return true; 1940 case AMDGPU::SGPR_NULL: 1941 return true; 1942 default: 1943 return false; 1944 } 1945 } 1946 1947 bool AMDGPUOperand::isInlineValue() const { 1948 return isRegKind() && ::isInlineValue(getReg()); 1949 } 1950 1951 //===----------------------------------------------------------------------===// 1952 // AsmParser 1953 //===----------------------------------------------------------------------===// 1954 1955 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1956 if (Is == IS_VGPR) { 1957 switch (RegWidth) { 1958 default: return -1; 1959 case 1: return AMDGPU::VGPR_32RegClassID; 1960 case 2: return AMDGPU::VReg_64RegClassID; 1961 case 3: return AMDGPU::VReg_96RegClassID; 1962 case 4: return 
AMDGPU::VReg_128RegClassID; 1963 case 5: return AMDGPU::VReg_160RegClassID; 1964 case 6: return AMDGPU::VReg_192RegClassID; 1965 case 8: return AMDGPU::VReg_256RegClassID; 1966 case 16: return AMDGPU::VReg_512RegClassID; 1967 case 32: return AMDGPU::VReg_1024RegClassID; 1968 } 1969 } else if (Is == IS_TTMP) { 1970 switch (RegWidth) { 1971 default: return -1; 1972 case 1: return AMDGPU::TTMP_32RegClassID; 1973 case 2: return AMDGPU::TTMP_64RegClassID; 1974 case 4: return AMDGPU::TTMP_128RegClassID; 1975 case 8: return AMDGPU::TTMP_256RegClassID; 1976 case 16: return AMDGPU::TTMP_512RegClassID; 1977 } 1978 } else if (Is == IS_SGPR) { 1979 switch (RegWidth) { 1980 default: return -1; 1981 case 1: return AMDGPU::SGPR_32RegClassID; 1982 case 2: return AMDGPU::SGPR_64RegClassID; 1983 case 3: return AMDGPU::SGPR_96RegClassID; 1984 case 4: return AMDGPU::SGPR_128RegClassID; 1985 case 5: return AMDGPU::SGPR_160RegClassID; 1986 case 6: return AMDGPU::SGPR_192RegClassID; 1987 case 8: return AMDGPU::SGPR_256RegClassID; 1988 case 16: return AMDGPU::SGPR_512RegClassID; 1989 } 1990 } else if (Is == IS_AGPR) { 1991 switch (RegWidth) { 1992 default: return -1; 1993 case 1: return AMDGPU::AGPR_32RegClassID; 1994 case 2: return AMDGPU::AReg_64RegClassID; 1995 case 3: return AMDGPU::AReg_96RegClassID; 1996 case 4: return AMDGPU::AReg_128RegClassID; 1997 case 5: return AMDGPU::AReg_160RegClassID; 1998 case 6: return AMDGPU::AReg_192RegClassID; 1999 case 8: return AMDGPU::AReg_256RegClassID; 2000 case 16: return AMDGPU::AReg_512RegClassID; 2001 case 32: return AMDGPU::AReg_1024RegClassID; 2002 } 2003 } 2004 return -1; 2005 } 2006 2007 static unsigned getSpecialRegForName(StringRef RegName) { 2008 return StringSwitch<unsigned>(RegName) 2009 .Case("exec", AMDGPU::EXEC) 2010 .Case("vcc", AMDGPU::VCC) 2011 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2012 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2013 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2014 .Case("src_shared_base", 
AMDGPU::SRC_SHARED_BASE) 2015 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2016 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2017 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2018 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2019 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2020 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2021 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2022 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2023 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2024 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2025 .Case("m0", AMDGPU::M0) 2026 .Case("vccz", AMDGPU::SRC_VCCZ) 2027 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2028 .Case("execz", AMDGPU::SRC_EXECZ) 2029 .Case("src_execz", AMDGPU::SRC_EXECZ) 2030 .Case("scc", AMDGPU::SRC_SCC) 2031 .Case("src_scc", AMDGPU::SRC_SCC) 2032 .Case("tba", AMDGPU::TBA) 2033 .Case("tma", AMDGPU::TMA) 2034 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2035 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2036 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2037 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2038 .Case("vcc_lo", AMDGPU::VCC_LO) 2039 .Case("vcc_hi", AMDGPU::VCC_HI) 2040 .Case("exec_lo", AMDGPU::EXEC_LO) 2041 .Case("exec_hi", AMDGPU::EXEC_HI) 2042 .Case("tma_lo", AMDGPU::TMA_LO) 2043 .Case("tma_hi", AMDGPU::TMA_HI) 2044 .Case("tba_lo", AMDGPU::TBA_LO) 2045 .Case("tba_hi", AMDGPU::TBA_HI) 2046 .Case("pc", AMDGPU::PC_REG) 2047 .Case("null", AMDGPU::SGPR_NULL) 2048 .Default(AMDGPU::NoRegister); 2049 } 2050 2051 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2052 SMLoc &EndLoc, bool RestoreOnFailure) { 2053 auto R = parseRegister(); 2054 if (!R) return true; 2055 assert(R->isReg()); 2056 RegNo = R->getReg(); 2057 StartLoc = R->getStartLoc(); 2058 EndLoc = R->getEndLoc(); 2059 return false; 2060 } 2061 2062 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2063 SMLoc &EndLoc) { 2064 return ParseRegister(RegNo, 
StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2065 } 2066 2067 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2068 SMLoc &StartLoc, 2069 SMLoc &EndLoc) { 2070 bool Result = 2071 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2072 bool PendingErrors = getParser().hasPendingError(); 2073 getParser().clearPendingErrors(); 2074 if (PendingErrors) 2075 return MatchOperand_ParseFail; 2076 if (Result) 2077 return MatchOperand_NoMatch; 2078 return MatchOperand_Success; 2079 } 2080 2081 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2082 RegisterKind RegKind, unsigned Reg1, 2083 SMLoc Loc) { 2084 switch (RegKind) { 2085 case IS_SPECIAL: 2086 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2087 Reg = AMDGPU::EXEC; 2088 RegWidth = 2; 2089 return true; 2090 } 2091 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2092 Reg = AMDGPU::FLAT_SCR; 2093 RegWidth = 2; 2094 return true; 2095 } 2096 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2097 Reg = AMDGPU::XNACK_MASK; 2098 RegWidth = 2; 2099 return true; 2100 } 2101 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2102 Reg = AMDGPU::VCC; 2103 RegWidth = 2; 2104 return true; 2105 } 2106 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2107 Reg = AMDGPU::TBA; 2108 RegWidth = 2; 2109 return true; 2110 } 2111 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2112 Reg = AMDGPU::TMA; 2113 RegWidth = 2; 2114 return true; 2115 } 2116 Error(Loc, "register does not fit in the list"); 2117 return false; 2118 case IS_VGPR: 2119 case IS_SGPR: 2120 case IS_AGPR: 2121 case IS_TTMP: 2122 if (Reg1 != Reg + RegWidth) { 2123 Error(Loc, "registers in a list must have consecutive indices"); 2124 return false; 2125 } 2126 RegWidth++; 2127 return true; 2128 default: 2129 llvm_unreachable("unexpected register kind"); 2130 } 2131 } 2132 2133 struct RegInfo { 2134 StringLiteral Name; 2135 RegisterKind Kind; 2136 }; 
2137 2138 static constexpr RegInfo RegularRegisters[] = { 2139 {{"v"}, IS_VGPR}, 2140 {{"s"}, IS_SGPR}, 2141 {{"ttmp"}, IS_TTMP}, 2142 {{"acc"}, IS_AGPR}, 2143 {{"a"}, IS_AGPR}, 2144 }; 2145 2146 static bool isRegularReg(RegisterKind Kind) { 2147 return Kind == IS_VGPR || 2148 Kind == IS_SGPR || 2149 Kind == IS_TTMP || 2150 Kind == IS_AGPR; 2151 } 2152 2153 static const RegInfo* getRegularRegInfo(StringRef Str) { 2154 for (const RegInfo &Reg : RegularRegisters) 2155 if (Str.startswith(Reg.Name)) 2156 return &Reg; 2157 return nullptr; 2158 } 2159 2160 static bool getRegNum(StringRef Str, unsigned& Num) { 2161 return !Str.getAsInteger(10, Num); 2162 } 2163 2164 bool 2165 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2166 const AsmToken &NextToken) const { 2167 2168 // A list of consecutive registers: [s0,s1,s2,s3] 2169 if (Token.is(AsmToken::LBrac)) 2170 return true; 2171 2172 if (!Token.is(AsmToken::Identifier)) 2173 return false; 2174 2175 // A single register like s0 or a range of registers like s[0:1] 2176 2177 StringRef Str = Token.getString(); 2178 const RegInfo *Reg = getRegularRegInfo(Str); 2179 if (Reg) { 2180 StringRef RegName = Reg->Name; 2181 StringRef RegSuffix = Str.substr(RegName.size()); 2182 if (!RegSuffix.empty()) { 2183 unsigned Num; 2184 // A single register with an index: rXX 2185 if (getRegNum(RegSuffix, Num)) 2186 return true; 2187 } else { 2188 // A range of registers: r[XX:YY]. 
2189 if (NextToken.is(AsmToken::LBrac)) 2190 return true; 2191 } 2192 } 2193 2194 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2195 } 2196 2197 bool 2198 AMDGPUAsmParser::isRegister() 2199 { 2200 return isRegister(getToken(), peekToken()); 2201 } 2202 2203 unsigned 2204 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2205 unsigned RegNum, 2206 unsigned RegWidth, 2207 SMLoc Loc) { 2208 2209 assert(isRegularReg(RegKind)); 2210 2211 unsigned AlignSize = 1; 2212 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2213 // SGPR and TTMP registers must be aligned. 2214 // Max required alignment is 4 dwords. 2215 AlignSize = std::min(RegWidth, 4u); 2216 } 2217 2218 if (RegNum % AlignSize != 0) { 2219 Error(Loc, "invalid register alignment"); 2220 return AMDGPU::NoRegister; 2221 } 2222 2223 unsigned RegIdx = RegNum / AlignSize; 2224 int RCID = getRegClass(RegKind, RegWidth); 2225 if (RCID == -1) { 2226 Error(Loc, "invalid or unsupported register size"); 2227 return AMDGPU::NoRegister; 2228 } 2229 2230 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2231 const MCRegisterClass RC = TRI->getRegClass(RCID); 2232 if (RegIdx >= RC.getNumRegs()) { 2233 Error(Loc, "register index is out of range"); 2234 return AMDGPU::NoRegister; 2235 } 2236 2237 return RC.getRegister(RegIdx); 2238 } 2239 2240 bool 2241 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2242 int64_t RegLo, RegHi; 2243 if (!skipToken(AsmToken::LBrac, "missing register index")) 2244 return false; 2245 2246 SMLoc FirstIdxLoc = getLoc(); 2247 SMLoc SecondIdxLoc; 2248 2249 if (!parseExpr(RegLo)) 2250 return false; 2251 2252 if (trySkipToken(AsmToken::Colon)) { 2253 SecondIdxLoc = getLoc(); 2254 if (!parseExpr(RegHi)) 2255 return false; 2256 } else { 2257 RegHi = RegLo; 2258 } 2259 2260 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2261 return false; 2262 2263 if (!isUInt<32>(RegLo)) { 2264 Error(FirstIdxLoc, "invalid register index"); 2265 return false; 2266 
} 2267 2268 if (!isUInt<32>(RegHi)) { 2269 Error(SecondIdxLoc, "invalid register index"); 2270 return false; 2271 } 2272 2273 if (RegLo > RegHi) { 2274 Error(FirstIdxLoc, "first register index should not exceed second index"); 2275 return false; 2276 } 2277 2278 Num = static_cast<unsigned>(RegLo); 2279 Width = (RegHi - RegLo) + 1; 2280 return true; 2281 } 2282 2283 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2284 unsigned &RegNum, unsigned &RegWidth, 2285 SmallVectorImpl<AsmToken> &Tokens) { 2286 assert(isToken(AsmToken::Identifier)); 2287 unsigned Reg = getSpecialRegForName(getTokenStr()); 2288 if (Reg) { 2289 RegNum = 0; 2290 RegWidth = 1; 2291 RegKind = IS_SPECIAL; 2292 Tokens.push_back(getToken()); 2293 lex(); // skip register name 2294 } 2295 return Reg; 2296 } 2297 2298 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2299 unsigned &RegNum, unsigned &RegWidth, 2300 SmallVectorImpl<AsmToken> &Tokens) { 2301 assert(isToken(AsmToken::Identifier)); 2302 StringRef RegName = getTokenStr(); 2303 auto Loc = getLoc(); 2304 2305 const RegInfo *RI = getRegularRegInfo(RegName); 2306 if (!RI) { 2307 Error(Loc, "invalid register name"); 2308 return AMDGPU::NoRegister; 2309 } 2310 2311 Tokens.push_back(getToken()); 2312 lex(); // skip register name 2313 2314 RegKind = RI->Kind; 2315 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2316 if (!RegSuffix.empty()) { 2317 // Single 32-bit register: vXX. 2318 if (!getRegNum(RegSuffix, RegNum)) { 2319 Error(Loc, "invalid register index"); 2320 return AMDGPU::NoRegister; 2321 } 2322 RegWidth = 1; 2323 } else { 2324 // Range of registers: v[XX:YY]. ":YY" is optional. 
2325 if (!ParseRegRange(RegNum, RegWidth)) 2326 return AMDGPU::NoRegister; 2327 } 2328 2329 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2330 } 2331 2332 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2333 unsigned &RegWidth, 2334 SmallVectorImpl<AsmToken> &Tokens) { 2335 unsigned Reg = AMDGPU::NoRegister; 2336 auto ListLoc = getLoc(); 2337 2338 if (!skipToken(AsmToken::LBrac, 2339 "expected a register or a list of registers")) { 2340 return AMDGPU::NoRegister; 2341 } 2342 2343 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2344 2345 auto Loc = getLoc(); 2346 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2347 return AMDGPU::NoRegister; 2348 if (RegWidth != 1) { 2349 Error(Loc, "expected a single 32-bit register"); 2350 return AMDGPU::NoRegister; 2351 } 2352 2353 for (; trySkipToken(AsmToken::Comma); ) { 2354 RegisterKind NextRegKind; 2355 unsigned NextReg, NextRegNum, NextRegWidth; 2356 Loc = getLoc(); 2357 2358 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2359 NextRegNum, NextRegWidth, 2360 Tokens)) { 2361 return AMDGPU::NoRegister; 2362 } 2363 if (NextRegWidth != 1) { 2364 Error(Loc, "expected a single 32-bit register"); 2365 return AMDGPU::NoRegister; 2366 } 2367 if (NextRegKind != RegKind) { 2368 Error(Loc, "registers in a list must be of the same kind"); 2369 return AMDGPU::NoRegister; 2370 } 2371 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2372 return AMDGPU::NoRegister; 2373 } 2374 2375 if (!skipToken(AsmToken::RBrac, 2376 "expected a comma or a closing square bracket")) { 2377 return AMDGPU::NoRegister; 2378 } 2379 2380 if (isRegularReg(RegKind)) 2381 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2382 2383 return Reg; 2384 } 2385 2386 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2387 unsigned &RegNum, unsigned &RegWidth, 2388 SmallVectorImpl<AsmToken> &Tokens) { 2389 auto Loc = getLoc(); 2390 Reg = AMDGPU::NoRegister; 2391 2392 
if (isToken(AsmToken::Identifier)) { 2393 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2394 if (Reg == AMDGPU::NoRegister) 2395 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2396 } else { 2397 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2398 } 2399 2400 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2401 if (Reg == AMDGPU::NoRegister) { 2402 assert(Parser.hasPendingError()); 2403 return false; 2404 } 2405 2406 if (!subtargetHasRegister(*TRI, Reg)) { 2407 if (Reg == AMDGPU::SGPR_NULL) { 2408 Error(Loc, "'null' operand is not supported on this GPU"); 2409 } else { 2410 Error(Loc, "register not available on this GPU"); 2411 } 2412 return false; 2413 } 2414 2415 return true; 2416 } 2417 2418 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2419 unsigned &RegNum, unsigned &RegWidth, 2420 bool RestoreOnFailure /*=false*/) { 2421 Reg = AMDGPU::NoRegister; 2422 2423 SmallVector<AsmToken, 1> Tokens; 2424 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2425 if (RestoreOnFailure) { 2426 while (!Tokens.empty()) { 2427 getLexer().UnLex(Tokens.pop_back_val()); 2428 } 2429 } 2430 return true; 2431 } 2432 return false; 2433 } 2434 2435 Optional<StringRef> 2436 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2437 switch (RegKind) { 2438 case IS_VGPR: 2439 return StringRef(".amdgcn.next_free_vgpr"); 2440 case IS_SGPR: 2441 return StringRef(".amdgcn.next_free_sgpr"); 2442 default: 2443 return None; 2444 } 2445 } 2446 2447 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2448 auto SymbolName = getGprCountSymbolName(RegKind); 2449 assert(SymbolName && "initializing invalid register kind"); 2450 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2451 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2452 } 2453 2454 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2455 unsigned DwordRegIndex, 2456 unsigned 
RegWidth) { 2457 // Symbols are only defined for GCN targets 2458 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2459 return true; 2460 2461 auto SymbolName = getGprCountSymbolName(RegKind); 2462 if (!SymbolName) 2463 return true; 2464 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2465 2466 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2467 int64_t OldCount; 2468 2469 if (!Sym->isVariable()) 2470 return !Error(getParser().getTok().getLoc(), 2471 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2472 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2473 return !Error( 2474 getParser().getTok().getLoc(), 2475 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2476 2477 if (OldCount <= NewMax) 2478 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2479 2480 return true; 2481 } 2482 2483 std::unique_ptr<AMDGPUOperand> 2484 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2485 const auto &Tok = Parser.getTok(); 2486 SMLoc StartLoc = Tok.getLoc(); 2487 SMLoc EndLoc = Tok.getEndLoc(); 2488 RegisterKind RegKind; 2489 unsigned Reg, RegNum, RegWidth; 2490 2491 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2492 return nullptr; 2493 } 2494 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2495 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2496 return nullptr; 2497 } else 2498 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2499 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2500 } 2501 2502 OperandMatchResultTy 2503 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2504 // TODO: add syntactic sugar for 1/(2*PI) 2505 2506 assert(!isRegister()); 2507 assert(!isModifier()); 2508 2509 const auto& Tok = getToken(); 2510 const auto& NextTok = peekToken(); 2511 bool IsReal = Tok.is(AsmToken::Real); 2512 SMLoc S = getLoc(); 2513 bool Negate = false; 2514 2515 if (!IsReal && Tok.is(AsmToken::Minus) && 
NextTok.is(AsmToken::Real)) { 2516 lex(); 2517 IsReal = true; 2518 Negate = true; 2519 } 2520 2521 if (IsReal) { 2522 // Floating-point expressions are not supported. 2523 // Can only allow floating-point literals with an 2524 // optional sign. 2525 2526 StringRef Num = getTokenStr(); 2527 lex(); 2528 2529 APFloat RealVal(APFloat::IEEEdouble()); 2530 auto roundMode = APFloat::rmNearestTiesToEven; 2531 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2532 return MatchOperand_ParseFail; 2533 } 2534 if (Negate) 2535 RealVal.changeSign(); 2536 2537 Operands.push_back( 2538 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2539 AMDGPUOperand::ImmTyNone, true)); 2540 2541 return MatchOperand_Success; 2542 2543 } else { 2544 int64_t IntVal; 2545 const MCExpr *Expr; 2546 SMLoc S = getLoc(); 2547 2548 if (HasSP3AbsModifier) { 2549 // This is a workaround for handling expressions 2550 // as arguments of SP3 'abs' modifier, for example: 2551 // |1.0| 2552 // |-1| 2553 // |1+x| 2554 // This syntax is not compatible with syntax of standard 2555 // MC expressions (due to the trailing '|'). 
2556 SMLoc EndLoc; 2557 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2558 return MatchOperand_ParseFail; 2559 } else { 2560 if (Parser.parseExpression(Expr)) 2561 return MatchOperand_ParseFail; 2562 } 2563 2564 if (Expr->evaluateAsAbsolute(IntVal)) { 2565 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2566 } else { 2567 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2568 } 2569 2570 return MatchOperand_Success; 2571 } 2572 2573 return MatchOperand_NoMatch; 2574 } 2575 2576 OperandMatchResultTy 2577 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2578 if (!isRegister()) 2579 return MatchOperand_NoMatch; 2580 2581 if (auto R = parseRegister()) { 2582 assert(R->isReg()); 2583 Operands.push_back(std::move(R)); 2584 return MatchOperand_Success; 2585 } 2586 return MatchOperand_ParseFail; 2587 } 2588 2589 OperandMatchResultTy 2590 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2591 auto res = parseReg(Operands); 2592 if (res != MatchOperand_NoMatch) { 2593 return res; 2594 } else if (isModifier()) { 2595 return MatchOperand_NoMatch; 2596 } else { 2597 return parseImm(Operands, HasSP3AbsMod); 2598 } 2599 } 2600 2601 bool 2602 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2603 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2604 const auto &str = Token.getString(); 2605 return str == "abs" || str == "neg" || str == "sext"; 2606 } 2607 return false; 2608 } 2609 2610 bool 2611 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2612 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2613 } 2614 2615 bool 2616 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2617 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2618 } 2619 2620 bool 2621 AMDGPUAsmParser::isRegOrOperandModifier(const 
AsmToken &Token, const AsmToken &NextToken) const { 2622 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2623 } 2624 2625 // Check if this is an operand modifier or an opcode modifier 2626 // which may look like an expression but it is not. We should 2627 // avoid parsing these modifiers as expressions. Currently 2628 // recognized sequences are: 2629 // |...| 2630 // abs(...) 2631 // neg(...) 2632 // sext(...) 2633 // -reg 2634 // -|...| 2635 // -abs(...) 2636 // name:... 2637 // Note that simple opcode modifiers like 'gds' may be parsed as 2638 // expressions; this is a special case. See getExpressionAsToken. 2639 // 2640 bool 2641 AMDGPUAsmParser::isModifier() { 2642 2643 AsmToken Tok = getToken(); 2644 AsmToken NextToken[2]; 2645 peekTokens(NextToken); 2646 2647 return isOperandModifier(Tok, NextToken[0]) || 2648 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2649 isOpcodeModifierWithVal(Tok, NextToken[0]); 2650 } 2651 2652 // Check if the current token is an SP3 'neg' modifier. 2653 // Currently this modifier is allowed in the following context: 2654 // 2655 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2656 // 2. Before an 'abs' modifier: -abs(...) 2657 // 3. Before an SP3 'abs' modifier: -|...| 2658 // 2659 // In all other cases "-" is handled as a part 2660 // of an expression that follows the sign. 2661 // 2662 // Note: When "-" is followed by an integer literal, 2663 // this is interpreted as integer negation rather 2664 // than a floating-point NEG modifier applied to N. 
// Beside being counter-intuitive, such use of floating-point
// NEG modifier would have resulted in different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity
//
// Returns true (and consumes the '-') only when the '-' is followed by a
// register, a '|' or the identifier "abs".
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

// Parse a register (or, if AllowImm, an immediate) optionally wrapped in
// floating-point input modifiers: neg(...), abs(...), SP3 '-' and SP3 |...|.
// The named and SP3 spellings of the same modifier may not be combined.
// On success the parsed modifiers are attached to the operand just pushed.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // Once a modifier has been consumed, "no match" is a hard failure.
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  // Close the modifiers innermost first.
  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    // Modifiers cannot be applied to an unresolved expression.
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}

// Parse a register (or, if AllowImm, an immediate) optionally wrapped in
// the integer input modifier sext(...). On success the modifier, if
// present, is attached to the operand just pushed.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    return MatchOperand_ParseFail;

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // Once "sext(" has been consumed, "no match" is a hard failure.
    return Sext? MatchOperand_ParseFail : Res;
  }

  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Sext = Sext;

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    // Modifiers cannot be applied to an unresolved expression.
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }

  return MatchOperand_Success;
}

// Register-only variant of parseRegOrImmWithFPInputMods.
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}

// Register-only variant of parseRegOrImmWithIntInputMods.
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}

// Parse either the keyword "off" (pushed as an ImmTyOff immediate with
// value 0) or a register.
OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));
    return MatchOperand_Success;
  }

  if (!isRegister())
    return MatchOperand_NoMatch;

  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  if (Reg) {
    Operands.push_back(std::move(Reg));
    return MatchOperand_Success;
  }

  return MatchOperand_ParseFail;

}

// Target-specific filter applied to a candidate match: rejects encodings
// that contradict a forced encoding (e32/e64/dpp/sdwa), prefers the 32-bit
// form where the instruction asks for it, and restricts dst_sel of the
// SDWA v_mac forms.
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}

// All assembler variants, in the order they are tried when no encoding is
// forced.
static ArrayRef<unsigned> getAllVariants() {
  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  return getAllVariants();
}

// Name of the forced encoding ("e32", "e64", "sdwa", "dpp"), or an empty
// string when none is forced.
StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}

// Return the first implicit use of Inst's opcode that is FLAT_SCR, VCC,
// VCC_LO, VCC_HI or M0, or NoRegister if there is none.
unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
//
// The operand at OpIdx is read with getImm() without checking its kind, so
// the caller must only pass immediate operands.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    // 16-bit operands: the check depends on the operand type.
    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
      return AMDGPU::isInlinableIntLiteral(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
      return AMDGPU::isInlinableIntLiteralV216(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());

    return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

// Number of constant-bus reads an instruction may make: 1 before GFX10 and
// for the 64-bit shifts listed below, 2 otherwise.
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  if (!isGFX10())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHL_B64:
  case AMDGPU::V_LSHRREV_B64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHR_B64:
  case AMDGPU::V_ASHRREV_I64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHR_I64:
    return 1;
  default:
    return 2;
  }
}

// Return true if the operand at OpIdx counts against the constant-bus
// limit: a non-inline immediate, an SGPR other than null, or anything that
// is neither an immediate nor a register (e.g. an expression).
bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  const MCOperand &MO = Inst.getOperand(OpIdx);
  if (MO.isImm()) {
    return !isInlineConstant(Inst, OpIdx);
  } else if (MO.isReg()) {
    auto Reg = MO.getReg();
    const MCRegisterInfo *TRI = getContext().getRegisterInfo();
    return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
  } else {
    return true;
  }
}

// Count the constant-bus reads of Inst (special imm operand, implicit SGPR
// read, distinct SGPR sources, literals) and compare the total against
// getConstantBusLimit(). Only VOPC/VOP1/VOP2/VOP3/VOP3P/SDWA instructions
// are counted; everything else passes.
bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  // Only read after NumLiterals has become non-zero, at which point it has
  // been assigned.
  unsigned LiteralSize;

  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    SmallDenseSet<unsigned> SGPRsUsed;
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      // The scan stops at the first missing source operand.
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(Reg)) {
            SGPRsUsed.insert(Reg);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated on the previous step.
          // See validateVOP3Literal.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  return ConstantBusUseCount <= getConstantBusLimit(Opcode);
}

// For instructions whose vdst carries an early-clobber constraint, return
// false if any register source (src0..src2) intersects the destination.
// Instructions without such a vdst always pass.
bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
  if (DstIdx == -1 ||
      Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
    return true;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  assert(DstIdx != -1);
  const MCOperand &Dst = Inst.getOperand(DstIdx);
  assert(Dst.isReg());
  const unsigned DstReg = mc2PseudoReg(Dst.getReg());

  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };

  for (int SrcIdx : SrcIndices) {
    // The scan stops at the first missing source operand.
    if (SrcIdx == -1) break;
    const MCOperand &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg()) {
      const unsigned SrcReg = mc2PseudoReg(Src.getReg());
      if (isRegIntersect(DstReg, SrcReg, TRI)) {
        return false;
      }
    }
  }

  return true;
}

bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
    int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
assert(ClampIdx != -1); 3126 return Inst.getOperand(ClampIdx).getImm() == 0; 3127 } 3128 3129 return true; 3130 } 3131 3132 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3133 3134 const unsigned Opc = Inst.getOpcode(); 3135 const MCInstrDesc &Desc = MII.get(Opc); 3136 3137 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3138 return true; 3139 3140 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3141 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3142 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3143 3144 assert(VDataIdx != -1); 3145 3146 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3147 return true; 3148 3149 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3150 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3151 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3152 if (DMask == 0) 3153 DMask = 1; 3154 3155 unsigned DataSize = 3156 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3157 if (hasPackedD16()) { 3158 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3159 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3160 DataSize = (DataSize + 1) / 2; 3161 } 3162 3163 return (VDataSize / 4) == DataSize + TFESize; 3164 } 3165 3166 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3167 const unsigned Opc = Inst.getOpcode(); 3168 const MCInstrDesc &Desc = MII.get(Opc); 3169 3170 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3171 return true; 3172 3173 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3174 3175 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3176 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3177 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3178 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3179 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3180 3181 assert(VAddr0Idx != -1); 3182 assert(SrsrcIdx != -1); 3183 assert(SrsrcIdx > VAddr0Idx); 3184 3185 if (DimIdx == -1) 3186 return true; // intersect_ray 3187 3188 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3189 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3190 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3191 unsigned VAddrSize = 3192 IsNSA ? SrsrcIdx - VAddr0Idx 3193 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3194 3195 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3196 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3197 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3198 (BaseOpcode->LodOrClampOrMip ? 
1 : 0); 3199 if (!IsNSA) { 3200 if (AddrSize > 8) 3201 AddrSize = 16; 3202 else if (AddrSize > 4) 3203 AddrSize = 8; 3204 } 3205 3206 return VAddrSize == AddrSize; 3207 } 3208 3209 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3210 3211 const unsigned Opc = Inst.getOpcode(); 3212 const MCInstrDesc &Desc = MII.get(Opc); 3213 3214 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3215 return true; 3216 if (!Desc.mayLoad() || !Desc.mayStore()) 3217 return true; // Not atomic 3218 3219 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3220 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3221 3222 // This is an incomplete check because image_atomic_cmpswap 3223 // may only use 0x3 and 0xf while other atomic operations 3224 // may use 0x1 and 0x3. However these limitations are 3225 // verified when we check that dmask matches dst size. 3226 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3227 } 3228 3229 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3230 3231 const unsigned Opc = Inst.getOpcode(); 3232 const MCInstrDesc &Desc = MII.get(Opc); 3233 3234 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3235 return true; 3236 3237 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3238 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3239 3240 // GATHER4 instructions use dmask in a different fashion compared to 3241 // other MIMG instructions. The only useful DMASK values are 3242 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3243 // (red,red,red,red) etc.) The ISA document doesn't mention 3244 // this. 
3245 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3246 } 3247 3248 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3249 { 3250 switch (Opcode) { 3251 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3252 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3253 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3254 return true; 3255 default: 3256 return false; 3257 } 3258 } 3259 3260 // movrels* opcodes should only allow VGPRS as src0. 3261 // This is specified in .td description for vop1/vop3, 3262 // but sdwa is handled differently. See isSDWAOperand. 3263 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3264 3265 const unsigned Opc = Inst.getOpcode(); 3266 const MCInstrDesc &Desc = MII.get(Opc); 3267 3268 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3269 return true; 3270 3271 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3272 assert(Src0Idx != -1); 3273 3274 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3275 if (!Src0.isReg()) 3276 return false; 3277 3278 auto Reg = Src0.getReg(); 3279 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3280 return !isSGPR(mc2PseudoReg(Reg), TRI); 3281 } 3282 3283 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) { 3284 3285 const unsigned Opc = Inst.getOpcode(); 3286 3287 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3288 return true; 3289 3290 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3291 assert(Src0Idx != -1); 3292 3293 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3294 if (!Src0.isReg()) 3295 return true; 3296 3297 auto Reg = Src0.getReg(); 3298 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3299 if (isSGPR(mc2PseudoReg(Reg), TRI)) { 3300 Error(getLoc(), "source operand must be either a VGPR or an inline constant"); 3301 return false; 3302 } 3303 3304 return true; 3305 } 3306 3307 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3308 3309 const unsigned Opc = 
Inst.getOpcode(); 3310 const MCInstrDesc &Desc = MII.get(Opc); 3311 3312 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3313 return true; 3314 3315 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3316 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3317 if (isCI() || isSI()) 3318 return false; 3319 } 3320 3321 return true; 3322 } 3323 3324 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3325 const unsigned Opc = Inst.getOpcode(); 3326 const MCInstrDesc &Desc = MII.get(Opc); 3327 3328 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3329 return true; 3330 3331 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3332 if (DimIdx < 0) 3333 return true; 3334 3335 long Imm = Inst.getOperand(DimIdx).getImm(); 3336 if (Imm < 0 || Imm >= 8) 3337 return false; 3338 3339 return true; 3340 } 3341 3342 static bool IsRevOpcode(const unsigned Opcode) 3343 { 3344 switch (Opcode) { 3345 case AMDGPU::V_SUBREV_F32_e32: 3346 case AMDGPU::V_SUBREV_F32_e64: 3347 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3348 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3349 case AMDGPU::V_SUBREV_F32_e32_vi: 3350 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3351 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3352 case AMDGPU::V_SUBREV_F32_e64_vi: 3353 3354 case AMDGPU::V_SUBREV_CO_U32_e32: 3355 case AMDGPU::V_SUBREV_CO_U32_e64: 3356 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3357 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3358 3359 case AMDGPU::V_SUBBREV_U32_e32: 3360 case AMDGPU::V_SUBBREV_U32_e64: 3361 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3362 case AMDGPU::V_SUBBREV_U32_e32_vi: 3363 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3364 case AMDGPU::V_SUBBREV_U32_e64_vi: 3365 3366 case AMDGPU::V_SUBREV_U32_e32: 3367 case AMDGPU::V_SUBREV_U32_e64: 3368 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3369 case AMDGPU::V_SUBREV_U32_e32_vi: 3370 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3371 case AMDGPU::V_SUBREV_U32_e64_vi: 3372 3373 case AMDGPU::V_SUBREV_F16_e32: 3374 case AMDGPU::V_SUBREV_F16_e64: 3375 
case AMDGPU::V_SUBREV_F16_e32_gfx10: 3376 case AMDGPU::V_SUBREV_F16_e32_vi: 3377 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3378 case AMDGPU::V_SUBREV_F16_e64_vi: 3379 3380 case AMDGPU::V_SUBREV_U16_e32: 3381 case AMDGPU::V_SUBREV_U16_e64: 3382 case AMDGPU::V_SUBREV_U16_e32_vi: 3383 case AMDGPU::V_SUBREV_U16_e64_vi: 3384 3385 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3386 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3387 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3388 3389 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3390 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3391 3392 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3393 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3394 3395 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3396 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3397 3398 case AMDGPU::V_LSHRREV_B32_e32: 3399 case AMDGPU::V_LSHRREV_B32_e64: 3400 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3401 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3402 case AMDGPU::V_LSHRREV_B32_e32_vi: 3403 case AMDGPU::V_LSHRREV_B32_e64_vi: 3404 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3405 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3406 3407 case AMDGPU::V_ASHRREV_I32_e32: 3408 case AMDGPU::V_ASHRREV_I32_e64: 3409 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3410 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3411 case AMDGPU::V_ASHRREV_I32_e32_vi: 3412 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3413 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3414 case AMDGPU::V_ASHRREV_I32_e64_vi: 3415 3416 case AMDGPU::V_LSHLREV_B32_e32: 3417 case AMDGPU::V_LSHLREV_B32_e64: 3418 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3419 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3420 case AMDGPU::V_LSHLREV_B32_e32_vi: 3421 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3422 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3423 case AMDGPU::V_LSHLREV_B32_e64_vi: 3424 3425 case AMDGPU::V_LSHLREV_B16_e32: 3426 case AMDGPU::V_LSHLREV_B16_e64: 3427 case AMDGPU::V_LSHLREV_B16_e32_vi: 3428 case AMDGPU::V_LSHLREV_B16_e64_vi: 3429 case AMDGPU::V_LSHLREV_B16_gfx10: 3430 3431 case AMDGPU::V_LSHRREV_B16_e32: 
3432 case AMDGPU::V_LSHRREV_B16_e64: 3433 case AMDGPU::V_LSHRREV_B16_e32_vi: 3434 case AMDGPU::V_LSHRREV_B16_e64_vi: 3435 case AMDGPU::V_LSHRREV_B16_gfx10: 3436 3437 case AMDGPU::V_ASHRREV_I16_e32: 3438 case AMDGPU::V_ASHRREV_I16_e64: 3439 case AMDGPU::V_ASHRREV_I16_e32_vi: 3440 case AMDGPU::V_ASHRREV_I16_e64_vi: 3441 case AMDGPU::V_ASHRREV_I16_gfx10: 3442 3443 case AMDGPU::V_LSHLREV_B64: 3444 case AMDGPU::V_LSHLREV_B64_gfx10: 3445 case AMDGPU::V_LSHLREV_B64_vi: 3446 3447 case AMDGPU::V_LSHRREV_B64: 3448 case AMDGPU::V_LSHRREV_B64_gfx10: 3449 case AMDGPU::V_LSHRREV_B64_vi: 3450 3451 case AMDGPU::V_ASHRREV_I64: 3452 case AMDGPU::V_ASHRREV_I64_gfx10: 3453 case AMDGPU::V_ASHRREV_I64_vi: 3454 3455 case AMDGPU::V_PK_LSHLREV_B16: 3456 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3457 case AMDGPU::V_PK_LSHLREV_B16_vi: 3458 3459 case AMDGPU::V_PK_LSHRREV_B16: 3460 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3461 case AMDGPU::V_PK_LSHRREV_B16_vi: 3462 case AMDGPU::V_PK_ASHRREV_I16: 3463 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3464 case AMDGPU::V_PK_ASHRREV_I16_vi: 3465 return true; 3466 default: 3467 return false; 3468 } 3469 } 3470 3471 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3472 3473 using namespace SIInstrFlags; 3474 const unsigned Opcode = Inst.getOpcode(); 3475 const MCInstrDesc &Desc = MII.get(Opcode); 3476 3477 // lds_direct register is defined so that it can be used 3478 // with 9-bit operands only. Ignore encodings which do not accept these. 3479 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3480 return true; 3481 3482 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3483 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3484 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3485 3486 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3487 3488 // lds_direct cannot be specified as either src1 or src2. 
3489 for (int SrcIdx : SrcIndices) { 3490 if (SrcIdx == -1) break; 3491 const MCOperand &Src = Inst.getOperand(SrcIdx); 3492 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3493 return false; 3494 } 3495 } 3496 3497 if (Src0Idx == -1) 3498 return true; 3499 3500 const MCOperand &Src = Inst.getOperand(Src0Idx); 3501 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3502 return true; 3503 3504 // lds_direct is specified as src0. Check additional limitations. 3505 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3506 } 3507 3508 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3509 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3510 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3511 if (Op.isFlatOffset()) 3512 return Op.getStartLoc(); 3513 } 3514 return getLoc(); 3515 } 3516 3517 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3518 const OperandVector &Operands) { 3519 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3520 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3521 return true; 3522 3523 auto Opcode = Inst.getOpcode(); 3524 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3525 assert(OpNum != -1); 3526 3527 const auto &Op = Inst.getOperand(OpNum); 3528 if (!hasFlatOffsets() && Op.getImm() != 0) { 3529 Error(getFlatOffsetLoc(Operands), 3530 "flat offset modifier is not supported on this GPU"); 3531 return false; 3532 } 3533 3534 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3535 // For FLAT segment the offset must be positive; 3536 // MSB is ignored and forced to zero. 3537 unsigned OffsetSize = isGFX9() ? 13 : 12; 3538 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3539 if (!isIntN(OffsetSize, Op.getImm())) { 3540 Error(getFlatOffsetLoc(Operands), 3541 isGFX9() ? 
"expected a 13-bit signed offset" : 3542 "expected a 12-bit signed offset"); 3543 return false; 3544 } 3545 } else { 3546 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3547 Error(getFlatOffsetLoc(Operands), 3548 isGFX9() ? "expected a 12-bit unsigned offset" : 3549 "expected an 11-bit unsigned offset"); 3550 return false; 3551 } 3552 } 3553 3554 return true; 3555 } 3556 3557 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3558 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3559 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3560 if (Op.isSMEMOffset()) 3561 return Op.getStartLoc(); 3562 } 3563 return getLoc(); 3564 } 3565 3566 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3567 const OperandVector &Operands) { 3568 if (isCI() || isSI()) 3569 return true; 3570 3571 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3572 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3573 return true; 3574 3575 auto Opcode = Inst.getOpcode(); 3576 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3577 if (OpNum == -1) 3578 return true; 3579 3580 const auto &Op = Inst.getOperand(OpNum); 3581 if (!Op.isImm()) 3582 return true; 3583 3584 uint64_t Offset = Op.getImm(); 3585 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3586 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3587 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3588 return true; 3589 3590 Error(getSMEMOffsetLoc(Operands), 3591 (isVI() || IsBuffer) ? 
"expected a 20-bit unsigned offset" : 3592 "expected a 21-bit signed offset"); 3593 3594 return false; 3595 } 3596 3597 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3598 unsigned Opcode = Inst.getOpcode(); 3599 const MCInstrDesc &Desc = MII.get(Opcode); 3600 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3601 return true; 3602 3603 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3604 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3605 3606 const int OpIndices[] = { Src0Idx, Src1Idx }; 3607 3608 unsigned NumExprs = 0; 3609 unsigned NumLiterals = 0; 3610 uint32_t LiteralValue; 3611 3612 for (int OpIdx : OpIndices) { 3613 if (OpIdx == -1) break; 3614 3615 const MCOperand &MO = Inst.getOperand(OpIdx); 3616 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3617 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3618 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3619 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3620 if (NumLiterals == 0 || LiteralValue != Value) { 3621 LiteralValue = Value; 3622 ++NumLiterals; 3623 } 3624 } else if (MO.isExpr()) { 3625 ++NumExprs; 3626 } 3627 } 3628 } 3629 3630 return NumLiterals + NumExprs <= 1; 3631 } 3632 3633 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3634 const unsigned Opc = Inst.getOpcode(); 3635 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3636 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3637 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3638 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3639 3640 if (OpSel & ~3) 3641 return false; 3642 } 3643 return true; 3644 } 3645 3646 // Check if VCC register matches wavefront size 3647 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3648 auto FB = getFeatureBits(); 3649 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3650 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 
3651 } 3652 3653 // VOP3 literal is only allowed in GFX10+ and only one can be used 3654 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3655 unsigned Opcode = Inst.getOpcode(); 3656 const MCInstrDesc &Desc = MII.get(Opcode); 3657 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3658 return true; 3659 3660 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3661 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3662 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3663 3664 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3665 3666 unsigned NumExprs = 0; 3667 unsigned NumLiterals = 0; 3668 uint32_t LiteralValue; 3669 3670 for (int OpIdx : OpIndices) { 3671 if (OpIdx == -1) break; 3672 3673 const MCOperand &MO = Inst.getOperand(OpIdx); 3674 if (!MO.isImm() && !MO.isExpr()) 3675 continue; 3676 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3677 continue; 3678 3679 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3680 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3681 return false; 3682 3683 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3684 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3685 if (NumLiterals == 0 || LiteralValue != Value) { 3686 LiteralValue = Value; 3687 ++NumLiterals; 3688 } 3689 } else if (MO.isExpr()) { 3690 ++NumExprs; 3691 } 3692 } 3693 NumLiterals += NumExprs; 3694 3695 return !NumLiterals || 3696 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3697 } 3698 3699 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3700 const SMLoc &IDLoc, 3701 const OperandVector &Operands) { 3702 if (!validateLdsDirect(Inst)) { 3703 Error(IDLoc, 3704 "invalid use of lds_direct"); 3705 return false; 3706 } 3707 if (!validateSOPLiteral(Inst)) { 3708 Error(IDLoc, 3709 "only one literal operand is allowed"); 3710 return false; 3711 } 3712 if (!validateVOP3Literal(Inst)) { 3713 
Error(IDLoc, 3714 "invalid literal operand"); 3715 return false; 3716 } 3717 if (!validateConstantBusLimitations(Inst)) { 3718 Error(IDLoc, 3719 "invalid operand (violates constant bus restrictions)"); 3720 return false; 3721 } 3722 if (!validateEarlyClobberLimitations(Inst)) { 3723 Error(IDLoc, 3724 "destination must be different than all sources"); 3725 return false; 3726 } 3727 if (!validateIntClampSupported(Inst)) { 3728 Error(IDLoc, 3729 "integer clamping is not supported on this GPU"); 3730 return false; 3731 } 3732 if (!validateOpSel(Inst)) { 3733 Error(IDLoc, 3734 "invalid op_sel operand"); 3735 return false; 3736 } 3737 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3738 if (!validateMIMGD16(Inst)) { 3739 Error(IDLoc, 3740 "d16 modifier is not supported on this GPU"); 3741 return false; 3742 } 3743 if (!validateMIMGDim(Inst)) { 3744 Error(IDLoc, "dim modifier is required on this GPU"); 3745 return false; 3746 } 3747 if (!validateMIMGDataSize(Inst)) { 3748 Error(IDLoc, 3749 "image data size does not match dmask and tfe"); 3750 return false; 3751 } 3752 if (!validateMIMGAddrSize(Inst)) { 3753 Error(IDLoc, 3754 "image address size does not match dim and a16"); 3755 return false; 3756 } 3757 if (!validateMIMGAtomicDMask(Inst)) { 3758 Error(IDLoc, 3759 "invalid atomic image dmask"); 3760 return false; 3761 } 3762 if (!validateMIMGGatherDMask(Inst)) { 3763 Error(IDLoc, 3764 "invalid image_gather dmask: only one bit must be set"); 3765 return false; 3766 } 3767 if (!validateMovrels(Inst)) { 3768 Error(IDLoc, "source operand must be a VGPR"); 3769 return false; 3770 } 3771 if (!validateFlatOffset(Inst, Operands)) { 3772 return false; 3773 } 3774 if (!validateSMEMOffset(Inst, Operands)) { 3775 return false; 3776 } 3777 if (!validateMAIAccWrite(Inst)) { 3778 return false; 3779 } 3780 3781 return true; 3782 } 3783 3784 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3785 const FeatureBitset &FBS, 3786 unsigned VariantID = 0); 
3787 3788 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 3789 const FeatureBitset &AvailableFeatures, 3790 unsigned VariantID); 3791 3792 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3793 const FeatureBitset &FBS) { 3794 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 3795 } 3796 3797 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3798 const FeatureBitset &FBS, 3799 ArrayRef<unsigned> Variants) { 3800 for (auto Variant : Variants) { 3801 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 3802 return true; 3803 } 3804 3805 return false; 3806 } 3807 3808 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 3809 const SMLoc &IDLoc) { 3810 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3811 3812 // Check if requested instruction variant is supported. 3813 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 3814 return false; 3815 3816 // This instruction is not supported. 3817 // Clear any other pending errors because they are no longer relevant. 3818 getParser().clearPendingErrors(); 3819 3820 // Requested instruction variant is not supported. 3821 // Check if any other variants are supported. 3822 StringRef VariantName = getMatchedVariantName(); 3823 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 3824 return Error(IDLoc, 3825 Twine(VariantName, 3826 " variant of this instruction is not supported")); 3827 } 3828 3829 // Finally check if this instruction is supported on any other GPU. 3830 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 3831 return Error(IDLoc, "instruction not supported on this GPU"); 3832 } 3833 3834 // Instruction not supported on any GPU. Probably a typo. 
3835 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 3836 return Error(IDLoc, "invalid instruction" + Suggestion); 3837 } 3838 3839 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3840 OperandVector &Operands, 3841 MCStreamer &Out, 3842 uint64_t &ErrorInfo, 3843 bool MatchingInlineAsm) { 3844 MCInst Inst; 3845 unsigned Result = Match_Success; 3846 for (auto Variant : getMatchedVariants()) { 3847 uint64_t EI; 3848 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3849 Variant); 3850 // We order match statuses from least to most specific. We use most specific 3851 // status as resulting 3852 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3853 if ((R == Match_Success) || 3854 (R == Match_PreferE32) || 3855 (R == Match_MissingFeature && Result != Match_PreferE32) || 3856 (R == Match_InvalidOperand && Result != Match_MissingFeature 3857 && Result != Match_PreferE32) || 3858 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3859 && Result != Match_MissingFeature 3860 && Result != Match_PreferE32)) { 3861 Result = R; 3862 ErrorInfo = EI; 3863 } 3864 if (R == Match_Success) 3865 break; 3866 } 3867 3868 if (Result == Match_Success) { 3869 if (!validateInstruction(Inst, IDLoc, Operands)) { 3870 return true; 3871 } 3872 Inst.setLoc(IDLoc); 3873 Out.emitInstruction(Inst, getSTI()); 3874 return false; 3875 } 3876 3877 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 3878 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 3879 return true; 3880 } 3881 3882 switch (Result) { 3883 default: break; 3884 case Match_MissingFeature: 3885 // FIXME: this case should be analyzed and error message corrected. 
3886 return Error(IDLoc, "instruction not supported on this GPU"); 3887 3888 case Match_InvalidOperand: { 3889 SMLoc ErrorLoc = IDLoc; 3890 if (ErrorInfo != ~0ULL) { 3891 if (ErrorInfo >= Operands.size()) { 3892 return Error(IDLoc, "too few operands for instruction"); 3893 } 3894 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3895 if (ErrorLoc == SMLoc()) 3896 ErrorLoc = IDLoc; 3897 } 3898 return Error(ErrorLoc, "invalid operand for instruction"); 3899 } 3900 3901 case Match_PreferE32: 3902 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3903 "should be encoded as e32"); 3904 case Match_MnemonicFail: 3905 llvm_unreachable("Invalid instructions should have been handled already"); 3906 } 3907 llvm_unreachable("Implement any new match types added!"); 3908 } 3909 3910 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3911 int64_t Tmp = -1; 3912 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3913 return true; 3914 } 3915 if (getParser().parseAbsoluteExpression(Tmp)) { 3916 return true; 3917 } 3918 Ret = static_cast<uint32_t>(Tmp); 3919 return false; 3920 } 3921 3922 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3923 uint32_t &Minor) { 3924 if (ParseAsAbsoluteExpression(Major)) 3925 return TokError("invalid major version"); 3926 3927 if (getLexer().isNot(AsmToken::Comma)) 3928 return TokError("minor version number required, comma expected"); 3929 Lex(); 3930 3931 if (ParseAsAbsoluteExpression(Minor)) 3932 return TokError("invalid minor version"); 3933 3934 return false; 3935 } 3936 3937 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3938 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3939 return TokError("directive only supported for amdgcn architecture"); 3940 3941 std::string Target; 3942 3943 SMLoc TargetStart = getTok().getLoc(); 3944 if (getParser().parseEscapedString(Target)) 3945 return true; 3946 SMRange TargetRange = 
SMRange(TargetStart, getTok().getLoc()); 3947 3948 std::string ExpectedTarget; 3949 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3950 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3951 3952 if (Target != ExpectedTargetOS.str()) 3953 return getParser().Error(TargetRange.Start, "target must match options", 3954 TargetRange); 3955 3956 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3957 return false; 3958 } 3959 3960 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3961 return getParser().Error(Range.Start, "value out of range", Range); 3962 } 3963 3964 bool AMDGPUAsmParser::calculateGPRBlocks( 3965 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3966 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3967 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3968 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3969 // TODO(scott.linder): These calculations are duplicated from 3970 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
3971 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3972 3973 unsigned NumVGPRs = NextFreeVGPR; 3974 unsigned NumSGPRs = NextFreeSGPR; 3975 3976 if (Version.Major >= 10) 3977 NumSGPRs = 0; 3978 else { 3979 unsigned MaxAddressableNumSGPRs = 3980 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3981 3982 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3983 NumSGPRs > MaxAddressableNumSGPRs) 3984 return OutOfRangeError(SGPRRange); 3985 3986 NumSGPRs += 3987 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3988 3989 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3990 NumSGPRs > MaxAddressableNumSGPRs) 3991 return OutOfRangeError(SGPRRange); 3992 3993 if (Features.test(FeatureSGPRInitBug)) 3994 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3995 } 3996 3997 VGPRBlocks = 3998 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3999 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4000 4001 return false; 4002 } 4003 4004 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4005 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4006 return TokError("directive only supported for amdgcn architecture"); 4007 4008 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4009 return TokError("directive only supported for amdhsa OS"); 4010 4011 StringRef KernelName; 4012 if (getParser().parseIdentifier(KernelName)) 4013 return true; 4014 4015 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4016 4017 StringSet<> Seen; 4018 4019 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4020 4021 SMRange VGPRRange; 4022 uint64_t NextFreeVGPR = 0; 4023 SMRange SGPRRange; 4024 uint64_t NextFreeSGPR = 0; 4025 unsigned UserSGPRCount = 0; 4026 bool ReserveVCC = true; 4027 bool ReserveFlatScr = true; 4028 bool ReserveXNACK = hasXNACK(); 4029 Optional<bool> EnableWavefrontSize32; 4030 4031 while (true) { 4032 while (getLexer().is(AsmToken::EndOfStatement)) 4033 
Lex(); 4034 4035 if (getLexer().isNot(AsmToken::Identifier)) 4036 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 4037 4038 StringRef ID = getTok().getIdentifier(); 4039 SMRange IDRange = getTok().getLocRange(); 4040 Lex(); 4041 4042 if (ID == ".end_amdhsa_kernel") 4043 break; 4044 4045 if (Seen.find(ID) != Seen.end()) 4046 return TokError(".amdhsa_ directives cannot be repeated"); 4047 Seen.insert(ID); 4048 4049 SMLoc ValStart = getTok().getLoc(); 4050 int64_t IVal; 4051 if (getParser().parseAbsoluteExpression(IVal)) 4052 return true; 4053 SMLoc ValEnd = getTok().getLoc(); 4054 SMRange ValRange = SMRange(ValStart, ValEnd); 4055 4056 if (IVal < 0) 4057 return OutOfRangeError(ValRange); 4058 4059 uint64_t Val = IVal; 4060 4061 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4062 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4063 return OutOfRangeError(RANGE); \ 4064 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4065 4066 if (ID == ".amdhsa_group_segment_fixed_size") { 4067 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4068 return OutOfRangeError(ValRange); 4069 KD.group_segment_fixed_size = Val; 4070 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4071 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4072 return OutOfRangeError(ValRange); 4073 KD.private_segment_fixed_size = Val; 4074 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4075 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4076 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4077 Val, ValRange); 4078 if (Val) 4079 UserSGPRCount += 4; 4080 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4081 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4082 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4083 ValRange); 4084 if (Val) 4085 UserSGPRCount += 2; 4086 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4087 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4088 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4089 ValRange); 4090 if 
(Val) 4091 UserSGPRCount += 2; 4092 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4093 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4094 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4095 Val, ValRange); 4096 if (Val) 4097 UserSGPRCount += 2; 4098 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4099 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4100 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4101 ValRange); 4102 if (Val) 4103 UserSGPRCount += 2; 4104 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4105 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4106 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4107 ValRange); 4108 if (Val) 4109 UserSGPRCount += 2; 4110 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4111 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4112 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4113 Val, ValRange); 4114 if (Val) 4115 UserSGPRCount += 1; 4116 } else if (ID == ".amdhsa_wavefront_size32") { 4117 if (IVersion.Major < 10) 4118 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4119 IDRange); 4120 EnableWavefrontSize32 = Val; 4121 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4122 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4123 Val, ValRange); 4124 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4125 PARSE_BITS_ENTRY( 4126 KD.compute_pgm_rsrc2, 4127 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 4128 ValRange); 4129 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4130 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4131 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4132 ValRange); 4133 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4134 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4135 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4136 ValRange); 4137 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4138 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4139 
COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4140 ValRange); 4141 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4142 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4143 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4144 ValRange); 4145 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4146 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4147 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4148 ValRange); 4149 } else if (ID == ".amdhsa_next_free_vgpr") { 4150 VGPRRange = ValRange; 4151 NextFreeVGPR = Val; 4152 } else if (ID == ".amdhsa_next_free_sgpr") { 4153 SGPRRange = ValRange; 4154 NextFreeSGPR = Val; 4155 } else if (ID == ".amdhsa_reserve_vcc") { 4156 if (!isUInt<1>(Val)) 4157 return OutOfRangeError(ValRange); 4158 ReserveVCC = Val; 4159 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4160 if (IVersion.Major < 7) 4161 return getParser().Error(IDRange.Start, "directive requires gfx7+", 4162 IDRange); 4163 if (!isUInt<1>(Val)) 4164 return OutOfRangeError(ValRange); 4165 ReserveFlatScr = Val; 4166 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4167 if (IVersion.Major < 8) 4168 return getParser().Error(IDRange.Start, "directive requires gfx8+", 4169 IDRange); 4170 if (!isUInt<1>(Val)) 4171 return OutOfRangeError(ValRange); 4172 ReserveXNACK = Val; 4173 } else if (ID == ".amdhsa_float_round_mode_32") { 4174 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4175 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4176 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4177 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4178 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4179 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4180 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4181 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4182 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4183 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4184 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4185 ValRange); 4186 } else if (ID == ".amdhsa_dx10_clamp") { 4187 
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4188 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4189 } else if (ID == ".amdhsa_ieee_mode") { 4190 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4191 Val, ValRange); 4192 } else if (ID == ".amdhsa_fp16_overflow") { 4193 if (IVersion.Major < 9) 4194 return getParser().Error(IDRange.Start, "directive requires gfx9+", 4195 IDRange); 4196 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4197 ValRange); 4198 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4199 if (IVersion.Major < 10) 4200 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4201 IDRange); 4202 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4203 ValRange); 4204 } else if (ID == ".amdhsa_memory_ordered") { 4205 if (IVersion.Major < 10) 4206 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4207 IDRange); 4208 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4209 ValRange); 4210 } else if (ID == ".amdhsa_forward_progress") { 4211 if (IVersion.Major < 10) 4212 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4213 IDRange); 4214 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4215 ValRange); 4216 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4217 PARSE_BITS_ENTRY( 4218 KD.compute_pgm_rsrc2, 4219 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4220 ValRange); 4221 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4222 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4223 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4224 Val, ValRange); 4225 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4226 PARSE_BITS_ENTRY( 4227 KD.compute_pgm_rsrc2, 4228 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4229 ValRange); 4230 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4231 
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4232 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4233 Val, ValRange); 4234 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4235 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4236 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4237 Val, ValRange); 4238 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4239 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4240 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4241 Val, ValRange); 4242 } else if (ID == ".amdhsa_exception_int_div_zero") { 4243 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4244 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4245 Val, ValRange); 4246 } else { 4247 return getParser().Error(IDRange.Start, 4248 "unknown .amdhsa_kernel directive", IDRange); 4249 } 4250 4251 #undef PARSE_BITS_ENTRY 4252 } 4253 4254 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4255 return TokError(".amdhsa_next_free_vgpr directive is required"); 4256 4257 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4258 return TokError(".amdhsa_next_free_sgpr directive is required"); 4259 4260 unsigned VGPRBlocks; 4261 unsigned SGPRBlocks; 4262 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4263 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4264 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4265 SGPRBlocks)) 4266 return true; 4267 4268 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4269 VGPRBlocks)) 4270 return OutOfRangeError(VGPRRange); 4271 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4272 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4273 4274 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4275 SGPRBlocks)) 4276 return OutOfRangeError(SGPRRange); 4277 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4278 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4279 SGPRBlocks); 4280 4281 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4282 return TokError("too many 
user SGPRs enabled"); 4283 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4284 UserSGPRCount); 4285 4286 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4287 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4288 ReserveFlatScr, ReserveXNACK); 4289 return false; 4290 } 4291 4292 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4293 uint32_t Major; 4294 uint32_t Minor; 4295 4296 if (ParseDirectiveMajorMinor(Major, Minor)) 4297 return true; 4298 4299 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4300 return false; 4301 } 4302 4303 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4304 uint32_t Major; 4305 uint32_t Minor; 4306 uint32_t Stepping; 4307 StringRef VendorName; 4308 StringRef ArchName; 4309 4310 // If this directive has no arguments, then use the ISA version for the 4311 // targeted GPU. 4312 if (getLexer().is(AsmToken::EndOfStatement)) { 4313 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4314 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4315 ISA.Stepping, 4316 "AMD", "AMDGPU"); 4317 return false; 4318 } 4319 4320 if (ParseDirectiveMajorMinor(Major, Minor)) 4321 return true; 4322 4323 if (getLexer().isNot(AsmToken::Comma)) 4324 return TokError("stepping version number required, comma expected"); 4325 Lex(); 4326 4327 if (ParseAsAbsoluteExpression(Stepping)) 4328 return TokError("invalid stepping version"); 4329 4330 if (getLexer().isNot(AsmToken::Comma)) 4331 return TokError("vendor name required, comma expected"); 4332 Lex(); 4333 4334 if (getLexer().isNot(AsmToken::String)) 4335 return TokError("invalid vendor name"); 4336 4337 VendorName = getLexer().getTok().getStringContents(); 4338 Lex(); 4339 4340 if (getLexer().isNot(AsmToken::Comma)) 4341 return TokError("arch name required, comma expected"); 4342 Lex(); 4343 4344 if (getLexer().isNot(AsmToken::String)) 4345 return TokError("invalid arch name"); 4346 4347 ArchName = 
getLexer().getTok().getStringContents(); 4348 Lex(); 4349 4350 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4351 VendorName, ArchName); 4352 return false; 4353 } 4354 4355 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4356 amd_kernel_code_t &Header) { 4357 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4358 // assembly for backwards compatibility. 4359 if (ID == "max_scratch_backing_memory_byte_size") { 4360 Parser.eatToEndOfStatement(); 4361 return false; 4362 } 4363 4364 SmallString<40> ErrStr; 4365 raw_svector_ostream Err(ErrStr); 4366 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4367 return TokError(Err.str()); 4368 } 4369 Lex(); 4370 4371 if (ID == "enable_wavefront_size32") { 4372 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4373 if (!isGFX10()) 4374 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4375 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4376 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4377 } else { 4378 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4379 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4380 } 4381 } 4382 4383 if (ID == "wavefront_size") { 4384 if (Header.wavefront_size == 5) { 4385 if (!isGFX10()) 4386 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4387 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4388 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4389 } else if (Header.wavefront_size == 6) { 4390 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4391 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4392 } 4393 } 4394 4395 if (ID == "enable_wgp_mode") { 4396 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4397 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4398 } 4399 4400 if (ID == "enable_mem_ordered") { 4401 if 
(G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4402 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4403 } 4404 4405 if (ID == "enable_fwd_progress") { 4406 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4407 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4408 } 4409 4410 return false; 4411 } 4412 4413 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4414 amd_kernel_code_t Header; 4415 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4416 4417 while (true) { 4418 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4419 // will set the current token to EndOfStatement. 4420 while(getLexer().is(AsmToken::EndOfStatement)) 4421 Lex(); 4422 4423 if (getLexer().isNot(AsmToken::Identifier)) 4424 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4425 4426 StringRef ID = getLexer().getTok().getIdentifier(); 4427 Lex(); 4428 4429 if (ID == ".end_amd_kernel_code_t") 4430 break; 4431 4432 if (ParseAMDKernelCodeTValue(ID, Header)) 4433 return true; 4434 } 4435 4436 getTargetStreamer().EmitAMDKernelCodeT(Header); 4437 4438 return false; 4439 } 4440 4441 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4442 if (getLexer().isNot(AsmToken::Identifier)) 4443 return TokError("expected symbol name"); 4444 4445 StringRef KernelName = Parser.getTok().getString(); 4446 4447 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4448 ELF::STT_AMDGPU_HSA_KERNEL); 4449 Lex(); 4450 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4451 KernelScope.initialize(getContext()); 4452 return false; 4453 } 4454 4455 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4456 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4457 return Error(getParser().getTok().getLoc(), 4458 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4459 "architectures"); 4460 } 4461 4462 auto ISAVersionStringFromASM = 
getLexer().getTok().getStringContents(); 4463 4464 std::string ISAVersionStringFromSTI; 4465 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4466 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4467 4468 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4469 return Error(getParser().getTok().getLoc(), 4470 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4471 "arguments specified through the command line"); 4472 } 4473 4474 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4475 Lex(); 4476 4477 return false; 4478 } 4479 4480 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4481 const char *AssemblerDirectiveBegin; 4482 const char *AssemblerDirectiveEnd; 4483 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4484 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4485 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4486 HSAMD::V3::AssemblerDirectiveEnd) 4487 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4488 HSAMD::AssemblerDirectiveEnd); 4489 4490 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4491 return Error(getParser().getTok().getLoc(), 4492 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4493 "not available on non-amdhsa OSes")).str()); 4494 } 4495 4496 std::string HSAMetadataString; 4497 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4498 HSAMetadataString)) 4499 return true; 4500 4501 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4502 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4503 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4504 } else { 4505 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4506 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4507 } 4508 4509 return false; 4510 } 4511 4512 /// Common code to parse out a block of text (typically YAML) between start and 4513 /// end directives. 
4514 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4515 const char *AssemblerDirectiveEnd, 4516 std::string &CollectString) { 4517 4518 raw_string_ostream CollectStream(CollectString); 4519 4520 getLexer().setSkipSpace(false); 4521 4522 bool FoundEnd = false; 4523 while (!getLexer().is(AsmToken::Eof)) { 4524 while (getLexer().is(AsmToken::Space)) { 4525 CollectStream << getLexer().getTok().getString(); 4526 Lex(); 4527 } 4528 4529 if (getLexer().is(AsmToken::Identifier)) { 4530 StringRef ID = getLexer().getTok().getIdentifier(); 4531 if (ID == AssemblerDirectiveEnd) { 4532 Lex(); 4533 FoundEnd = true; 4534 break; 4535 } 4536 } 4537 4538 CollectStream << Parser.parseStringToEndOfStatement() 4539 << getContext().getAsmInfo()->getSeparatorString(); 4540 4541 Parser.eatToEndOfStatement(); 4542 } 4543 4544 getLexer().setSkipSpace(true); 4545 4546 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4547 return TokError(Twine("expected directive ") + 4548 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4549 } 4550 4551 CollectStream.flush(); 4552 return false; 4553 } 4554 4555 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4556 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4557 std::string String; 4558 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4559 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4560 return true; 4561 4562 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4563 if (!PALMetadata->setFromString(String)) 4564 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4565 return false; 4566 } 4567 4568 /// Parse the assembler directive for old linear-format PAL metadata. 
4569 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4570 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4571 return Error(getParser().getTok().getLoc(), 4572 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4573 "not available on non-amdpal OSes")).str()); 4574 } 4575 4576 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4577 PALMetadata->setLegacy(); 4578 for (;;) { 4579 uint32_t Key, Value; 4580 if (ParseAsAbsoluteExpression(Key)) { 4581 return TokError(Twine("invalid value in ") + 4582 Twine(PALMD::AssemblerDirective)); 4583 } 4584 if (getLexer().isNot(AsmToken::Comma)) { 4585 return TokError(Twine("expected an even number of values in ") + 4586 Twine(PALMD::AssemblerDirective)); 4587 } 4588 Lex(); 4589 if (ParseAsAbsoluteExpression(Value)) { 4590 return TokError(Twine("invalid value in ") + 4591 Twine(PALMD::AssemblerDirective)); 4592 } 4593 PALMetadata->setRegister(Key, Value); 4594 if (getLexer().isNot(AsmToken::Comma)) 4595 break; 4596 Lex(); 4597 } 4598 return false; 4599 } 4600 4601 /// ParseDirectiveAMDGPULDS 4602 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4603 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4604 if (getParser().checkForValidSection()) 4605 return true; 4606 4607 StringRef Name; 4608 SMLoc NameLoc = getLexer().getLoc(); 4609 if (getParser().parseIdentifier(Name)) 4610 return TokError("expected identifier in directive"); 4611 4612 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4613 if (parseToken(AsmToken::Comma, "expected ','")) 4614 return true; 4615 4616 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4617 4618 int64_t Size; 4619 SMLoc SizeLoc = getLexer().getLoc(); 4620 if (getParser().parseAbsoluteExpression(Size)) 4621 return true; 4622 if (Size < 0) 4623 return Error(SizeLoc, "size must be non-negative"); 4624 if (Size > LocalMemorySize) 4625 return Error(SizeLoc, "size is too large"); 4626 4627 int64_t Alignment = 4; 4628 if 
(getLexer().is(AsmToken::Comma)) { 4629 Lex(); 4630 SMLoc AlignLoc = getLexer().getLoc(); 4631 if (getParser().parseAbsoluteExpression(Alignment)) 4632 return true; 4633 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 4634 return Error(AlignLoc, "alignment must be a power of two"); 4635 4636 // Alignment larger than the size of LDS is possible in theory, as long 4637 // as the linker manages to place to symbol at address 0, but we do want 4638 // to make sure the alignment fits nicely into a 32-bit integer. 4639 if (Alignment >= 1u << 31) 4640 return Error(AlignLoc, "alignment is too large"); 4641 } 4642 4643 if (parseToken(AsmToken::EndOfStatement, 4644 "unexpected token in '.amdgpu_lds' directive")) 4645 return true; 4646 4647 Symbol->redefineIfPossible(); 4648 if (!Symbol->isUndefined()) 4649 return Error(NameLoc, "invalid symbol redefinition"); 4650 4651 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 4652 return false; 4653 } 4654 4655 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4656 StringRef IDVal = DirectiveID.getString(); 4657 4658 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4659 if (IDVal == ".amdgcn_target") 4660 return ParseDirectiveAMDGCNTarget(); 4661 4662 if (IDVal == ".amdhsa_kernel") 4663 return ParseDirectiveAMDHSAKernel(); 4664 4665 // TODO: Restructure/combine with PAL metadata directive. 
4666 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4667 return ParseDirectiveHSAMetadata(); 4668 } else { 4669 if (IDVal == ".hsa_code_object_version") 4670 return ParseDirectiveHSACodeObjectVersion(); 4671 4672 if (IDVal == ".hsa_code_object_isa") 4673 return ParseDirectiveHSACodeObjectISA(); 4674 4675 if (IDVal == ".amd_kernel_code_t") 4676 return ParseDirectiveAMDKernelCodeT(); 4677 4678 if (IDVal == ".amdgpu_hsa_kernel") 4679 return ParseDirectiveAMDGPUHsaKernel(); 4680 4681 if (IDVal == ".amd_amdgpu_isa") 4682 return ParseDirectiveISAVersion(); 4683 4684 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4685 return ParseDirectiveHSAMetadata(); 4686 } 4687 4688 if (IDVal == ".amdgpu_lds") 4689 return ParseDirectiveAMDGPULDS(); 4690 4691 if (IDVal == PALMD::AssemblerDirectiveBegin) 4692 return ParseDirectivePALMetadataBegin(); 4693 4694 if (IDVal == PALMD::AssemblerDirective) 4695 return ParseDirectivePALMetadata(); 4696 4697 return true; 4698 } 4699 4700 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4701 unsigned RegNo) const { 4702 4703 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4704 R.isValid(); ++R) { 4705 if (*R == RegNo) 4706 return isGFX9Plus(); 4707 } 4708 4709 // GFX10 has 2 more SGPRs 104 and 105. 
4710 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4711 R.isValid(); ++R) { 4712 if (*R == RegNo) 4713 return hasSGPR104_SGPR105(); 4714 } 4715 4716 switch (RegNo) { 4717 case AMDGPU::SRC_SHARED_BASE: 4718 case AMDGPU::SRC_SHARED_LIMIT: 4719 case AMDGPU::SRC_PRIVATE_BASE: 4720 case AMDGPU::SRC_PRIVATE_LIMIT: 4721 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4722 return !isCI() && !isSI() && !isVI(); 4723 case AMDGPU::TBA: 4724 case AMDGPU::TBA_LO: 4725 case AMDGPU::TBA_HI: 4726 case AMDGPU::TMA: 4727 case AMDGPU::TMA_LO: 4728 case AMDGPU::TMA_HI: 4729 return !isGFX9() && !isGFX10(); 4730 case AMDGPU::XNACK_MASK: 4731 case AMDGPU::XNACK_MASK_LO: 4732 case AMDGPU::XNACK_MASK_HI: 4733 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4734 case AMDGPU::SGPR_NULL: 4735 return isGFX10(); 4736 default: 4737 break; 4738 } 4739 4740 if (isCI()) 4741 return true; 4742 4743 if (isSI() || isGFX10()) { 4744 // No flat_scr on SI. 4745 // On GFX10 flat scratch is not a valid register operand and can only be 4746 // accessed with s_setreg/s_getreg. 4747 switch (RegNo) { 4748 case AMDGPU::FLAT_SCR: 4749 case AMDGPU::FLAT_SCR_LO: 4750 case AMDGPU::FLAT_SCR_HI: 4751 return false; 4752 default: 4753 return true; 4754 } 4755 } 4756 4757 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4758 // SI/CI have. 4759 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4760 R.isValid(); ++R) { 4761 if (*R == RegNo) 4762 return hasSGPR102_SGPR103(); 4763 } 4764 4765 return true; 4766 } 4767 4768 OperandMatchResultTy 4769 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4770 OperandMode Mode) { 4771 // Try to parse with a custom parser 4772 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4773 4774 // If we successfully parsed the operand or if there as an error parsing, 4775 // we are done. 
4776 // 4777 // If we are parsing after we reach EndOfStatement then this means we 4778 // are appending default values to the Operands list. This is only done 4779 // by custom parser, so we shouldn't continue on to the generic parsing. 4780 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4781 getLexer().is(AsmToken::EndOfStatement)) 4782 return ResTy; 4783 4784 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4785 unsigned Prefix = Operands.size(); 4786 SMLoc LBraceLoc = getTok().getLoc(); 4787 Parser.Lex(); // eat the '[' 4788 4789 for (;;) { 4790 ResTy = parseReg(Operands); 4791 if (ResTy != MatchOperand_Success) 4792 return ResTy; 4793 4794 if (getLexer().is(AsmToken::RBrac)) 4795 break; 4796 4797 if (getLexer().isNot(AsmToken::Comma)) 4798 return MatchOperand_ParseFail; 4799 Parser.Lex(); 4800 } 4801 4802 if (Operands.size() - Prefix > 1) { 4803 Operands.insert(Operands.begin() + Prefix, 4804 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4805 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4806 getTok().getLoc())); 4807 } 4808 4809 Parser.Lex(); // eat the ']' 4810 return MatchOperand_Success; 4811 } 4812 4813 return parseRegOrImm(Operands); 4814 } 4815 4816 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4817 // Clear any forced encodings from the previous instruction. 
4818 setForcedEncodingSize(0); 4819 setForcedDPP(false); 4820 setForcedSDWA(false); 4821 4822 if (Name.endswith("_e64")) { 4823 setForcedEncodingSize(64); 4824 return Name.substr(0, Name.size() - 4); 4825 } else if (Name.endswith("_e32")) { 4826 setForcedEncodingSize(32); 4827 return Name.substr(0, Name.size() - 4); 4828 } else if (Name.endswith("_dpp")) { 4829 setForcedDPP(true); 4830 return Name.substr(0, Name.size() - 4); 4831 } else if (Name.endswith("_sdwa")) { 4832 setForcedSDWA(true); 4833 return Name.substr(0, Name.size() - 5); 4834 } 4835 return Name; 4836 } 4837 4838 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4839 StringRef Name, 4840 SMLoc NameLoc, OperandVector &Operands) { 4841 // Add the instruction mnemonic 4842 Name = parseMnemonicSuffix(Name); 4843 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4844 4845 bool IsMIMG = Name.startswith("image_"); 4846 4847 while (!getLexer().is(AsmToken::EndOfStatement)) { 4848 OperandMode Mode = OperandMode_Default; 4849 if (IsMIMG && isGFX10() && Operands.size() == 2) 4850 Mode = OperandMode_NSA; 4851 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4852 4853 // Eat the comma or space if there is one. 4854 if (getLexer().is(AsmToken::Comma)) 4855 Parser.Lex(); 4856 4857 if (Res != MatchOperand_Success) { 4858 checkUnsupportedInstruction(Name, NameLoc); 4859 if (!Parser.hasPendingError()) { 4860 // FIXME: use real operand location rather than the current location. 4861 StringRef Msg = 4862 (Res == MatchOperand_ParseFail) ? "failed parsing operand." 
: 4863 "not a valid operand."; 4864 Error(getLexer().getLoc(), Msg); 4865 } 4866 while (!getLexer().is(AsmToken::EndOfStatement)) { 4867 Parser.Lex(); 4868 } 4869 return true; 4870 } 4871 } 4872 4873 return false; 4874 } 4875 4876 //===----------------------------------------------------------------------===// 4877 // Utility functions 4878 //===----------------------------------------------------------------------===// 4879 4880 OperandMatchResultTy 4881 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4882 4883 if (!trySkipId(Prefix, AsmToken::Colon)) 4884 return MatchOperand_NoMatch; 4885 4886 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 4887 } 4888 4889 OperandMatchResultTy 4890 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4891 AMDGPUOperand::ImmTy ImmTy, 4892 bool (*ConvertResult)(int64_t&)) { 4893 SMLoc S = getLoc(); 4894 int64_t Value = 0; 4895 4896 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4897 if (Res != MatchOperand_Success) 4898 return Res; 4899 4900 if (ConvertResult && !ConvertResult(Value)) { 4901 Error(S, "invalid " + StringRef(Prefix) + " value."); 4902 } 4903 4904 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4905 return MatchOperand_Success; 4906 } 4907 4908 OperandMatchResultTy 4909 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4910 OperandVector &Operands, 4911 AMDGPUOperand::ImmTy ImmTy, 4912 bool (*ConvertResult)(int64_t&)) { 4913 SMLoc S = getLoc(); 4914 if (!trySkipId(Prefix, AsmToken::Colon)) 4915 return MatchOperand_NoMatch; 4916 4917 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4918 return MatchOperand_ParseFail; 4919 4920 unsigned Val = 0; 4921 const unsigned MaxSize = 4; 4922 4923 // FIXME: How to verify the number of elements matches the number of src 4924 // operands? 
4925 for (int I = 0; ; ++I) { 4926 int64_t Op; 4927 SMLoc Loc = getLoc(); 4928 if (!parseExpr(Op)) 4929 return MatchOperand_ParseFail; 4930 4931 if (Op != 0 && Op != 1) { 4932 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4933 return MatchOperand_ParseFail; 4934 } 4935 4936 Val |= (Op << I); 4937 4938 if (trySkipToken(AsmToken::RBrac)) 4939 break; 4940 4941 if (I + 1 == MaxSize) { 4942 Error(getLoc(), "expected a closing square bracket"); 4943 return MatchOperand_ParseFail; 4944 } 4945 4946 if (!skipToken(AsmToken::Comma, "expected a comma")) 4947 return MatchOperand_ParseFail; 4948 } 4949 4950 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4951 return MatchOperand_Success; 4952 } 4953 4954 OperandMatchResultTy 4955 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4956 AMDGPUOperand::ImmTy ImmTy) { 4957 int64_t Bit = 0; 4958 SMLoc S = Parser.getTok().getLoc(); 4959 4960 // We are at the end of the statement, and this is a default argument, so 4961 // use a default value. 
4962 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4963 switch(getLexer().getKind()) { 4964 case AsmToken::Identifier: { 4965 StringRef Tok = Parser.getTok().getString(); 4966 if (Tok == Name) { 4967 if (Tok == "r128" && !hasMIMG_R128()) 4968 Error(S, "r128 modifier is not supported on this GPU"); 4969 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 4970 Error(S, "a16 modifier is not supported on this GPU"); 4971 Bit = 1; 4972 Parser.Lex(); 4973 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4974 Bit = 0; 4975 Parser.Lex(); 4976 } else { 4977 return MatchOperand_NoMatch; 4978 } 4979 break; 4980 } 4981 default: 4982 return MatchOperand_NoMatch; 4983 } 4984 } 4985 4986 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4987 return MatchOperand_ParseFail; 4988 4989 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 4990 ImmTy = AMDGPUOperand::ImmTyR128A16; 4991 4992 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4993 return MatchOperand_Success; 4994 } 4995 4996 static void addOptionalImmOperand( 4997 MCInst& Inst, const OperandVector& Operands, 4998 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4999 AMDGPUOperand::ImmTy ImmT, 5000 int64_t Default = 0) { 5001 auto i = OptionalIdx.find(ImmT); 5002 if (i != OptionalIdx.end()) { 5003 unsigned Idx = i->second; 5004 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5005 } else { 5006 Inst.addOperand(MCOperand::createImm(Default)); 5007 } 5008 } 5009 5010 OperandMatchResultTy 5011 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 5012 if (getLexer().isNot(AsmToken::Identifier)) { 5013 return MatchOperand_NoMatch; 5014 } 5015 StringRef Tok = Parser.getTok().getString(); 5016 if (Tok != Prefix) { 5017 return MatchOperand_NoMatch; 5018 } 5019 5020 Parser.Lex(); 5021 if (getLexer().isNot(AsmToken::Colon)) { 5022 return MatchOperand_ParseFail; 5023 } 5024 5025 Parser.Lex(); 5026 if (getLexer().isNot(AsmToken::Identifier)) { 5027 return 
MatchOperand_ParseFail; 5028 } 5029 5030 Value = Parser.getTok().getString(); 5031 return MatchOperand_Success; 5032 } 5033 5034 //===----------------------------------------------------------------------===// 5035 // MTBUF format 5036 //===----------------------------------------------------------------------===// 5037 5038 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5039 int64_t MaxVal, 5040 int64_t &Fmt) { 5041 int64_t Val; 5042 SMLoc Loc = getLoc(); 5043 5044 auto Res = parseIntWithPrefix(Pref, Val); 5045 if (Res == MatchOperand_ParseFail) 5046 return false; 5047 if (Res == MatchOperand_NoMatch) 5048 return true; 5049 5050 if (Val < 0 || Val > MaxVal) { 5051 Error(Loc, Twine("out of range ", StringRef(Pref))); 5052 return false; 5053 } 5054 5055 Fmt = Val; 5056 return true; 5057 } 5058 5059 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5060 // values to live in a joint format operand in the MCInst encoding. 5061 OperandMatchResultTy 5062 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5063 using namespace llvm::AMDGPU::MTBUFFormat; 5064 5065 int64_t Dfmt = DFMT_UNDEF; 5066 int64_t Nfmt = NFMT_UNDEF; 5067 5068 // dfmt and nfmt can appear in either order, and each is optional. 5069 for (int I = 0; I < 2; ++I) { 5070 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5071 return MatchOperand_ParseFail; 5072 5073 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5074 return MatchOperand_ParseFail; 5075 } 5076 // Skip optional comma between dfmt/nfmt 5077 // but guard against 2 commas following each other. 5078 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5079 !peekToken().is(AsmToken::Comma)) { 5080 trySkipToken(AsmToken::Comma); 5081 } 5082 } 5083 5084 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5085 return MatchOperand_NoMatch; 5086 5087 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5088 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5089 5090 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5091 return MatchOperand_Success; 5092 } 5093 5094 OperandMatchResultTy 5095 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5096 using namespace llvm::AMDGPU::MTBUFFormat; 5097 5098 int64_t Fmt = UFMT_UNDEF; 5099 5100 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5101 return MatchOperand_ParseFail; 5102 5103 if (Fmt == UFMT_UNDEF) 5104 return MatchOperand_NoMatch; 5105 5106 Format = Fmt; 5107 return MatchOperand_Success; 5108 } 5109 5110 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5111 int64_t &Nfmt, 5112 StringRef FormatStr, 5113 SMLoc Loc) { 5114 using namespace llvm::AMDGPU::MTBUFFormat; 5115 int64_t Format; 5116 5117 Format = getDfmt(FormatStr); 5118 if (Format != DFMT_UNDEF) { 5119 Dfmt = Format; 5120 return true; 5121 } 5122 5123 Format = getNfmt(FormatStr, getSTI()); 5124 if (Format != NFMT_UNDEF) { 5125 Nfmt = Format; 5126 return true; 5127 } 5128 5129 Error(Loc, "unsupported format"); 5130 return false; 5131 } 5132 5133 OperandMatchResultTy 5134 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5135 SMLoc FormatLoc, 5136 int64_t &Format) { 5137 using namespace llvm::AMDGPU::MTBUFFormat; 5138 5139 int64_t Dfmt = DFMT_UNDEF; 5140 int64_t Nfmt = NFMT_UNDEF; 5141 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5142 return MatchOperand_ParseFail; 5143 5144 if (trySkipToken(AsmToken::Comma)) { 5145 StringRef Str; 5146 SMLoc Loc = getLoc(); 5147 if (!parseId(Str, "expected a format string") || 5148 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5149 return MatchOperand_ParseFail; 5150 } 5151 if (Dfmt == DFMT_UNDEF) { 5152 Error(Loc, "duplicate numeric format"); 5153 return MatchOperand_ParseFail; 5154 } else if (Nfmt == NFMT_UNDEF) { 5155 Error(Loc, "duplicate data format"); 5156 return MatchOperand_ParseFail; 5157 } 5158 } 5159 5160 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5161 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5162 5163 if (isGFX10()) { 5164 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5165 if (Ufmt == UFMT_UNDEF) { 5166 Error(FormatLoc, "unsupported format"); 5167 return MatchOperand_ParseFail; 5168 } 5169 Format = Ufmt; 5170 } else { 5171 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5172 } 5173 5174 return MatchOperand_Success; 5175 } 5176 5177 OperandMatchResultTy 5178 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5179 SMLoc Loc, 5180 int64_t &Format) { 5181 using namespace llvm::AMDGPU::MTBUFFormat; 5182 5183 auto Id = getUnifiedFormat(FormatStr); 5184 if (Id == UFMT_UNDEF) 5185 return MatchOperand_NoMatch; 5186 5187 if (!isGFX10()) { 5188 Error(Loc, "unified format is not supported on this GPU"); 5189 return MatchOperand_ParseFail; 5190 } 5191 5192 Format = Id; 5193 return MatchOperand_Success; 5194 } 5195 5196 OperandMatchResultTy 5197 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5198 using namespace llvm::AMDGPU::MTBUFFormat; 5199 SMLoc Loc = getLoc(); 5200 5201 if (!parseExpr(Format)) 5202 return MatchOperand_ParseFail; 5203 if (!isValidFormatEncoding(Format, getSTI())) { 5204 Error(Loc, "out of range format"); 5205 return MatchOperand_ParseFail; 5206 } 5207 5208 return MatchOperand_Success; 5209 } 5210 5211 OperandMatchResultTy 5212 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5213 using namespace llvm::AMDGPU::MTBUFFormat; 5214 5215 if (!trySkipId("format", AsmToken::Colon)) 5216 return MatchOperand_NoMatch; 5217 5218 if (trySkipToken(AsmToken::LBrac)) { 5219 StringRef FormatStr; 5220 SMLoc Loc = getLoc(); 5221 if (!parseId(FormatStr, "expected a format string")) 5222 return MatchOperand_ParseFail; 5223 5224 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5225 if (Res == MatchOperand_NoMatch) 5226 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5227 if (Res != MatchOperand_Success) 5228 return Res; 5229 5230 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 
5231 return MatchOperand_ParseFail; 5232 5233 return MatchOperand_Success; 5234 } 5235 5236 return parseNumericFormat(Format); 5237 } 5238 5239 OperandMatchResultTy 5240 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5241 using namespace llvm::AMDGPU::MTBUFFormat; 5242 5243 int64_t Format = getDefaultFormatEncoding(getSTI()); 5244 OperandMatchResultTy Res; 5245 SMLoc Loc = getLoc(); 5246 5247 // Parse legacy format syntax. 5248 Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5249 if (Res == MatchOperand_ParseFail) 5250 return Res; 5251 5252 bool FormatFound = (Res == MatchOperand_Success); 5253 5254 Operands.push_back( 5255 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5256 5257 if (FormatFound) 5258 trySkipToken(AsmToken::Comma); 5259 5260 if (isToken(AsmToken::EndOfStatement)) { 5261 // We are expecting an soffset operand, 5262 // but let matcher handle the error. 5263 return MatchOperand_Success; 5264 } 5265 5266 // Parse soffset. 5267 Res = parseRegOrImm(Operands); 5268 if (Res != MatchOperand_Success) 5269 return Res; 5270 5271 trySkipToken(AsmToken::Comma); 5272 5273 if (!FormatFound) { 5274 Res = parseSymbolicOrNumericFormat(Format); 5275 if (Res == MatchOperand_ParseFail) 5276 return Res; 5277 if (Res == MatchOperand_Success) { 5278 auto Size = Operands.size(); 5279 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5280 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5281 Op.setImm(Format); 5282 } 5283 return MatchOperand_Success; 5284 } 5285 5286 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5287 Error(getLoc(), "duplicate format"); 5288 return MatchOperand_ParseFail; 5289 } 5290 return MatchOperand_Success; 5291 } 5292 5293 //===----------------------------------------------------------------------===// 5294 // ds 5295 //===----------------------------------------------------------------------===// 5296 5297 void AMDGPUAsmParser::cvtDSOffset01(MCInst 
&Inst, 5298 const OperandVector &Operands) { 5299 OptionalImmIndexMap OptionalIdx; 5300 5301 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5302 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5303 5304 // Add the register arguments 5305 if (Op.isReg()) { 5306 Op.addRegOperands(Inst, 1); 5307 continue; 5308 } 5309 5310 // Handle optional arguments 5311 OptionalIdx[Op.getImmTy()] = i; 5312 } 5313 5314 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5315 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5316 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5317 5318 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5319 } 5320 5321 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5322 bool IsGdsHardcoded) { 5323 OptionalImmIndexMap OptionalIdx; 5324 5325 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5326 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5327 5328 // Add the register arguments 5329 if (Op.isReg()) { 5330 Op.addRegOperands(Inst, 1); 5331 continue; 5332 } 5333 5334 if (Op.isToken() && Op.getToken() == "gds") { 5335 IsGdsHardcoded = true; 5336 continue; 5337 } 5338 5339 // Handle optional arguments 5340 OptionalIdx[Op.getImmTy()] = i; 5341 } 5342 5343 AMDGPUOperand::ImmTy OffsetType = 5344 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5345 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5346 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5347 AMDGPUOperand::ImmTyOffset; 5348 5349 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5350 5351 if (!IsGdsHardcoded) { 5352 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5353 } 5354 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5355 } 5356 5357 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5358 OptionalImmIndexMap OptionalIdx; 5359 5360 unsigned OperandIdx[4]; 5361 unsigned EnMask = 0; 5362 int SrcIdx = 0; 5363 5364 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5365 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5366 5367 // Add the register arguments 5368 if (Op.isReg()) { 5369 assert(SrcIdx < 4); 5370 OperandIdx[SrcIdx] = Inst.size(); 5371 Op.addRegOperands(Inst, 1); 5372 ++SrcIdx; 5373 continue; 5374 } 5375 5376 if (Op.isOff()) { 5377 assert(SrcIdx < 4); 5378 OperandIdx[SrcIdx] = Inst.size(); 5379 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5380 ++SrcIdx; 5381 continue; 5382 } 5383 5384 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5385 Op.addImmOperands(Inst, 1); 5386 continue; 5387 } 5388 5389 if (Op.isToken() && Op.getToken() == "done") 5390 continue; 5391 5392 // Handle optional arguments 5393 OptionalIdx[Op.getImmTy()] = i; 5394 } 5395 5396 assert(SrcIdx == 4); 5397 5398 bool Compr = false; 5399 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5400 Compr = true; 5401 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5402 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5403 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5404 } 5405 5406 for (auto i = 0; i < SrcIdx; ++i) { 5407 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5408 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5409 } 5410 } 5411 5412 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5413 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5414 5415 Inst.addOperand(MCOperand::createImm(EnMask)); 5416 } 5417 5418 //===----------------------------------------------------------------------===// 5419 // s_waitcnt 5420 //===----------------------------------------------------------------------===// 5421 5422 static bool 5423 encodeCnt( 5424 const AMDGPU::IsaVersion ISA, 5425 int64_t &IntVal, 5426 int64_t CntVal, 5427 bool Saturate, 5428 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5429 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5430 { 5431 bool Failed = false; 5432 5433 IntVal = encode(ISA, IntVal, CntVal); 5434 if (CntVal != decode(ISA, IntVal)) { 5435 if (Saturate) { 5436 IntVal = encode(ISA, IntVal, -1); 5437 } else { 5438 Failed = true; 5439 } 5440 } 5441 return Failed; 5442 } 5443 5444 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5445 5446 SMLoc CntLoc = getLoc(); 5447 StringRef CntName = getTokenStr(); 5448 5449 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5450 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5451 return false; 5452 5453 int64_t CntVal; 5454 SMLoc ValLoc = getLoc(); 5455 if (!parseExpr(CntVal)) 5456 return false; 5457 5458 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5459 5460 bool Failed = true; 5461 bool Sat = CntName.endswith("_sat"); 5462 5463 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5464 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5465 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5466 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5467 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5468 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5469 } 
else { 5470 Error(CntLoc, "invalid counter name " + CntName); 5471 return false; 5472 } 5473 5474 if (Failed) { 5475 Error(ValLoc, "too large value for " + CntName); 5476 return false; 5477 } 5478 5479 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5480 return false; 5481 5482 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5483 if (isToken(AsmToken::EndOfStatement)) { 5484 Error(getLoc(), "expected a counter name"); 5485 return false; 5486 } 5487 } 5488 5489 return true; 5490 } 5491 5492 OperandMatchResultTy 5493 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5494 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5495 int64_t Waitcnt = getWaitcntBitMask(ISA); 5496 SMLoc S = getLoc(); 5497 5498 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5499 while (!isToken(AsmToken::EndOfStatement)) { 5500 if (!parseCnt(Waitcnt)) 5501 return MatchOperand_ParseFail; 5502 } 5503 } else { 5504 if (!parseExpr(Waitcnt)) 5505 return MatchOperand_ParseFail; 5506 } 5507 5508 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5509 return MatchOperand_Success; 5510 } 5511 5512 bool 5513 AMDGPUOperand::isSWaitCnt() const { 5514 return isImm(); 5515 } 5516 5517 //===----------------------------------------------------------------------===// 5518 // hwreg 5519 //===----------------------------------------------------------------------===// 5520 5521 bool 5522 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5523 int64_t &Offset, 5524 int64_t &Width) { 5525 using namespace llvm::AMDGPU::Hwreg; 5526 5527 // The register may be specified by name or using a numeric code 5528 if (isToken(AsmToken::Identifier) && 5529 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5530 HwReg.IsSymbolic = true; 5531 lex(); // skip message name 5532 } else if (!parseExpr(HwReg.Id)) { 5533 return false; 5534 } 5535 5536 if (trySkipToken(AsmToken::RParen)) 5537 return true; 5538 5539 // parse optional 
params 5540 return 5541 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5542 parseExpr(Offset) && 5543 skipToken(AsmToken::Comma, "expected a comma") && 5544 parseExpr(Width) && 5545 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5546 } 5547 5548 bool 5549 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5550 const int64_t Offset, 5551 const int64_t Width, 5552 const SMLoc Loc) { 5553 5554 using namespace llvm::AMDGPU::Hwreg; 5555 5556 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5557 Error(Loc, "specified hardware register is not supported on this GPU"); 5558 return false; 5559 } else if (!isValidHwreg(HwReg.Id)) { 5560 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5561 return false; 5562 } else if (!isValidHwregOffset(Offset)) { 5563 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5564 return false; 5565 } else if (!isValidHwregWidth(Width)) { 5566 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5567 return false; 5568 } 5569 return true; 5570 } 5571 5572 OperandMatchResultTy 5573 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5574 using namespace llvm::AMDGPU::Hwreg; 5575 5576 int64_t ImmVal = 0; 5577 SMLoc Loc = getLoc(); 5578 5579 if (trySkipId("hwreg", AsmToken::LParen)) { 5580 OperandInfoTy HwReg(ID_UNKNOWN_); 5581 int64_t Offset = OFFSET_DEFAULT_; 5582 int64_t Width = WIDTH_DEFAULT_; 5583 if (parseHwregBody(HwReg, Offset, Width) && 5584 validateHwreg(HwReg, Offset, Width, Loc)) { 5585 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5586 } else { 5587 return MatchOperand_ParseFail; 5588 } 5589 } else if (parseExpr(ImmVal)) { 5590 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5591 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5592 return MatchOperand_ParseFail; 5593 } 5594 } else { 5595 return MatchOperand_ParseFail; 5596 } 5597 5598 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, 
AMDGPUOperand::ImmTyHwreg)); 5599 return MatchOperand_Success; 5600 } 5601 5602 bool AMDGPUOperand::isHwreg() const { 5603 return isImmTy(ImmTyHwreg); 5604 } 5605 5606 //===----------------------------------------------------------------------===// 5607 // sendmsg 5608 //===----------------------------------------------------------------------===// 5609 5610 bool 5611 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5612 OperandInfoTy &Op, 5613 OperandInfoTy &Stream) { 5614 using namespace llvm::AMDGPU::SendMsg; 5615 5616 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5617 Msg.IsSymbolic = true; 5618 lex(); // skip message name 5619 } else if (!parseExpr(Msg.Id)) { 5620 return false; 5621 } 5622 5623 if (trySkipToken(AsmToken::Comma)) { 5624 Op.IsDefined = true; 5625 if (isToken(AsmToken::Identifier) && 5626 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5627 lex(); // skip operation name 5628 } else if (!parseExpr(Op.Id)) { 5629 return false; 5630 } 5631 5632 if (trySkipToken(AsmToken::Comma)) { 5633 Stream.IsDefined = true; 5634 if (!parseExpr(Stream.Id)) 5635 return false; 5636 } 5637 } 5638 5639 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5640 } 5641 5642 bool 5643 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5644 const OperandInfoTy &Op, 5645 const OperandInfoTy &Stream, 5646 const SMLoc S) { 5647 using namespace llvm::AMDGPU::SendMsg; 5648 5649 // Validation strictness depends on whether message is specified 5650 // in a symbolc or in a numeric form. In the latter case 5651 // only encoding possibility is checked. 5652 bool Strict = Msg.IsSymbolic; 5653 5654 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5655 Error(S, "invalid message id"); 5656 return false; 5657 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5658 Error(S, Op.IsDefined ? 
5659 "message does not support operations" : 5660 "missing message operation"); 5661 return false; 5662 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5663 Error(S, "invalid operation id"); 5664 return false; 5665 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5666 Error(S, "message operation does not support streams"); 5667 return false; 5668 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5669 Error(S, "invalid message stream id"); 5670 return false; 5671 } 5672 return true; 5673 } 5674 5675 OperandMatchResultTy 5676 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5677 using namespace llvm::AMDGPU::SendMsg; 5678 5679 int64_t ImmVal = 0; 5680 SMLoc Loc = getLoc(); 5681 5682 if (trySkipId("sendmsg", AsmToken::LParen)) { 5683 OperandInfoTy Msg(ID_UNKNOWN_); 5684 OperandInfoTy Op(OP_NONE_); 5685 OperandInfoTy Stream(STREAM_ID_NONE_); 5686 if (parseSendMsgBody(Msg, Op, Stream) && 5687 validateSendMsg(Msg, Op, Stream, Loc)) { 5688 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5689 } else { 5690 return MatchOperand_ParseFail; 5691 } 5692 } else if (parseExpr(ImmVal)) { 5693 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5694 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5695 return MatchOperand_ParseFail; 5696 } 5697 } else { 5698 return MatchOperand_ParseFail; 5699 } 5700 5701 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5702 return MatchOperand_Success; 5703 } 5704 5705 bool AMDGPUOperand::isSendMsg() const { 5706 return isImmTy(ImmTySendMsg); 5707 } 5708 5709 //===----------------------------------------------------------------------===// 5710 // v_interp 5711 //===----------------------------------------------------------------------===// 5712 5713 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5714 if (getLexer().getKind() != AsmToken::Identifier) 5715 return MatchOperand_NoMatch; 5716 5717 StringRef Str = 
Parser.getTok().getString(); 5718 int Slot = StringSwitch<int>(Str) 5719 .Case("p10", 0) 5720 .Case("p20", 1) 5721 .Case("p0", 2) 5722 .Default(-1); 5723 5724 SMLoc S = Parser.getTok().getLoc(); 5725 if (Slot == -1) 5726 return MatchOperand_ParseFail; 5727 5728 Parser.Lex(); 5729 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5730 AMDGPUOperand::ImmTyInterpSlot)); 5731 return MatchOperand_Success; 5732 } 5733 5734 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5735 if (getLexer().getKind() != AsmToken::Identifier) 5736 return MatchOperand_NoMatch; 5737 5738 StringRef Str = Parser.getTok().getString(); 5739 if (!Str.startswith("attr")) 5740 return MatchOperand_NoMatch; 5741 5742 StringRef Chan = Str.take_back(2); 5743 int AttrChan = StringSwitch<int>(Chan) 5744 .Case(".x", 0) 5745 .Case(".y", 1) 5746 .Case(".z", 2) 5747 .Case(".w", 3) 5748 .Default(-1); 5749 if (AttrChan == -1) 5750 return MatchOperand_ParseFail; 5751 5752 Str = Str.drop_back(2).drop_front(4); 5753 5754 uint8_t Attr; 5755 if (Str.getAsInteger(10, Attr)) 5756 return MatchOperand_ParseFail; 5757 5758 SMLoc S = Parser.getTok().getLoc(); 5759 Parser.Lex(); 5760 if (Attr > 63) { 5761 Error(S, "out of bounds attr"); 5762 return MatchOperand_ParseFail; 5763 } 5764 5765 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5766 5767 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5768 AMDGPUOperand::ImmTyInterpAttr)); 5769 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5770 AMDGPUOperand::ImmTyAttrChan)); 5771 return MatchOperand_Success; 5772 } 5773 5774 //===----------------------------------------------------------------------===// 5775 // exp 5776 //===----------------------------------------------------------------------===// 5777 5778 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5779 uint8_t &Val) { 5780 if (Str == "null") { 5781 Val = 9; 5782 return MatchOperand_Success; 5783 } 5784 5785 if 
(Str.startswith("mrt")) { 5786 Str = Str.drop_front(3); 5787 if (Str == "z") { // == mrtz 5788 Val = 8; 5789 return MatchOperand_Success; 5790 } 5791 5792 if (Str.getAsInteger(10, Val)) 5793 return MatchOperand_ParseFail; 5794 5795 if (Val > 7) { 5796 Error(getLoc(), "invalid exp target"); 5797 return MatchOperand_ParseFail; 5798 } 5799 5800 return MatchOperand_Success; 5801 } 5802 5803 if (Str.startswith("pos")) { 5804 Str = Str.drop_front(3); 5805 if (Str.getAsInteger(10, Val)) 5806 return MatchOperand_ParseFail; 5807 5808 if (Val > 4 || (Val == 4 && !isGFX10())) { 5809 Error(getLoc(), "invalid exp target"); 5810 return MatchOperand_ParseFail; 5811 } 5812 5813 Val += 12; 5814 return MatchOperand_Success; 5815 } 5816 5817 if (isGFX10() && Str == "prim") { 5818 Val = 20; 5819 return MatchOperand_Success; 5820 } 5821 5822 if (Str.startswith("param")) { 5823 Str = Str.drop_front(5); 5824 if (Str.getAsInteger(10, Val)) 5825 return MatchOperand_ParseFail; 5826 5827 if (Val >= 32) { 5828 Error(getLoc(), "invalid exp target"); 5829 return MatchOperand_ParseFail; 5830 } 5831 5832 Val += 32; 5833 return MatchOperand_Success; 5834 } 5835 5836 if (Str.startswith("invalid_target_")) { 5837 Str = Str.drop_front(15); 5838 if (Str.getAsInteger(10, Val)) 5839 return MatchOperand_ParseFail; 5840 5841 Error(getLoc(), "invalid exp target"); 5842 return MatchOperand_ParseFail; 5843 } 5844 5845 return MatchOperand_NoMatch; 5846 } 5847 5848 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5849 uint8_t Val; 5850 StringRef Str = Parser.getTok().getString(); 5851 5852 auto Res = parseExpTgtImpl(Str, Val); 5853 if (Res != MatchOperand_Success) 5854 return Res; 5855 5856 SMLoc S = Parser.getTok().getLoc(); 5857 Parser.Lex(); 5858 5859 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5860 AMDGPUOperand::ImmTyExpTgt)); 5861 return MatchOperand_Success; 5862 } 5863 5864 //===----------------------------------------------------------------------===// 5865 // 
parser helpers 5866 //===----------------------------------------------------------------------===// 5867 5868 bool 5869 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5870 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5871 } 5872 5873 bool 5874 AMDGPUAsmParser::isId(const StringRef Id) const { 5875 return isId(getToken(), Id); 5876 } 5877 5878 bool 5879 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5880 return getTokenKind() == Kind; 5881 } 5882 5883 bool 5884 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5885 if (isId(Id)) { 5886 lex(); 5887 return true; 5888 } 5889 return false; 5890 } 5891 5892 bool 5893 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5894 if (isId(Id) && peekToken().is(Kind)) { 5895 lex(); 5896 lex(); 5897 return true; 5898 } 5899 return false; 5900 } 5901 5902 bool 5903 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5904 if (isToken(Kind)) { 5905 lex(); 5906 return true; 5907 } 5908 return false; 5909 } 5910 5911 bool 5912 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5913 const StringRef ErrMsg) { 5914 if (!trySkipToken(Kind)) { 5915 Error(getLoc(), ErrMsg); 5916 return false; 5917 } 5918 return true; 5919 } 5920 5921 bool 5922 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5923 return !getParser().parseAbsoluteExpression(Imm); 5924 } 5925 5926 bool 5927 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5928 SMLoc S = getLoc(); 5929 5930 const MCExpr *Expr; 5931 if (Parser.parseExpression(Expr)) 5932 return false; 5933 5934 int64_t IntVal; 5935 if (Expr->evaluateAsAbsolute(IntVal)) { 5936 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5937 } else { 5938 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5939 } 5940 return true; 5941 } 5942 5943 bool 5944 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5945 if (isToken(AsmToken::String)) { 5946 Val = 
getToken().getStringContents(); 5947 lex(); 5948 return true; 5949 } else { 5950 Error(getLoc(), ErrMsg); 5951 return false; 5952 } 5953 } 5954 5955 bool 5956 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 5957 if (isToken(AsmToken::Identifier)) { 5958 Val = getTokenStr(); 5959 lex(); 5960 return true; 5961 } else { 5962 Error(getLoc(), ErrMsg); 5963 return false; 5964 } 5965 } 5966 5967 AsmToken 5968 AMDGPUAsmParser::getToken() const { 5969 return Parser.getTok(); 5970 } 5971 5972 AsmToken 5973 AMDGPUAsmParser::peekToken() { 5974 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 5975 } 5976 5977 void 5978 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5979 auto TokCount = getLexer().peekTokens(Tokens); 5980 5981 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5982 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5983 } 5984 5985 AsmToken::TokenKind 5986 AMDGPUAsmParser::getTokenKind() const { 5987 return getLexer().getKind(); 5988 } 5989 5990 SMLoc 5991 AMDGPUAsmParser::getLoc() const { 5992 return getToken().getLoc(); 5993 } 5994 5995 StringRef 5996 AMDGPUAsmParser::getTokenStr() const { 5997 return getToken().getString(); 5998 } 5999 6000 void 6001 AMDGPUAsmParser::lex() { 6002 Parser.Lex(); 6003 } 6004 6005 //===----------------------------------------------------------------------===// 6006 // swizzle 6007 //===----------------------------------------------------------------------===// 6008 6009 LLVM_READNONE 6010 static unsigned 6011 encodeBitmaskPerm(const unsigned AndMask, 6012 const unsigned OrMask, 6013 const unsigned XorMask) { 6014 using namespace llvm::AMDGPU::Swizzle; 6015 6016 return BITMASK_PERM_ENC | 6017 (AndMask << BITMASK_AND_SHIFT) | 6018 (OrMask << BITMASK_OR_SHIFT) | 6019 (XorMask << BITMASK_XOR_SHIFT); 6020 } 6021 6022 bool 6023 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6024 const unsigned MinVal, 6025 const unsigned MaxVal, 6026 const StringRef 
ErrMsg) { 6027 for (unsigned i = 0; i < OpNum; ++i) { 6028 if (!skipToken(AsmToken::Comma, "expected a comma")){ 6029 return false; 6030 } 6031 SMLoc ExprLoc = Parser.getTok().getLoc(); 6032 if (!parseExpr(Op[i])) { 6033 return false; 6034 } 6035 if (Op[i] < MinVal || Op[i] > MaxVal) { 6036 Error(ExprLoc, ErrMsg); 6037 return false; 6038 } 6039 } 6040 6041 return true; 6042 } 6043 6044 bool 6045 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 6046 using namespace llvm::AMDGPU::Swizzle; 6047 6048 int64_t Lane[LANE_NUM]; 6049 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6050 "expected a 2-bit lane id")) { 6051 Imm = QUAD_PERM_ENC; 6052 for (unsigned I = 0; I < LANE_NUM; ++I) { 6053 Imm |= Lane[I] << (LANE_SHIFT * I); 6054 } 6055 return true; 6056 } 6057 return false; 6058 } 6059 6060 bool 6061 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6062 using namespace llvm::AMDGPU::Swizzle; 6063 6064 SMLoc S = Parser.getTok().getLoc(); 6065 int64_t GroupSize; 6066 int64_t LaneIdx; 6067 6068 if (!parseSwizzleOperands(1, &GroupSize, 6069 2, 32, 6070 "group size must be in the interval [2,32]")) { 6071 return false; 6072 } 6073 if (!isPowerOf2_64(GroupSize)) { 6074 Error(S, "group size must be a power of two"); 6075 return false; 6076 } 6077 if (parseSwizzleOperands(1, &LaneIdx, 6078 0, GroupSize - 1, 6079 "lane id must be in the interval [0,group size - 1]")) { 6080 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6081 return true; 6082 } 6083 return false; 6084 } 6085 6086 bool 6087 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6088 using namespace llvm::AMDGPU::Swizzle; 6089 6090 SMLoc S = Parser.getTok().getLoc(); 6091 int64_t GroupSize; 6092 6093 if (!parseSwizzleOperands(1, &GroupSize, 6094 2, 32, "group size must be in the interval [2,32]")) { 6095 return false; 6096 } 6097 if (!isPowerOf2_64(GroupSize)) { 6098 Error(S, "group size must be a power of two"); 6099 return false; 6100 } 6101 6102 Imm = 
encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6103 return true; 6104 } 6105 6106 bool 6107 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6108 using namespace llvm::AMDGPU::Swizzle; 6109 6110 SMLoc S = Parser.getTok().getLoc(); 6111 int64_t GroupSize; 6112 6113 if (!parseSwizzleOperands(1, &GroupSize, 6114 1, 16, "group size must be in the interval [1,16]")) { 6115 return false; 6116 } 6117 if (!isPowerOf2_64(GroupSize)) { 6118 Error(S, "group size must be a power of two"); 6119 return false; 6120 } 6121 6122 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6123 return true; 6124 } 6125 6126 bool 6127 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6128 using namespace llvm::AMDGPU::Swizzle; 6129 6130 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6131 return false; 6132 } 6133 6134 StringRef Ctl; 6135 SMLoc StrLoc = Parser.getTok().getLoc(); 6136 if (!parseString(Ctl)) { 6137 return false; 6138 } 6139 if (Ctl.size() != BITMASK_WIDTH) { 6140 Error(StrLoc, "expected a 5-character mask"); 6141 return false; 6142 } 6143 6144 unsigned AndMask = 0; 6145 unsigned OrMask = 0; 6146 unsigned XorMask = 0; 6147 6148 for (size_t i = 0; i < Ctl.size(); ++i) { 6149 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6150 switch(Ctl[i]) { 6151 default: 6152 Error(StrLoc, "invalid mask"); 6153 return false; 6154 case '0': 6155 break; 6156 case '1': 6157 OrMask |= Mask; 6158 break; 6159 case 'p': 6160 AndMask |= Mask; 6161 break; 6162 case 'i': 6163 AndMask |= Mask; 6164 XorMask |= Mask; 6165 break; 6166 } 6167 } 6168 6169 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6170 return true; 6171 } 6172 6173 bool 6174 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6175 6176 SMLoc OffsetLoc = Parser.getTok().getLoc(); 6177 6178 if (!parseExpr(Imm)) { 6179 return false; 6180 } 6181 if (!isUInt<16>(Imm)) { 6182 Error(OffsetLoc, "expected a 16-bit offset"); 6183 return false; 6184 } 6185 return true; 6186 } 6187 6188 bool 6189 
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6190 using namespace llvm::AMDGPU::Swizzle; 6191 6192 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 6193 6194 SMLoc ModeLoc = Parser.getTok().getLoc(); 6195 bool Ok = false; 6196 6197 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6198 Ok = parseSwizzleQuadPerm(Imm); 6199 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6200 Ok = parseSwizzleBitmaskPerm(Imm); 6201 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6202 Ok = parseSwizzleBroadcast(Imm); 6203 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6204 Ok = parseSwizzleSwap(Imm); 6205 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6206 Ok = parseSwizzleReverse(Imm); 6207 } else { 6208 Error(ModeLoc, "expected a swizzle mode"); 6209 } 6210 6211 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6212 } 6213 6214 return false; 6215 } 6216 6217 OperandMatchResultTy 6218 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6219 SMLoc S = Parser.getTok().getLoc(); 6220 int64_t Imm = 0; 6221 6222 if (trySkipId("offset")) { 6223 6224 bool Ok = false; 6225 if (skipToken(AsmToken::Colon, "expected a colon")) { 6226 if (trySkipId("swizzle")) { 6227 Ok = parseSwizzleMacro(Imm); 6228 } else { 6229 Ok = parseSwizzleOffset(Imm); 6230 } 6231 } 6232 6233 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6234 6235 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6236 } else { 6237 // Swizzle "offset" operand is optional. 6238 // If it is omitted, try parsing other optional operands. 
6239 return parseOptionalOpr(Operands); 6240 } 6241 } 6242 6243 bool 6244 AMDGPUOperand::isSwizzle() const { 6245 return isImmTy(ImmTySwizzle); 6246 } 6247 6248 //===----------------------------------------------------------------------===// 6249 // VGPR Index Mode 6250 //===----------------------------------------------------------------------===// 6251 6252 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6253 6254 using namespace llvm::AMDGPU::VGPRIndexMode; 6255 6256 if (trySkipToken(AsmToken::RParen)) { 6257 return OFF; 6258 } 6259 6260 int64_t Imm = 0; 6261 6262 while (true) { 6263 unsigned Mode = 0; 6264 SMLoc S = Parser.getTok().getLoc(); 6265 6266 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6267 if (trySkipId(IdSymbolic[ModeId])) { 6268 Mode = 1 << ModeId; 6269 break; 6270 } 6271 } 6272 6273 if (Mode == 0) { 6274 Error(S, (Imm == 0)? 6275 "expected a VGPR index mode or a closing parenthesis" : 6276 "expected a VGPR index mode"); 6277 return UNDEF; 6278 } 6279 6280 if (Imm & Mode) { 6281 Error(S, "duplicate VGPR index mode"); 6282 return UNDEF; 6283 } 6284 Imm |= Mode; 6285 6286 if (trySkipToken(AsmToken::RParen)) 6287 break; 6288 if (!skipToken(AsmToken::Comma, 6289 "expected a comma or a closing parenthesis")) 6290 return UNDEF; 6291 } 6292 6293 return Imm; 6294 } 6295 6296 OperandMatchResultTy 6297 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6298 6299 using namespace llvm::AMDGPU::VGPRIndexMode; 6300 6301 int64_t Imm = 0; 6302 SMLoc S = Parser.getTok().getLoc(); 6303 6304 if (getLexer().getKind() == AsmToken::Identifier && 6305 Parser.getTok().getString() == "gpr_idx" && 6306 getLexer().peekTok().is(AsmToken::LParen)) { 6307 6308 Parser.Lex(); 6309 Parser.Lex(); 6310 6311 Imm = parseGPRIdxMacro(); 6312 if (Imm == UNDEF) 6313 return MatchOperand_ParseFail; 6314 6315 } else { 6316 if (getParser().parseAbsoluteExpression(Imm)) 6317 return MatchOperand_ParseFail; 6318 if (Imm < 0 || !isUInt<4>(Imm)) { 6319 Error(S, "invalid 
immediate: only 4-bit values are legal"); 6320 return MatchOperand_ParseFail; 6321 } 6322 } 6323 6324 Operands.push_back( 6325 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6326 return MatchOperand_Success; 6327 } 6328 6329 bool AMDGPUOperand::isGPRIdxMode() const { 6330 return isImmTy(ImmTyGprIdxMode); 6331 } 6332 6333 //===----------------------------------------------------------------------===// 6334 // sopp branch targets 6335 //===----------------------------------------------------------------------===// 6336 6337 OperandMatchResultTy 6338 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6339 6340 // Make sure we are not parsing something 6341 // that looks like a label or an expression but is not. 6342 // This will improve error messages. 6343 if (isRegister() || isModifier()) 6344 return MatchOperand_NoMatch; 6345 6346 if (!parseExpr(Operands)) 6347 return MatchOperand_ParseFail; 6348 6349 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6350 assert(Opr.isImm() || Opr.isExpr()); 6351 SMLoc Loc = Opr.getStartLoc(); 6352 6353 // Currently we do not support arbitrary expressions as branch targets. 6354 // Only labels and absolute expressions are accepted. 
6355 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6356 Error(Loc, "expected an absolute expression or a label"); 6357 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6358 Error(Loc, "expected a 16-bit signed jump offset"); 6359 } 6360 6361 return MatchOperand_Success; 6362 } 6363 6364 //===----------------------------------------------------------------------===// 6365 // Boolean holding registers 6366 //===----------------------------------------------------------------------===// 6367 6368 OperandMatchResultTy 6369 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6370 return parseReg(Operands); 6371 } 6372 6373 //===----------------------------------------------------------------------===// 6374 // mubuf 6375 //===----------------------------------------------------------------------===// 6376 6377 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6378 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6379 } 6380 6381 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6382 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6383 } 6384 6385 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6386 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6387 } 6388 6389 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6390 const OperandVector &Operands, 6391 bool IsAtomic, 6392 bool IsAtomicReturn, 6393 bool IsLds) { 6394 bool IsLdsOpcode = IsLds; 6395 bool HasLdsModifier = false; 6396 OptionalImmIndexMap OptionalIdx; 6397 assert(IsAtomicReturn ? IsAtomic : true); 6398 unsigned FirstOperandIdx = 1; 6399 6400 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6401 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6402 6403 // Add the register arguments 6404 if (Op.isReg()) { 6405 Op.addRegOperands(Inst, 1); 6406 // Insert a tied src for atomic return dst. 
6407 // This cannot be postponed as subsequent calls to 6408 // addImmOperands rely on correct number of MC operands. 6409 if (IsAtomicReturn && i == FirstOperandIdx) 6410 Op.addRegOperands(Inst, 1); 6411 continue; 6412 } 6413 6414 // Handle the case where soffset is an immediate 6415 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6416 Op.addImmOperands(Inst, 1); 6417 continue; 6418 } 6419 6420 HasLdsModifier |= Op.isLDS(); 6421 6422 // Handle tokens like 'offen' which are sometimes hard-coded into the 6423 // asm string. There are no MCInst operands for these. 6424 if (Op.isToken()) { 6425 continue; 6426 } 6427 assert(Op.isImm()); 6428 6429 // Handle optional arguments 6430 OptionalIdx[Op.getImmTy()] = i; 6431 } 6432 6433 // This is a workaround for an llvm quirk which may result in an 6434 // incorrect instruction selection. Lds and non-lds versions of 6435 // MUBUF instructions are identical except that lds versions 6436 // have mandatory 'lds' modifier. However this modifier follows 6437 // optional modifiers and llvm asm matcher regards this 'lds' 6438 // modifier as an optional one. As a result, an lds version 6439 // of opcode may be selected even if it has no 'lds' modifier. 6440 if (IsLdsOpcode && !HasLdsModifier) { 6441 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6442 if (NoLdsOpcode != -1) { // Got lds version - correct it. 6443 Inst.setOpcode(NoLdsOpcode); 6444 IsLdsOpcode = false; 6445 } 6446 } 6447 6448 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6449 if (!IsAtomic) { // glc is hard-coded. 
6450 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6451 } 6452 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6453 6454 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6455 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6456 } 6457 6458 if (isGFX10()) 6459 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6460 } 6461 6462 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6463 OptionalImmIndexMap OptionalIdx; 6464 6465 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6466 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6467 6468 // Add the register arguments 6469 if (Op.isReg()) { 6470 Op.addRegOperands(Inst, 1); 6471 continue; 6472 } 6473 6474 // Handle the case where soffset is an immediate 6475 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6476 Op.addImmOperands(Inst, 1); 6477 continue; 6478 } 6479 6480 // Handle tokens like 'offen' which are sometimes hard-coded into the 6481 // asm string. There are no MCInst operands for these. 
6482 if (Op.isToken()) { 6483 continue; 6484 } 6485 assert(Op.isImm()); 6486 6487 // Handle optional arguments 6488 OptionalIdx[Op.getImmTy()] = i; 6489 } 6490 6491 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6492 AMDGPUOperand::ImmTyOffset); 6493 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6494 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6495 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6496 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6497 6498 if (isGFX10()) 6499 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6500 } 6501 6502 //===----------------------------------------------------------------------===// 6503 // mimg 6504 //===----------------------------------------------------------------------===// 6505 6506 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6507 bool IsAtomic) { 6508 unsigned I = 1; 6509 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6510 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6511 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6512 } 6513 6514 if (IsAtomic) { 6515 // Add src, same as dst 6516 assert(Desc.getNumDefs() == 1); 6517 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6518 } 6519 6520 OptionalImmIndexMap OptionalIdx; 6521 6522 for (unsigned E = Operands.size(); I != E; ++I) { 6523 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6524 6525 // Add the register arguments 6526 if (Op.isReg()) { 6527 Op.addRegOperands(Inst, 1); 6528 } else if (Op.isImmModifier()) { 6529 OptionalIdx[Op.getImmTy()] = I; 6530 } else if (!Op.isToken()) { 6531 llvm_unreachable("unexpected operand type"); 6532 } 6533 } 6534 6535 bool IsGFX10 = isGFX10(); 6536 6537 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6538 if (IsGFX10) 6539 addOptionalImmOperand(Inst, Operands, OptionalIdx, 
AMDGPUOperand::ImmTyDim, -1); 6540 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6541 if (IsGFX10) 6542 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6543 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6544 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6545 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6546 if (IsGFX10) 6547 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6548 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6549 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6550 if (!IsGFX10) 6551 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6552 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6553 } 6554 6555 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6556 cvtMIMG(Inst, Operands, true); 6557 } 6558 6559 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 6560 const OperandVector &Operands) { 6561 for (unsigned I = 1; I < Operands.size(); ++I) { 6562 auto &Operand = (AMDGPUOperand &)*Operands[I]; 6563 if (Operand.isReg()) 6564 Operand.addRegOperands(Inst, 1); 6565 } 6566 6567 Inst.addOperand(MCOperand::createImm(1)); // a16 6568 } 6569 6570 //===----------------------------------------------------------------------===// 6571 // smrd 6572 //===----------------------------------------------------------------------===// 6573 6574 bool AMDGPUOperand::isSMRDOffset8() const { 6575 return isImm() && isUInt<8>(getImm()); 6576 } 6577 6578 bool AMDGPUOperand::isSMEMOffset() const { 6579 return isImm(); // Offset range is checked later by validator. 6580 } 6581 6582 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6583 // 32-bit literals are only supported on CI and we only want to use them 6584 // when the offset is > 8-bits. 
6585 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6586 } 6587 6588 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6589 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6590 } 6591 6592 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6593 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6594 } 6595 6596 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6597 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6598 } 6599 6600 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6601 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6602 } 6603 6604 //===----------------------------------------------------------------------===// 6605 // vop3 6606 //===----------------------------------------------------------------------===// 6607 6608 static bool ConvertOmodMul(int64_t &Mul) { 6609 if (Mul != 1 && Mul != 2 && Mul != 4) 6610 return false; 6611 6612 Mul >>= 1; 6613 return true; 6614 } 6615 6616 static bool ConvertOmodDiv(int64_t &Div) { 6617 if (Div == 1) { 6618 Div = 0; 6619 return true; 6620 } 6621 6622 if (Div == 2) { 6623 Div = 3; 6624 return true; 6625 } 6626 6627 return false; 6628 } 6629 6630 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6631 if (BoundCtrl == 0) { 6632 BoundCtrl = 1; 6633 return true; 6634 } 6635 6636 if (BoundCtrl == -1) { 6637 BoundCtrl = 0; 6638 return true; 6639 } 6640 6641 return false; 6642 } 6643 6644 // Note: the order in this table matches the order of operands in AsmString. 
6645 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6646 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6647 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6648 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6649 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6650 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6651 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6652 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6653 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6654 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6655 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6656 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6657 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6658 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6659 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6660 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6661 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6662 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6663 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6664 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6665 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6666 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6667 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6668 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6669 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6670 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6671 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6672 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6673 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6674 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6675 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6676 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6677 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6678 {"src1_sel", 
AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6679 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6680 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6681 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6682 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6683 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6684 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6685 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6686 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6687 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6688 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6689 }; 6690 6691 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6692 6693 OperandMatchResultTy res = parseOptionalOpr(Operands); 6694 6695 // This is a hack to enable hardcoded mandatory operands which follow 6696 // optional operands. 6697 // 6698 // Current design assumes that all operands after the first optional operand 6699 // are also optional. However implementation of some instructions violates 6700 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6701 // 6702 // To alleviate this problem, we have to (implicitly) parse extra operands 6703 // to make sure autogenerated parser of custom operands never hit hardcoded 6704 // mandatory operands. 
6705 6706 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6707 if (res != MatchOperand_Success || 6708 isToken(AsmToken::EndOfStatement)) 6709 break; 6710 6711 trySkipToken(AsmToken::Comma); 6712 res = parseOptionalOpr(Operands); 6713 } 6714 6715 return res; 6716 } 6717 6718 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6719 OperandMatchResultTy res; 6720 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6721 // try to parse any optional operand here 6722 if (Op.IsBit) { 6723 res = parseNamedBit(Op.Name, Operands, Op.Type); 6724 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6725 res = parseOModOperand(Operands); 6726 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6727 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6728 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6729 res = parseSDWASel(Operands, Op.Name, Op.Type); 6730 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6731 res = parseSDWADstUnused(Operands); 6732 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6733 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6734 Op.Type == AMDGPUOperand::ImmTyNegLo || 6735 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6736 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6737 Op.ConvertResult); 6738 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6739 res = parseDim(Operands); 6740 } else { 6741 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6742 } 6743 if (res != MatchOperand_NoMatch) { 6744 return res; 6745 } 6746 } 6747 return MatchOperand_NoMatch; 6748 } 6749 6750 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6751 StringRef Name = Parser.getTok().getString(); 6752 if (Name == "mul") { 6753 return parseIntWithPrefix("mul", Operands, 6754 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6755 } 6756 6757 if (Name == "div") { 6758 return parseIntWithPrefix("div", Operands, 6759 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6760 } 6761 6762 
return MatchOperand_NoMatch; 6763 } 6764 6765 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6766 cvtVOP3P(Inst, Operands); 6767 6768 int Opc = Inst.getOpcode(); 6769 6770 int SrcNum; 6771 const int Ops[] = { AMDGPU::OpName::src0, 6772 AMDGPU::OpName::src1, 6773 AMDGPU::OpName::src2 }; 6774 for (SrcNum = 0; 6775 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6776 ++SrcNum); 6777 assert(SrcNum > 0); 6778 6779 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6780 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6781 6782 if ((OpSel & (1 << SrcNum)) != 0) { 6783 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6784 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6785 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6786 } 6787 } 6788 6789 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6790 // 1. This operand is input modifiers 6791 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6792 // 2. This is not last operand 6793 && Desc.NumOperands > (OpNum + 1) 6794 // 3. Next operand is register class 6795 && Desc.OpInfo[OpNum + 1].RegClass != -1 6796 // 4. 
Next register is not tied to any other operand 6797 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6798 } 6799 6800 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6801 { 6802 OptionalImmIndexMap OptionalIdx; 6803 unsigned Opc = Inst.getOpcode(); 6804 6805 unsigned I = 1; 6806 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6807 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6808 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6809 } 6810 6811 for (unsigned E = Operands.size(); I != E; ++I) { 6812 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6813 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6814 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6815 } else if (Op.isInterpSlot() || 6816 Op.isInterpAttr() || 6817 Op.isAttrChan()) { 6818 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6819 } else if (Op.isImmModifier()) { 6820 OptionalIdx[Op.getImmTy()] = I; 6821 } else { 6822 llvm_unreachable("unhandled operand type"); 6823 } 6824 } 6825 6826 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6827 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6828 } 6829 6830 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6831 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6832 } 6833 6834 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6835 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6836 } 6837 } 6838 6839 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6840 OptionalImmIndexMap &OptionalIdx) { 6841 unsigned Opc = Inst.getOpcode(); 6842 6843 unsigned I = 1; 6844 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6845 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6846 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6847 } 6848 6849 if (AMDGPU::getNamedOperandIdx(Opc, 
AMDGPU::OpName::src0_modifiers) != -1) { 6850 // This instruction has src modifiers 6851 for (unsigned E = Operands.size(); I != E; ++I) { 6852 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6853 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6854 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6855 } else if (Op.isImmModifier()) { 6856 OptionalIdx[Op.getImmTy()] = I; 6857 } else if (Op.isRegOrImm()) { 6858 Op.addRegOrImmOperands(Inst, 1); 6859 } else { 6860 llvm_unreachable("unhandled operand type"); 6861 } 6862 } 6863 } else { 6864 // No src modifiers 6865 for (unsigned E = Operands.size(); I != E; ++I) { 6866 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6867 if (Op.isMod()) { 6868 OptionalIdx[Op.getImmTy()] = I; 6869 } else { 6870 Op.addRegOrImmOperands(Inst, 1); 6871 } 6872 } 6873 } 6874 6875 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6876 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6877 } 6878 6879 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6880 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6881 } 6882 6883 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6884 // it has src2 register operand that is tied to dst operand 6885 // we don't allow modifiers for this operand in assembler so src2_modifiers 6886 // should be 0. 
6887 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6888 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6889 Opc == AMDGPU::V_MAC_F32_e64_vi || 6890 Opc == AMDGPU::V_MAC_F16_e64_vi || 6891 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6892 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6893 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6894 auto it = Inst.begin(); 6895 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6896 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6897 ++it; 6898 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6899 } 6900 } 6901 6902 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6903 OptionalImmIndexMap OptionalIdx; 6904 cvtVOP3(Inst, Operands, OptionalIdx); 6905 } 6906 6907 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6908 const OperandVector &Operands) { 6909 OptionalImmIndexMap OptIdx; 6910 const int Opc = Inst.getOpcode(); 6911 const MCInstrDesc &Desc = MII.get(Opc); 6912 6913 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6914 6915 cvtVOP3(Inst, Operands, OptIdx); 6916 6917 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6918 assert(!IsPacked); 6919 Inst.addOperand(Inst.getOperand(0)); 6920 } 6921 6922 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6923 // instruction, and then figure out where to actually put the modifiers 6924 6925 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6926 6927 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6928 if (OpSelHiIdx != -1) { 6929 int DefaultVal = IsPacked ? 
-1 : 0; 6930 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6931 DefaultVal); 6932 } 6933 6934 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6935 if (NegLoIdx != -1) { 6936 assert(IsPacked); 6937 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6938 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6939 } 6940 6941 const int Ops[] = { AMDGPU::OpName::src0, 6942 AMDGPU::OpName::src1, 6943 AMDGPU::OpName::src2 }; 6944 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6945 AMDGPU::OpName::src1_modifiers, 6946 AMDGPU::OpName::src2_modifiers }; 6947 6948 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6949 6950 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6951 unsigned OpSelHi = 0; 6952 unsigned NegLo = 0; 6953 unsigned NegHi = 0; 6954 6955 if (OpSelHiIdx != -1) { 6956 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6957 } 6958 6959 if (NegLoIdx != -1) { 6960 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6961 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6962 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6963 } 6964 6965 for (int J = 0; J < 3; ++J) { 6966 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6967 if (OpIdx == -1) 6968 break; 6969 6970 uint32_t ModVal = 0; 6971 6972 if ((OpSel & (1 << J)) != 0) 6973 ModVal |= SISrcMods::OP_SEL_0; 6974 6975 if ((OpSelHi & (1 << J)) != 0) 6976 ModVal |= SISrcMods::OP_SEL_1; 6977 6978 if ((NegLo & (1 << J)) != 0) 6979 ModVal |= SISrcMods::NEG; 6980 6981 if ((NegHi & (1 << J)) != 0) 6982 ModVal |= SISrcMods::NEG_HI; 6983 6984 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6985 6986 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6987 } 6988 } 6989 6990 //===----------------------------------------------------------------------===// 6991 // dpp 6992 //===----------------------------------------------------------------------===// 
/// Returns true if this operand is an immediate tagged as ImmTyDPP8 (the type
/// parseDPP8() attaches to a packed dpp8 selector list).
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

/// Returns true if this operand is an ImmTyDppCtrl immediate that fits in 9
/// unsigned bits and whose value is one of the DppCtrl values or ranges
/// enumerated below.
bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

/// Returns true for an ImmTyBLGP immediate that fits in 3 unsigned bits.
bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

/// Returns true for an ImmTyCBSZ immediate that fits in 3 unsigned bits.
bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

/// Returns true for an ImmTyABID immediate that fits in 4 unsigned bits.
bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

/// Returns true for an immediate that fits in 16 bits when read either as a
/// signed or as an unsigned value.
bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

/// Returns true for an immediate that fits in 16 unsigned bits.
bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

/// Parses a "dim:<suffix>" operand and, on success, appends an ImmTyDim
/// immediate holding the Encoding of the matching MIMGDimInfo entry.
///
/// Returns MatchOperand_NoMatch when the target is not GFX10 or the current
/// token is not the identifier "dim" (nothing is consumed in that case), and
/// MatchOperand_ParseFail when "dim" was consumed but the remainder is
/// malformed or names an unknown dimension.
OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10())
    return MatchOperand_NoMatch;

  SMLoc S = Parser.getTok().getLoc();

  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_NoMatch;
  if (getLexer().getTok().getString() != "dim")
    return MatchOperand_NoMatch;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();

  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
  // integer.
  std::string Token;
  if (getLexer().is(AsmToken::Integer)) {
    SMLoc Loc = getLexer().getTok().getEndLoc();
    Token = std::string(getLexer().getTok().getString());
    Parser.Lex();
    // The following token must begin exactly where the integer ended, i.e.
    // the two pieces must be adjacent in the source text.
    if (getLexer().getTok().getLoc() != Loc)
      return MatchOperand_ParseFail;
  }
  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_ParseFail;
  Token += getLexer().getTok().getString();

  // An optional "SQ_RSRC_IMG_" prefix (12 characters) is dropped before the
  // suffix lookup.
  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.substr(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return MatchOperand_ParseFail;

  // Only consume the identifier once it is known to be a valid dimension.
  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

/// Parses "dpp8:[s0,s1,s2,s3,s4,s5,s6,s7]" where each selector is an absolute
/// expression in the range [0, 7]. The eight selectors are packed 3 bits
/// each, first selector in the lowest bits, into an ImmTyDPP8 immediate.
///
/// Any identifier other than "dpp8" is forwarded to parseDPPCtrl(). A
/// non-identifier token, or "dpp8" on a target that is not GFX10, yields
/// MatchOperand_NoMatch. Malformed input after "dpp8" yields
/// MatchOperand_ParseFail.
OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix != "dpp8")
    return parseDPPCtrl(Operands);
  if (!isGFX10())
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LBrac))
    return MatchOperand_ParseFail;

  // The first selector has no leading comma, so it is handled outside the
  // loop below.
  Parser.Lex();
  if (getParser().parseAbsoluteExpression(Sels[0]))
    return MatchOperand_ParseFail;
  if (0 > Sels[0] || 7 < Sels[0])
    return MatchOperand_ParseFail;

  for (size_t i = 1; i < 8; ++i) {
    if (getLexer().isNot(AsmToken::Comma))
      return MatchOperand_ParseFail;

    Parser.Lex();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i])
      return MatchOperand_ParseFail;
  }

  if (getLexer().isNot(AsmToken::RBrac))
    return MatchOperand_ParseFail;
  Parser.Lex();

  // Selector i lands in bits [3*i+2 : 3*i]; 8 selectors use 24 bits.
  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}

/// Parses one dpp control operand and appends it as an ImmTyDppCtrl
/// immediate.
///
/// Accepted spellings are "row_mirror", "row_half_mirror",
/// "quad_perm:[a,b,c,d]" and "<name>:<value>" for the names listed in the
/// prefix check below. Returns MatchOperand_NoMatch (consuming nothing) when
/// the current token is not one of those identifiers or the name is not
/// accepted for the current subtarget, and MatchOperand_ParseFail when the
/// name was consumed but the remainder is malformed or out of range.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast"
        && Prefix != "row_share"
        && Prefix != "row_xmask") {
      return MatchOperand_NoMatch;
    }

    // row_share / row_xmask are only accepted when isGFX10() holds.
    if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
      return MatchOperand_NoMatch;

    // wave_* and row_bcast are only accepted when isVI() or isGFX9() holds.
    if (!isVI() && !isGFX9() &&
        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
         Prefix == "wave_rol" || Prefix == "wave_ror" ||
         Prefix == "row_bcast"))
      return MatchOperand_NoMatch;

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      // First selector (0..3) occupies the two lowest bits.
      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
        return MatchOperand_ParseFail;

      // The remaining three selectors are placed at bit offsets 2, 4 and 6.
      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
          return MatchOperand_ParseFail;
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      // Map the (name, value) pair to its DppCtrl value; any pair whose value
      // is outside the range tested for that name falls through to the final
      // else and is rejected.
      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHARE_FIRST;
      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_XMASK_FIRST;
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

/// Default ImmTyDppRowMask operand: value 0xf with an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

/// Default ImmTyEndpgm operand: value 0 with an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

/// Default ImmTyDppBankMask operand: value 0xf with an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

/// Default ImmTyDppBoundCtrl operand: value 0 with an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

/// Default ImmTyDppFi operand: value 0 with an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

/// Builds the MCInst operand list of a DPP (IsDPP8 == false) or DPP8
/// (IsDPP8 == true) instruction from the parsed operands.
///
/// Parsed operands are visited starting at index 1; Operands[0] is not
/// touched here (presumably the mnemonic token -- confirm against the
/// matcher). Register defs are added first, then the remaining operands in
/// order. For DPP the optional immediates are recorded and appended at the
/// end with their defaults; for DPP8 a single fi immediate is appended.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    // If the next MCInst slot is tied to an earlier operand, duplicate that
    // operand into the slot before handling the current parsed operand.
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        // Remembered here and emitted after the loop as the last operand.
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    // Append optional operands in a fixed order; the defaults passed here
    // (0xf, 0xf) match defaultRowMask() / defaultBankMask().
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    // fi is only emitted for opcodes that actually have an fi operand.
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

/// Parses an sdwa select operand introduced by \p Prefix and appends it as an
/// immediate of type \p Type.
///
/// The value string is obtained through parseStringWithPrefix() (defined
/// elsewhere); any result other than MatchOperand_Success is returned
/// unchanged. Recognised values are BYTE_0..BYTE_3, WORD_0, WORD_1 and DWORD;
/// anything else yields MatchOperand_ParseFail.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  // 0xffffffff serves as the "unknown name" sentinel.
  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  // The token is consumed before the sentinel check, i.e. also on failure.
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

/// Parses a "dst_unused" sdwa operand and appends it as an
/// ImmTySdwaDstUnused immediate.
///
/// Recognised values are UNUSED_PAD, UNUSED_SEXT and UNUSED_PRESERVE;
/// anything else yields MatchOperand_ParseFail. A result other than
/// MatchOperand_Success from parseStringWithPrefix() is returned unchanged.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  // 0xffffffff serves as the "unknown name" sentinel.
  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  // The token is consumed before the sentinel check, i.e. also on failure.
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

/// cvtSDWA() for VOP1, using cvtSDWA()'s declared default skip flags.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

/// cvtSDWA() for VOP2, using cvtSDWA()'s declared default skip flags.
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

/// cvtSDWA() for VOP2 with both SkipDstVcc and SkipSrcVcc set.
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

/// cvtSDWA() for VOP2 with only SkipSrcVcc set.
void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

/// cvtSDWA() for VOPC; SkipDstVcc is set exactly when isVI() holds.
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

/// Builds the MCInst operand list of an SDWA instruction from the parsed
/// operands.
///
/// \param BasicInstType one of SIInstrFlags::VOP1, VOP2 or VOPC; it selects
///        which optional sdwa immediates are appended (any other value hits
///        llvm_unreachable unless the opcode is one of the v_nop sdwa forms).
/// \param SkipDstVcc drop a textual vcc/vcc_lo operand in destination
///        position (see the slot checks in the loop).
/// \param SkipSrcVcc drop a textual vcc/vcc_lo operand in the trailing source
///        position of VOP2.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    // An operand was processed without skipping, so a later vcc may be
    // skipped again.
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      // omod is only emitted for opcodes that have an omod operand.
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      // omod is only emitted for opcodes that have an omod operand.
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      // clamp is only emitted for opcodes that have a clamp operand.
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // Insert a copy of operand 0 at the position of the named src2 operand.
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

/// Default ImmTyBLGP operand: value 0 with an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

/// Default ImmTyCBSZ operand: value 0 with an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

/// Default ImmTyABID operand: value 0 with an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
/// Registers AMDGPUAsmParser for the targets returned by
/// getTheAMDGPUTarget() and getTheGCNTarget().
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

// Select which sections of the generated matcher file are pulled in by the
// include below.
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after auto-generated include so that we have
// MatchClassKind enum defined
//
// Returns Match_Success when the operand satisfies the predicate associated
// with the requested match class Kind, and Match_InvalidOperand otherwise,
// including for every Kind that is not listed in the switch.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand. This method checks if we are given immediate operand but expect to
  // get corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

/// Parses the immediate operand of endpgm and appends it as an ImmTyEndpgm
/// immediate.
///
/// When parseExpr() returns false the value is reset to 0. A value that does
/// not fit in 16 unsigned bits is reported with Error() and yields
/// MatchOperand_ParseFail; otherwise MatchOperand_Success is returned.
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional, if not present default to 0
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

/// Returns true if this operand is an immediate tagged as ImmTyEndpgm.
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }