1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/Error.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 #include "llvm/Support/SMLoc.h" 52 #include "llvm/Support/TargetParser.h" 53 #include "llvm/Support/TargetRegistry.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <cassert> 57 #include <cstdint> 58 #include <cstring> 59 #include <iterator> 60 #include <map> 61 #include <memory> 62 #include <string> 63 64 using namespace llvm; 65 using namespace llvm::AMDGPU; 66 using namespace llvm::amdhsa; 67 68 namespace { 69 70 class AMDGPUAsmParser; 71 72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 73 74 //===----------------------------------------------------------------------===// 75 // Operand 76 //===----------------------------------------------------------------------===// 77 78 class AMDGPUOperand : public MCParsedAsmOperand { 79 enum KindTy { 80 Token, 81 Immediate, 82 Register, 83 Expression 84 } Kind; 85 86 SMLoc StartLoc, EndLoc; 87 const AMDGPUAsmParser *AsmParser; 88 89 public: 90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 92 93 using Ptr = std::unique_ptr<AMDGPUOperand>; 94 95 struct Modifiers { 96 bool Abs = false; 97 bool Neg = false; 98 bool Sext = false; 99 100 bool hasFPModifiers() const { return Abs || Neg; } 101 bool hasIntModifiers() const { return Sext; } 102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 103 104 int64_t getFPModifiersOperand() const { 105 int64_t Operand = 0; 106 Operand |= Abs ? SISrcMods::ABS : 0u; 107 Operand |= Neg ? 
          SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
225 return isSymbolRefExpr(); 226 } 227 228 bool isSymbolRefExpr() const { 229 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 230 } 231 232 bool isImm() const override { 233 return Kind == Immediate; 234 } 235 236 bool isInlinableImm(MVT type) const; 237 bool isLiteralImm(MVT type) const; 238 239 bool isRegKind() const { 240 return Kind == Register; 241 } 242 243 bool isReg() const override { 244 return isRegKind() && !hasModifiers(); 245 } 246 247 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 248 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 249 } 250 251 bool isRegOrImmWithInt16InputMods() const { 252 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 253 } 254 255 bool isRegOrImmWithInt32InputMods() const { 256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 257 } 258 259 bool isRegOrImmWithInt64InputMods() const { 260 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 261 } 262 263 bool isRegOrImmWithFP16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 265 } 266 267 bool isRegOrImmWithFP32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 269 } 270 271 bool isRegOrImmWithFP64InputMods() const { 272 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 273 } 274 275 bool isVReg() const { 276 return isRegClass(AMDGPU::VGPR_32RegClassID) || 277 isRegClass(AMDGPU::VReg_64RegClassID) || 278 isRegClass(AMDGPU::VReg_96RegClassID) || 279 isRegClass(AMDGPU::VReg_128RegClassID) || 280 isRegClass(AMDGPU::VReg_160RegClassID) || 281 isRegClass(AMDGPU::VReg_192RegClassID) || 282 isRegClass(AMDGPU::VReg_256RegClassID) || 283 isRegClass(AMDGPU::VReg_512RegClassID) || 284 isRegClass(AMDGPU::VReg_1024RegClassID); 285 } 286 287 bool isVReg32() const { 288 return isRegClass(AMDGPU::VGPR_32RegClassID); 289 } 290 291 bool isVReg32OrOff() const { 292 return isOff() || isVReg32(); 293 } 294 295 bool isNull() const { 296 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 297 } 298 299 bool isSDWAOperand(MVT type) const; 300 bool isSDWAFP16Operand() const; 301 bool isSDWAFP32Operand() const; 302 bool isSDWAInt16Operand() const; 303 bool isSDWAInt32Operand() const; 304 305 bool isImmTy(ImmTy ImmT) const { 306 return isImm() && Imm.Type == ImmT; 307 } 308 309 bool isImmModifier() const { 310 return isImm() && Imm.Type != ImmTyNone; 311 } 312 313 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 314 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 315 bool isDMask() const { return isImmTy(ImmTyDMask); } 316 bool isDim() const { return isImmTy(ImmTyDim); } 317 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 318 bool isDA() const { return isImmTy(ImmTyDA); } 319 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 320 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 321 bool isLWE() const { return isImmTy(ImmTyLWE); } 322 bool isOff() const { return isImmTy(ImmTyOff); } 323 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 324 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 325 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 326 bool isOffen() const { return isImmTy(ImmTyOffen); } 327 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 328 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 329 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 330 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 331 bool 
isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 332 333 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 334 bool isGDS() const { return isImmTy(ImmTyGDS); } 335 bool isLDS() const { return isImmTy(ImmTyLDS); } 336 bool isDLC() const { return isImmTy(ImmTyDLC); } 337 bool isGLC() const { return isImmTy(ImmTyGLC); } 338 bool isSLC() const { return isImmTy(ImmTySLC); } 339 bool isSWZ() const { return isImmTy(ImmTySWZ); } 340 bool isTFE() const { return isImmTy(ImmTyTFE); } 341 bool isD16() const { return isImmTy(ImmTyD16); } 342 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 343 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 344 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 345 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 346 bool isFI() const { return isImmTy(ImmTyDppFi); } 347 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 348 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 349 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 350 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 351 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 352 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 353 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 354 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 355 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 356 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 357 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 358 bool isHigh() const { return isImmTy(ImmTyHigh); } 359 360 bool isMod() const { 361 return isClampSI() || isOModSI(); 362 } 363 364 bool isRegOrImm() const { 365 return isReg() || isImm(); 366 } 367 368 bool isRegClass(unsigned RCID) const; 369 370 bool isInlineValue() const; 371 372 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 373 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 374 } 375 376 bool isSCSrcB16() const { 377 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 378 } 379 380 bool isSCSrcV2B16() const { 381 return isSCSrcB16(); 382 } 383 384 bool isSCSrcB32() const { 385 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 386 } 387 388 bool isSCSrcB64() const { 389 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 390 } 391 392 bool isBoolReg() const; 393 394 bool isSCSrcF16() const { 395 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 396 } 397 398 bool isSCSrcV2F16() const { 399 return isSCSrcF16(); 400 } 401 402 bool isSCSrcF32() const { 403 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 404 } 405 406 bool isSCSrcF64() const { 407 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 408 } 409 410 bool isSSrcB32() const { 411 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 412 } 413 414 bool isSSrcB16() const { 415 return isSCSrcB16() || isLiteralImm(MVT::i16); 416 } 417 418 bool isSSrcV2B16() const { 419 llvm_unreachable("cannot happen"); 420 return isSSrcB16(); 421 } 422 423 bool isSSrcB64() const { 424 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 425 // See isVSrc64(). 
426 return isSCSrcB64() || isLiteralImm(MVT::i64); 427 } 428 429 bool isSSrcF32() const { 430 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 431 } 432 433 bool isSSrcF64() const { 434 return isSCSrcB64() || isLiteralImm(MVT::f64); 435 } 436 437 bool isSSrcF16() const { 438 return isSCSrcB16() || isLiteralImm(MVT::f16); 439 } 440 441 bool isSSrcV2F16() const { 442 llvm_unreachable("cannot happen"); 443 return isSSrcF16(); 444 } 445 446 bool isSSrcOrLdsB32() const { 447 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 448 isLiteralImm(MVT::i32) || isExpr(); 449 } 450 451 bool isVCSrcB32() const { 452 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 453 } 454 455 bool isVCSrcB64() const { 456 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 457 } 458 459 bool isVCSrcB16() const { 460 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 461 } 462 463 bool isVCSrcV2B16() const { 464 return isVCSrcB16(); 465 } 466 467 bool isVCSrcF32() const { 468 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 469 } 470 471 bool isVCSrcF64() const { 472 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 473 } 474 475 bool isVCSrcF16() const { 476 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 477 } 478 479 bool isVCSrcV2F16() const { 480 return isVCSrcF16(); 481 } 482 483 bool isVSrcB32() const { 484 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 485 } 486 487 bool isVSrcB64() const { 488 return isVCSrcF64() || isLiteralImm(MVT::i64); 489 } 490 491 bool isVSrcB16() const { 492 return isVCSrcF16() || isLiteralImm(MVT::i16); 493 } 494 495 bool isVSrcV2B16() const { 496 return isVSrcB16() || isLiteralImm(MVT::v2i16); 497 } 498 499 bool isVSrcF32() const { 500 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 501 } 502 503 bool isVSrcF64() const { 504 return isVCSrcF64() || isLiteralImm(MVT::f64); 505 } 506 507 bool isVSrcF16() const { 508 return isVCSrcF16() || isLiteralImm(MVT::f16); 509 } 510 511 bool isVSrcV2F16() const { 512 return isVSrcF16() || isLiteralImm(MVT::v2f16); 513 } 514 515 bool isVISrcB32() const { 516 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 517 } 518 519 bool isVISrcB16() const { 520 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 521 } 522 523 bool isVISrcV2B16() const { 524 return isVISrcB16(); 525 } 526 527 bool isVISrcF32() const { 528 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 529 } 530 531 bool isVISrcF16() const { 532 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 533 } 534 535 bool isVISrcV2F16() const { 536 return isVISrcF16() || isVISrcB32(); 537 } 538 539 bool isAISrcB32() const { 540 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 541 } 542 543 bool isAISrcB16() const { 544 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 545 } 546 547 bool isAISrcV2B16() const { 548 return isAISrcB16(); 549 } 550 551 bool isAISrcF32() const { 552 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 553 } 554 555 bool isAISrcF16() const { 556 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 557 } 558 559 bool isAISrcV2F16() const { 560 return isAISrcF16() || isAISrcB32(); 561 } 562 563 bool isAISrc_128B32() const { 564 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 565 } 566 567 bool isAISrc_128B16() const { 568 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 569 } 570 571 bool 
isAISrc_128V2B16() const { 572 return isAISrc_128B16(); 573 } 574 575 bool isAISrc_128F32() const { 576 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 577 } 578 579 bool isAISrc_128F16() const { 580 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 581 } 582 583 bool isAISrc_128V2F16() const { 584 return isAISrc_128F16() || isAISrc_128B32(); 585 } 586 587 bool isAISrc_512B32() const { 588 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 589 } 590 591 bool isAISrc_512B16() const { 592 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 593 } 594 595 bool isAISrc_512V2B16() const { 596 return isAISrc_512B16(); 597 } 598 599 bool isAISrc_512F32() const { 600 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 601 } 602 603 bool isAISrc_512F16() const { 604 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 605 } 606 607 bool isAISrc_512V2F16() const { 608 return isAISrc_512F16() || isAISrc_512B32(); 609 } 610 611 bool isAISrc_1024B32() const { 612 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 613 } 614 615 bool isAISrc_1024B16() const { 616 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 617 } 618 619 bool isAISrc_1024V2B16() const { 620 return isAISrc_1024B16(); 621 } 622 623 bool isAISrc_1024F32() const { 624 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 625 } 626 627 bool isAISrc_1024F16() const { 628 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 629 } 630 631 bool isAISrc_1024V2F16() const { 632 return isAISrc_1024F16() || isAISrc_1024B32(); 633 } 634 635 bool isKImmFP32() const { 636 return isLiteralImm(MVT::f32); 637 } 638 639 bool isKImmFP16() const { 640 return isLiteralImm(MVT::f16); 641 } 642 643 bool isMem() const override { 644 return false; 645 } 646 647 bool isExpr() const { 648 return Kind == Expression; 649 } 650 651 bool isSoppBrTarget() const { 652 return isExpr() || isImm(); 653 } 654 655 bool isSWaitCnt() const; 656 bool isHwreg() const; 657 bool isSendMsg() const; 658 bool isSwizzle() const; 659 bool isSMRDOffset8() const; 660 bool isSMEMOffset() const; 661 bool isSMRDLiteralOffset() const; 662 bool isDPP8() const; 663 bool isDPPCtrl() const; 664 bool isBLGP() const; 665 bool isCBSZ() const; 666 bool isABID() const; 667 bool isGPRIdxMode() const; 668 bool isS16Imm() const; 669 bool isU16Imm() const; 670 bool isEndpgm() const; 671 672 StringRef getExpressionAsToken() const { 673 assert(isExpr()); 674 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 675 return S->getSymbol().getName(); 676 } 677 678 StringRef getToken() const { 679 assert(isToken()); 680 681 if (Kind == Expression) 682 return getExpressionAsToken(); 683 684 return StringRef(Tok.Data, Tok.Length); 685 } 686 687 int64_t getImm() const { 688 assert(isImm()); 689 return Imm.Val; 690 } 691 692 ImmTy getImmTy() const { 693 assert(isImm()); 694 return Imm.Type; 695 } 696 697 unsigned getReg() const override { 698 assert(isRegKind()); 699 return Reg.RegNo; 700 } 701 702 SMLoc getStartLoc() const override { 703 return StartLoc; 704 } 705 706 SMLoc getEndLoc() const override { 707 return EndLoc; 708 } 709 710 SMRange getLocRange() const { 711 return SMRange(StartLoc, EndLoc); 712 } 713 714 Modifiers getModifiers() const { 715 assert(isRegKind() || isImmTy(ImmTyNone)); 716 return isRegKind() ? 
Reg.Mods : Imm.Mods; 717 } 718 719 void setModifiers(Modifiers Mods) { 720 assert(isRegKind() || isImmTy(ImmTyNone)); 721 if (isRegKind()) 722 Reg.Mods = Mods; 723 else 724 Imm.Mods = Mods; 725 } 726 727 bool hasModifiers() const { 728 return getModifiers().hasModifiers(); 729 } 730 731 bool hasFPModifiers() const { 732 return getModifiers().hasFPModifiers(); 733 } 734 735 bool hasIntModifiers() const { 736 return getModifiers().hasIntModifiers(); 737 } 738 739 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 740 741 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 742 743 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 744 745 template <unsigned Bitwidth> 746 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 747 748 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 749 addKImmFPOperands<16>(Inst, N); 750 } 751 752 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 753 addKImmFPOperands<32>(Inst, N); 754 } 755 756 void addRegOperands(MCInst &Inst, unsigned N) const; 757 758 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 759 addRegOperands(Inst, N); 760 } 761 762 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 763 if (isRegKind()) 764 addRegOperands(Inst, N); 765 else if (isExpr()) 766 Inst.addOperand(MCOperand::createExpr(Expr)); 767 else 768 addImmOperands(Inst, N); 769 } 770 771 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 772 Modifiers Mods = getModifiers(); 773 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 774 if (isRegKind()) { 775 addRegOperands(Inst, N); 776 } else { 777 addImmOperands(Inst, N, false); 778 } 779 } 780 781 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 782 assert(!hasIntModifiers()); 783 addRegOrImmWithInputModsOperands(Inst, N); 784 } 785 786 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 787 assert(!hasFPModifiers()); 788 addRegOrImmWithInputModsOperands(Inst, N); 789 } 790 791 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 792 Modifiers Mods = getModifiers(); 793 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 794 assert(isRegKind()); 795 addRegOperands(Inst, N); 796 } 797 798 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 799 assert(!hasIntModifiers()); 800 addRegWithInputModsOperands(Inst, N); 801 } 802 803 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 804 assert(!hasFPModifiers()); 805 addRegWithInputModsOperands(Inst, N); 806 } 807 808 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 809 if (isImm()) 810 addImmOperands(Inst, N); 811 else { 812 assert(isExpr()); 813 Inst.addOperand(MCOperand::createExpr(Expr)); 814 } 815 } 816 817 static void printImmTy(raw_ostream& OS, ImmTy Type) { 818 switch (Type) { 819 case ImmTyNone: OS << "None"; break; 820 case ImmTyGDS: OS << "GDS"; break; 821 case ImmTyLDS: OS << "LDS"; break; 822 case ImmTyOffen: OS << "Offen"; break; 823 case ImmTyIdxen: OS << "Idxen"; break; 824 case ImmTyAddr64: OS << "Addr64"; break; 825 case ImmTyOffset: OS << "Offset"; break; 826 case ImmTyInstOffset: OS << "InstOffset"; break; 827 case ImmTyOffset0: OS << "Offset0"; break; 828 case ImmTyOffset1: OS << "Offset1"; break; 829 case ImmTyDLC: OS << "DLC"; break; 830 case ImmTyGLC: OS << "GLC"; break; 831 case ImmTySLC: OS << "SLC"; break; 832 case ImmTySWZ: OS << "SWZ"; break; 833 case ImmTyTFE: OS << "TFE"; break; 834 case 
ImmTyD16: OS << "D16"; break; 835 case ImmTyFORMAT: OS << "FORMAT"; break; 836 case ImmTyClampSI: OS << "ClampSI"; break; 837 case ImmTyOModSI: OS << "OModSI"; break; 838 case ImmTyDPP8: OS << "DPP8"; break; 839 case ImmTyDppCtrl: OS << "DppCtrl"; break; 840 case ImmTyDppRowMask: OS << "DppRowMask"; break; 841 case ImmTyDppBankMask: OS << "DppBankMask"; break; 842 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 843 case ImmTyDppFi: OS << "FI"; break; 844 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 845 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 846 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 847 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 848 case ImmTyDMask: OS << "DMask"; break; 849 case ImmTyDim: OS << "Dim"; break; 850 case ImmTyUNorm: OS << "UNorm"; break; 851 case ImmTyDA: OS << "DA"; break; 852 case ImmTyR128A16: OS << "R128A16"; break; 853 case ImmTyA16: OS << "A16"; break; 854 case ImmTyLWE: OS << "LWE"; break; 855 case ImmTyOff: OS << "Off"; break; 856 case ImmTyExpTgt: OS << "ExpTgt"; break; 857 case ImmTyExpCompr: OS << "ExpCompr"; break; 858 case ImmTyExpVM: OS << "ExpVM"; break; 859 case ImmTyHwreg: OS << "Hwreg"; break; 860 case ImmTySendMsg: OS << "SendMsg"; break; 861 case ImmTyInterpSlot: OS << "InterpSlot"; break; 862 case ImmTyInterpAttr: OS << "InterpAttr"; break; 863 case ImmTyAttrChan: OS << "AttrChan"; break; 864 case ImmTyOpSel: OS << "OpSel"; break; 865 case ImmTyOpSelHi: OS << "OpSelHi"; break; 866 case ImmTyNegLo: OS << "NegLo"; break; 867 case ImmTyNegHi: OS << "NegHi"; break; 868 case ImmTySwizzle: OS << "Swizzle"; break; 869 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 870 case ImmTyHigh: OS << "High"; break; 871 case ImmTyBLGP: OS << "BLGP"; break; 872 case ImmTyCBSZ: OS << "CBSZ"; break; 873 case ImmTyABID: OS << "ABID"; break; 874 case ImmTyEndpgm: OS << "Endpgm"; break; 875 } 876 } 877 878 void print(raw_ostream &OS) const override { 879 switch (Kind) { 880 case Register: 881 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 882 break; 883 case Immediate: 884 OS << '<' << getImm(); 885 if (getImmTy() != ImmTyNone) { 886 OS << " type: "; printImmTy(OS, getImmTy()); 887 } 888 OS << " mods: " << Imm.Mods << '>'; 889 break; 890 case Token: 891 OS << '\'' << getToken() << '\''; 892 break; 893 case Expression: 894 OS << "<expr " << *Expr << '>'; 895 break; 896 } 897 } 898 899 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 900 int64_t Val, SMLoc Loc, 901 ImmTy Type = ImmTyNone, 902 bool IsFPImm = false) { 903 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 904 Op->Imm.Val = Val; 905 Op->Imm.IsFPImm = IsFPImm; 906 Op->Imm.Type = Type; 907 Op->Imm.Mods = Modifiers(); 908 Op->StartLoc = Loc; 909 Op->EndLoc = Loc; 910 return Op; 911 } 912 913 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 914 StringRef Str, SMLoc Loc, 915 bool HasExplicitEncodingSize = true) { 916 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 917 Res->Tok.Data = Str.data(); 918 Res->Tok.Length = Str.size(); 919 Res->StartLoc = Loc; 920 Res->EndLoc = Loc; 921 return Res; 922 } 923 924 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 925 unsigned RegNo, SMLoc S, 926 SMLoc E) { 927 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 928 Op->Reg.RegNo = RegNo; 929 Op->Reg.Mods = Modifiers(); 930 Op->StartLoc = S; 931 Op->EndLoc = E; 932 return Op; 933 } 934 935 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 936 
const class MCExpr *Expr, SMLoc S) { 937 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 938 Op->Expr = Expr; 939 Op->StartLoc = S; 940 Op->EndLoc = S; 941 return Op; 942 } 943 }; 944 945 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 946 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 947 return OS; 948 } 949 950 //===----------------------------------------------------------------------===// 951 // AsmParser 952 //===----------------------------------------------------------------------===// 953 954 // Holds info related to the current kernel, e.g. count of SGPRs used. 955 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 956 // .amdgpu_hsa_kernel or at EOF. 957 class KernelScopeInfo { 958 int SgprIndexUnusedMin = -1; 959 int VgprIndexUnusedMin = -1; 960 MCContext *Ctx = nullptr; 961 962 void usesSgprAt(int i) { 963 if (i >= SgprIndexUnusedMin) { 964 SgprIndexUnusedMin = ++i; 965 if (Ctx) { 966 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 967 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 968 } 969 } 970 } 971 972 void usesVgprAt(int i) { 973 if (i >= VgprIndexUnusedMin) { 974 VgprIndexUnusedMin = ++i; 975 if (Ctx) { 976 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 977 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 978 } 979 } 980 } 981 982 public: 983 KernelScopeInfo() = default; 984 985 void initialize(MCContext &Context) { 986 Ctx = &Context; 987 usesSgprAt(SgprIndexUnusedMin = -1); 988 usesVgprAt(VgprIndexUnusedMin = -1); 989 } 990 991 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 992 switch (RegKind) { 993 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 994 case IS_AGPR: // fall through 995 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 996 default: break; 997 } 998 } 999 }; 1000 1001 class AMDGPUAsmParser : public MCTargetAsmParser { 1002 MCAsmParser &Parser; 1003 1004 // Number of extra operands parsed after the first optional operand. 1005 // This may be necessary to skip hardcoded mandatory operands. 1006 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1007 1008 unsigned ForcedEncodingSize = 0; 1009 bool ForcedDPP = false; 1010 bool ForcedSDWA = false; 1011 KernelScopeInfo KernelScope; 1012 1013 /// @name Auto-generated Match Functions 1014 /// { 1015 1016 #define GET_ASSEMBLER_HEADER 1017 #include "AMDGPUGenAsmMatcher.inc" 1018 1019 /// } 1020 1021 private: 1022 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1023 bool OutOfRangeError(SMRange Range); 1024 /// Calculate VGPR/SGPR blocks required for given target, reserved 1025 /// registers, and user-specified NextFreeXGPR values. 1026 /// 1027 /// \param Features [in] Target features, used for bug corrections. 1028 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1029 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1030 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1031 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1032 /// descriptor field, if valid. 1033 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1034 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1035 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1036 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 
1037 /// \param VGPRBlocks [out] Result VGPR block count. 1038 /// \param SGPRBlocks [out] Result SGPR block count. 1039 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1040 bool FlatScrUsed, bool XNACKUsed, 1041 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1042 SMRange VGPRRange, unsigned NextFreeSGPR, 1043 SMRange SGPRRange, unsigned &VGPRBlocks, 1044 unsigned &SGPRBlocks); 1045 bool ParseDirectiveAMDGCNTarget(); 1046 bool ParseDirectiveAMDHSAKernel(); 1047 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1048 bool ParseDirectiveHSACodeObjectVersion(); 1049 bool ParseDirectiveHSACodeObjectISA(); 1050 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1051 bool ParseDirectiveAMDKernelCodeT(); 1052 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1053 bool ParseDirectiveAMDGPUHsaKernel(); 1054 1055 bool ParseDirectiveISAVersion(); 1056 bool ParseDirectiveHSAMetadata(); 1057 bool ParseDirectivePALMetadataBegin(); 1058 bool ParseDirectivePALMetadata(); 1059 bool ParseDirectiveAMDGPULDS(); 1060 1061 /// Common code to parse out a block of text (typically YAML) between start and 1062 /// end directives. 1063 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1064 const char *AssemblerDirectiveEnd, 1065 std::string &CollectString); 1066 1067 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1068 RegisterKind RegKind, unsigned Reg1); 1069 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1070 unsigned &RegNum, unsigned &RegWidth, 1071 bool RestoreOnFailure = false); 1072 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1073 unsigned &RegNum, unsigned &RegWidth, 1074 SmallVectorImpl<AsmToken> &Tokens); 1075 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1076 unsigned &RegWidth, 1077 SmallVectorImpl<AsmToken> &Tokens); 1078 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1079 unsigned &RegWidth, 1080 SmallVectorImpl<AsmToken> &Tokens); 1081 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1082 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1083 bool ParseRegRange(unsigned& Num, unsigned& Width); 1084 unsigned getRegularReg(RegisterKind RegKind, 1085 unsigned RegNum, 1086 unsigned RegWidth); 1087 1088 bool isRegister(); 1089 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1090 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1091 void initializeGprCountSymbol(RegisterKind RegKind); 1092 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1093 unsigned RegWidth); 1094 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1095 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 1096 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1097 bool IsGdsHardcoded); 1098 1099 public: 1100 enum AMDGPUMatchResultTy { 1101 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1102 }; 1103 enum OperandMode { 1104 OperandMode_Default, 1105 OperandMode_NSA, 1106 }; 1107 1108 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1109 1110 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1111 const MCInstrInfo &MII, 1112 const MCTargetOptions &Options) 1113 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1114 MCAsmParserExtension::Initialize(Parser); 1115 1116 if (getFeatureBits().none()) { 1117 // Set default features. 
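      // No explicit CPU or feature string was supplied, so fall back to the
      // oldest supported GCN generation (Southern Islands) as the baseline.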
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
1223 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1224 } 1225 1226 const MCInstrInfo *getMII() const { 1227 return &MII; 1228 } 1229 1230 const FeatureBitset &getFeatureBits() const { 1231 return getSTI().getFeatureBits(); 1232 } 1233 1234 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1235 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1236 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1237 1238 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1239 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1240 bool isForcedDPP() const { return ForcedDPP; } 1241 bool isForcedSDWA() const { return ForcedSDWA; } 1242 ArrayRef<unsigned> getMatchedVariants() const; 1243 1244 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1245 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1246 bool RestoreOnFailure); 1247 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1248 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1249 SMLoc &EndLoc) override; 1250 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1251 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1252 unsigned Kind) override; 1253 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1254 OperandVector &Operands, MCStreamer &Out, 1255 uint64_t &ErrorInfo, 1256 bool MatchingInlineAsm) override; 1257 bool ParseDirective(AsmToken DirectiveID) override; 1258 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1259 OperandMode Mode = OperandMode_Default); 1260 StringRef parseMnemonicSuffix(StringRef Name); 1261 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1262 SMLoc NameLoc, OperandVector &Operands) override; 1263 //bool ProcessInstruction(MCInst &Inst); 1264 1265 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1266 1267 OperandMatchResultTy 1268 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1269 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1270 bool (*ConvertResult)(int64_t &) = nullptr); 1271 1272 OperandMatchResultTy 1273 parseOperandArrayWithPrefix(const char *Prefix, 1274 OperandVector &Operands, 1275 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1276 bool (*ConvertResult)(int64_t&) = nullptr); 1277 1278 OperandMatchResultTy 1279 parseNamedBit(const char *Name, OperandVector &Operands, 1280 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1281 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1282 StringRef &Value); 1283 1284 bool isModifier(); 1285 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1286 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1287 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1288 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1289 bool parseSP3NegModifier(); 1290 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1291 OperandMatchResultTy parseReg(OperandVector &Operands); 1292 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1293 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1294 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1295 
OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1296 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1297 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1298 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands); 1299 1300 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1301 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1302 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1303 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1304 1305 bool parseCnt(int64_t &IntVal); 1306 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1307 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1308 1309 private: 1310 struct OperandInfoTy { 1311 int64_t Id; 1312 bool IsSymbolic = false; 1313 bool IsDefined = false; 1314 1315 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1316 }; 1317 1318 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1319 bool validateSendMsg(const OperandInfoTy &Msg, 1320 const OperandInfoTy &Op, 1321 const OperandInfoTy &Stream, 1322 const SMLoc Loc); 1323 1324 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1325 bool validateHwreg(const OperandInfoTy &HwReg, 1326 const int64_t Offset, 1327 const int64_t Width, 1328 const SMLoc Loc); 1329 1330 void errorExpTgt(); 1331 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1332 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1333 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1334 1335 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1336 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1337 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1338 bool validateSOPLiteral(const MCInst &Inst) const; 1339 bool validateConstantBusLimitations(const MCInst &Inst); 1340 bool validateEarlyClobberLimitations(const MCInst &Inst); 1341 bool validateIntClampSupported(const MCInst &Inst); 1342 bool validateMIMGAtomicDMask(const MCInst &Inst); 1343 bool validateMIMGGatherDMask(const MCInst &Inst); 1344 bool validateMovrels(const MCInst &Inst); 1345 bool validateMIMGDataSize(const MCInst &Inst); 1346 bool validateMIMGAddrSize(const MCInst &Inst); 1347 bool validateMIMGD16(const MCInst &Inst); 1348 bool validateMIMGDim(const MCInst &Inst); 1349 bool validateLdsDirect(const MCInst &Inst); 1350 bool validateOpSel(const MCInst &Inst); 1351 bool validateVccOperand(unsigned Reg) const; 1352 bool validateVOP3Literal(const MCInst &Inst) const; 1353 bool validateMAIAccWrite(const MCInst &Inst); 1354 unsigned getConstantBusLimit(unsigned Opcode) const; 1355 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1356 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1357 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1358 1359 bool isId(const StringRef Id) const; 1360 bool isId(const AsmToken &Token, const StringRef Id) const; 1361 bool isToken(const AsmToken::TokenKind Kind) const; 1362 bool trySkipId(const StringRef Id); 1363 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1364 bool trySkipToken(const AsmToken::TokenKind Kind); 1365 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1366 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1367 void 
peekTokens(MutableArrayRef<AsmToken> Tokens); 1368 AsmToken::TokenKind getTokenKind() const; 1369 bool parseExpr(int64_t &Imm); 1370 bool parseExpr(OperandVector &Operands); 1371 StringRef getTokenStr() const; 1372 AsmToken peekToken(); 1373 AsmToken getToken() const; 1374 SMLoc getLoc() const; 1375 void lex(); 1376 1377 public: 1378 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1379 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1380 1381 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1382 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1383 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1384 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1385 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1386 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1387 1388 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1389 const unsigned MinVal, 1390 const unsigned MaxVal, 1391 const StringRef ErrMsg); 1392 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1393 bool parseSwizzleOffset(int64_t &Imm); 1394 bool parseSwizzleMacro(int64_t &Imm); 1395 bool parseSwizzleQuadPerm(int64_t &Imm); 1396 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1397 bool parseSwizzleBroadcast(int64_t &Imm); 1398 bool parseSwizzleSwap(int64_t &Imm); 1399 bool parseSwizzleReverse(int64_t &Imm); 1400 1401 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1402 int64_t parseGPRIdxMacro(); 1403 1404 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1405 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1406 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1407 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1408 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1409 1410 AMDGPUOperand::Ptr defaultDLC() const; 1411 AMDGPUOperand::Ptr defaultGLC() const; 1412 AMDGPUOperand::Ptr defaultSLC() const; 1413 1414 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1415 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1416 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1417 AMDGPUOperand::Ptr defaultFlatOffset() const; 1418 1419 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1420 1421 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1422 OptionalImmIndexMap &OptionalIdx); 1423 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1424 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1425 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1426 1427 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1428 1429 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1430 bool IsAtomic = false); 1431 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1432 1433 OperandMatchResultTy parseDim(OperandVector &Operands); 1434 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1435 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1436 AMDGPUOperand::Ptr defaultRowMask() const; 1437 AMDGPUOperand::Ptr defaultBankMask() const; 1438 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1439 AMDGPUOperand::Ptr defaultFI() const; 1440 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1441 void cvtDPP8(MCInst &Inst, const 
OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1442 1443 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1444 AMDGPUOperand::ImmTy Type); 1445 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1446 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1447 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1448 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1449 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1450 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1451 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1452 uint64_t BasicInstType, 1453 bool SkipDstVcc = false, 1454 bool SkipSrcVcc = false); 1455 1456 AMDGPUOperand::Ptr defaultBLGP() const; 1457 AMDGPUOperand::Ptr defaultCBSZ() const; 1458 AMDGPUOperand::Ptr defaultABID() const; 1459 1460 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1461 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1462 }; 1463 1464 struct OptionalOperand { 1465 const char *Name; 1466 AMDGPUOperand::ImmTy Type; 1467 bool IsBit; 1468 bool (*ConvertResult)(int64_t&); 1469 }; 1470 1471 } // end anonymous namespace 1472 1473 // May be called with integer type with equivalent bitwidth. 1474 static const fltSemantics *getFltSemantics(unsigned Size) { 1475 switch (Size) { 1476 case 4: 1477 return &APFloat::IEEEsingle(); 1478 case 8: 1479 return &APFloat::IEEEdouble(); 1480 case 2: 1481 return &APFloat::IEEEhalf(); 1482 default: 1483 llvm_unreachable("unsupported fp type"); 1484 } 1485 } 1486 1487 static const fltSemantics *getFltSemantics(MVT VT) { 1488 return getFltSemantics(VT.getSizeInBits() / 8); 1489 } 1490 1491 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1492 switch (OperandType) { 1493 case AMDGPU::OPERAND_REG_IMM_INT32: 1494 case AMDGPU::OPERAND_REG_IMM_FP32: 1495 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1496 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1497 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1498 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1499 return &APFloat::IEEEsingle(); 1500 case AMDGPU::OPERAND_REG_IMM_INT64: 1501 case AMDGPU::OPERAND_REG_IMM_FP64: 1502 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1503 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1504 return &APFloat::IEEEdouble(); 1505 case AMDGPU::OPERAND_REG_IMM_INT16: 1506 case AMDGPU::OPERAND_REG_IMM_FP16: 1507 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1508 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1509 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1510 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1511 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1512 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1513 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1514 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1515 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1516 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1517 return &APFloat::IEEEhalf(); 1518 default: 1519 llvm_unreachable("unsupported fp type"); 1520 } 1521 } 1522 1523 //===----------------------------------------------------------------------===// 1524 // Operand 1525 //===----------------------------------------------------------------------===// 1526 1527 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1528 bool Lost; 1529 1530 // Convert literal to single precision 1531 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1532 APFloat::rmNearestTiesToEven, 1533 &Lost); 1534 // We allow precision lost but not overflow or underflow 1535 if (Status != APFloat::opOK && 1536 Lost && 1537 
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 64 bits of the literal to zeroes, but we accept
    // these literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
         (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ?
sizeof(double) : getOperandSize(InstDesc, OpNum); 1731 Val = applyInputFPModifiers(Val, Size); 1732 } 1733 1734 APInt Literal(64, Val); 1735 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1736 1737 if (Imm.IsFPImm) { // We got fp literal token 1738 switch (OpTy) { 1739 case AMDGPU::OPERAND_REG_IMM_INT64: 1740 case AMDGPU::OPERAND_REG_IMM_FP64: 1741 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1742 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1743 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1744 AsmParser->hasInv2PiInlineImm())) { 1745 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1746 return; 1747 } 1748 1749 // Non-inlineable 1750 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1751 // For fp operands we check if low 32 bits are zeros 1752 if (Literal.getLoBits(32) != 0) { 1753 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1754 "Can't encode literal as exact 64-bit floating-point operand. " 1755 "Low 32-bits will be set to zero"); 1756 } 1757 1758 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1759 return; 1760 } 1761 1762 // We don't allow fp literals in 64-bit integer instructions. It is 1763 // unclear how we should encode them. This case should be checked earlier 1764 // in predicate methods (isLiteralImm()) 1765 llvm_unreachable("fp literal in 64-bit integer instruction."); 1766 1767 case AMDGPU::OPERAND_REG_IMM_INT32: 1768 case AMDGPU::OPERAND_REG_IMM_FP32: 1769 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1770 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1771 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1772 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1773 case AMDGPU::OPERAND_REG_IMM_INT16: 1774 case AMDGPU::OPERAND_REG_IMM_FP16: 1775 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1776 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1777 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1778 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1779 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1780 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1781 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1782 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1783 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1784 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1785 bool lost; 1786 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1787 // Convert literal to single precision 1788 FPLiteral.convert(*getOpFltSemantics(OpTy), 1789 APFloat::rmNearestTiesToEven, &lost); 1790 // We allow precision lost but not overflow or underflow. This should be 1791 // checked earlier in isLiteralImm() 1792 1793 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1794 Inst.addOperand(MCOperand::createImm(ImmVal)); 1795 return; 1796 } 1797 default: 1798 llvm_unreachable("invalid operand size"); 1799 } 1800 1801 return; 1802 } 1803 1804 // We got int literal token. 1805 // Only sign extend inline immediates. 
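  // Illustrative example (assuming the usual inline-constant rules): integer
  // tokens in [-16, 64] are encoded inline, so "v_mov_b32 v0, 64" needs no
  // literal dword, while "v_mov_b32 v0, 65" falls through to the non-inline
  // paths below and carries a 32-bit literal.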
1806 switch (OpTy) { 1807 case AMDGPU::OPERAND_REG_IMM_INT32: 1808 case AMDGPU::OPERAND_REG_IMM_FP32: 1809 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1810 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1811 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1812 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1813 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1814 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1815 if (isSafeTruncation(Val, 32) && 1816 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1817 AsmParser->hasInv2PiInlineImm())) { 1818 Inst.addOperand(MCOperand::createImm(Val)); 1819 return; 1820 } 1821 1822 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1823 return; 1824 1825 case AMDGPU::OPERAND_REG_IMM_INT64: 1826 case AMDGPU::OPERAND_REG_IMM_FP64: 1827 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1828 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1829 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1830 Inst.addOperand(MCOperand::createImm(Val)); 1831 return; 1832 } 1833 1834 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1835 return; 1836 1837 case AMDGPU::OPERAND_REG_IMM_INT16: 1838 case AMDGPU::OPERAND_REG_IMM_FP16: 1839 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1840 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1841 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1842 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1843 if (isSafeTruncation(Val, 16) && 1844 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1845 AsmParser->hasInv2PiInlineImm())) { 1846 Inst.addOperand(MCOperand::createImm(Val)); 1847 return; 1848 } 1849 1850 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1851 return; 1852 1853 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1854 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1855 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1856 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1857 assert(isSafeTruncation(Val, 16)); 1858 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1859 AsmParser->hasInv2PiInlineImm())); 1860 1861 Inst.addOperand(MCOperand::createImm(Val)); 1862 return; 1863 } 1864 default: 1865 llvm_unreachable("invalid operand size"); 1866 } 1867 } 1868 1869 template <unsigned Bitwidth> 1870 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1871 APInt Literal(64, Imm.Val); 1872 1873 if (!Imm.IsFPImm) { 1874 // We got int literal token. 
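  // Illustrative note (hedged): KImm operands are the fixed-width constants
  // carried by instructions such as v_madmk_f32/v_madak_f32 (Bitwidth == 32)
  // and their f16 forms (Bitwidth == 16). An integer token is truncated to
  // Bitwidth here, while an fp token is first converted from the parsed
  // double to the target format below.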
1875 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1876 return; 1877 } 1878 1879 bool Lost; 1880 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1881 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1882 APFloat::rmNearestTiesToEven, &Lost); 1883 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1884 } 1885 1886 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1887 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1888 } 1889 1890 static bool isInlineValue(unsigned Reg) { 1891 switch (Reg) { 1892 case AMDGPU::SRC_SHARED_BASE: 1893 case AMDGPU::SRC_SHARED_LIMIT: 1894 case AMDGPU::SRC_PRIVATE_BASE: 1895 case AMDGPU::SRC_PRIVATE_LIMIT: 1896 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1897 return true; 1898 case AMDGPU::SRC_VCCZ: 1899 case AMDGPU::SRC_EXECZ: 1900 case AMDGPU::SRC_SCC: 1901 return true; 1902 case AMDGPU::SGPR_NULL: 1903 return true; 1904 default: 1905 return false; 1906 } 1907 } 1908 1909 bool AMDGPUOperand::isInlineValue() const { 1910 return isRegKind() && ::isInlineValue(getReg()); 1911 } 1912 1913 //===----------------------------------------------------------------------===// 1914 // AsmParser 1915 //===----------------------------------------------------------------------===// 1916 1917 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1918 if (Is == IS_VGPR) { 1919 switch (RegWidth) { 1920 default: return -1; 1921 case 1: return AMDGPU::VGPR_32RegClassID; 1922 case 2: return AMDGPU::VReg_64RegClassID; 1923 case 3: return AMDGPU::VReg_96RegClassID; 1924 case 4: return AMDGPU::VReg_128RegClassID; 1925 case 5: return AMDGPU::VReg_160RegClassID; 1926 case 6: return AMDGPU::VReg_192RegClassID; 1927 case 8: return AMDGPU::VReg_256RegClassID; 1928 case 16: return AMDGPU::VReg_512RegClassID; 1929 case 32: return AMDGPU::VReg_1024RegClassID; 1930 } 1931 } else if (Is == IS_TTMP) { 1932 switch (RegWidth) { 1933 default: return -1; 1934 case 1: return AMDGPU::TTMP_32RegClassID; 1935 case 2: return AMDGPU::TTMP_64RegClassID; 1936 case 4: return AMDGPU::TTMP_128RegClassID; 1937 case 8: return AMDGPU::TTMP_256RegClassID; 1938 case 16: return AMDGPU::TTMP_512RegClassID; 1939 } 1940 } else if (Is == IS_SGPR) { 1941 switch (RegWidth) { 1942 default: return -1; 1943 case 1: return AMDGPU::SGPR_32RegClassID; 1944 case 2: return AMDGPU::SGPR_64RegClassID; 1945 case 3: return AMDGPU::SGPR_96RegClassID; 1946 case 4: return AMDGPU::SGPR_128RegClassID; 1947 case 5: return AMDGPU::SGPR_160RegClassID; 1948 case 6: return AMDGPU::SGPR_192RegClassID; 1949 case 8: return AMDGPU::SGPR_256RegClassID; 1950 case 16: return AMDGPU::SGPR_512RegClassID; 1951 } 1952 } else if (Is == IS_AGPR) { 1953 switch (RegWidth) { 1954 default: return -1; 1955 case 1: return AMDGPU::AGPR_32RegClassID; 1956 case 2: return AMDGPU::AReg_64RegClassID; 1957 case 3: return AMDGPU::AReg_96RegClassID; 1958 case 4: return AMDGPU::AReg_128RegClassID; 1959 case 5: return AMDGPU::AReg_160RegClassID; 1960 case 6: return AMDGPU::AReg_192RegClassID; 1961 case 8: return AMDGPU::AReg_256RegClassID; 1962 case 16: return AMDGPU::AReg_512RegClassID; 1963 case 32: return AMDGPU::AReg_1024RegClassID; 1964 } 1965 } 1966 return -1; 1967 } 1968 1969 static unsigned getSpecialRegForName(StringRef RegName) { 1970 return StringSwitch<unsigned>(RegName) 1971 .Case("exec", AMDGPU::EXEC) 1972 .Case("vcc", AMDGPU::VCC) 1973 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1974 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1975 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1976 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1977 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1978 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1979 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1980 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1981 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1982 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1983 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1984 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1985 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1986 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1987 .Case("m0", AMDGPU::M0) 1988 .Case("vccz", AMDGPU::SRC_VCCZ) 1989 .Case("src_vccz", AMDGPU::SRC_VCCZ) 1990 .Case("execz", AMDGPU::SRC_EXECZ) 1991 .Case("src_execz", AMDGPU::SRC_EXECZ) 1992 .Case("scc", AMDGPU::SRC_SCC) 1993 .Case("src_scc", AMDGPU::SRC_SCC) 1994 .Case("tba", AMDGPU::TBA) 1995 .Case("tma", AMDGPU::TMA) 1996 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1997 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1998 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1999 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2000 .Case("vcc_lo", AMDGPU::VCC_LO) 2001 .Case("vcc_hi", AMDGPU::VCC_HI) 2002 .Case("exec_lo", AMDGPU::EXEC_LO) 2003 .Case("exec_hi", AMDGPU::EXEC_HI) 2004 .Case("tma_lo", AMDGPU::TMA_LO) 2005 .Case("tma_hi", AMDGPU::TMA_HI) 2006 .Case("tba_lo", AMDGPU::TBA_LO) 2007 .Case("tba_hi", AMDGPU::TBA_HI) 2008 .Case("pc", AMDGPU::PC_REG) 2009 .Case("null", AMDGPU::SGPR_NULL) 2010 .Default(AMDGPU::NoRegister); 2011 } 2012 2013 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2014 SMLoc &EndLoc, bool RestoreOnFailure) { 2015 auto R = parseRegister(); 2016 if (!R) return true; 2017 assert(R->isReg()); 2018 RegNo = R->getReg(); 2019 StartLoc = R->getStartLoc(); 2020 EndLoc = R->getEndLoc(); 2021 return false; 2022 } 2023 2024 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2025 SMLoc &EndLoc) { 2026 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2027 } 2028 2029 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2030 SMLoc &StartLoc, 2031 SMLoc &EndLoc) { 2032 bool Result = 2033 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2034 bool PendingErrors = getParser().hasPendingError(); 2035 getParser().clearPendingErrors(); 2036 if (PendingErrors) 2037 return MatchOperand_ParseFail; 2038 if (Result) 2039 return MatchOperand_NoMatch; 2040 return MatchOperand_Success; 2041 } 2042 2043 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2044 RegisterKind RegKind, unsigned Reg1) { 2045 switch (RegKind) { 2046 case IS_SPECIAL: 2047 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2048 Reg = AMDGPU::EXEC; 2049 RegWidth = 2; 2050 return true; 2051 } 2052 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2053 Reg = AMDGPU::FLAT_SCR; 2054 RegWidth = 2; 2055 return true; 2056 } 2057 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2058 Reg = AMDGPU::XNACK_MASK; 2059 RegWidth = 2; 2060 return true; 2061 } 2062 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2063 Reg = AMDGPU::VCC; 2064 RegWidth = 2; 2065 return true; 2066 } 2067 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2068 Reg = AMDGPU::TBA; 2069 RegWidth = 2; 2070 return true; 2071 } 2072 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2073 Reg = AMDGPU::TMA; 2074 RegWidth = 2; 
2075 return true; 2076 } 2077 return false; 2078 case IS_VGPR: 2079 case IS_SGPR: 2080 case IS_AGPR: 2081 case IS_TTMP: 2082 if (Reg1 != Reg + RegWidth) { 2083 return false; 2084 } 2085 RegWidth++; 2086 return true; 2087 default: 2088 llvm_unreachable("unexpected register kind"); 2089 } 2090 } 2091 2092 struct RegInfo { 2093 StringLiteral Name; 2094 RegisterKind Kind; 2095 }; 2096 2097 static constexpr RegInfo RegularRegisters[] = { 2098 {{"v"}, IS_VGPR}, 2099 {{"s"}, IS_SGPR}, 2100 {{"ttmp"}, IS_TTMP}, 2101 {{"acc"}, IS_AGPR}, 2102 {{"a"}, IS_AGPR}, 2103 }; 2104 2105 static bool isRegularReg(RegisterKind Kind) { 2106 return Kind == IS_VGPR || 2107 Kind == IS_SGPR || 2108 Kind == IS_TTMP || 2109 Kind == IS_AGPR; 2110 } 2111 2112 static const RegInfo* getRegularRegInfo(StringRef Str) { 2113 for (const RegInfo &Reg : RegularRegisters) 2114 if (Str.startswith(Reg.Name)) 2115 return &Reg; 2116 return nullptr; 2117 } 2118 2119 static bool getRegNum(StringRef Str, unsigned& Num) { 2120 return !Str.getAsInteger(10, Num); 2121 } 2122 2123 bool 2124 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2125 const AsmToken &NextToken) const { 2126 2127 // A list of consecutive registers: [s0,s1,s2,s3] 2128 if (Token.is(AsmToken::LBrac)) 2129 return true; 2130 2131 if (!Token.is(AsmToken::Identifier)) 2132 return false; 2133 2134 // A single register like s0 or a range of registers like s[0:1] 2135 2136 StringRef Str = Token.getString(); 2137 const RegInfo *Reg = getRegularRegInfo(Str); 2138 if (Reg) { 2139 StringRef RegName = Reg->Name; 2140 StringRef RegSuffix = Str.substr(RegName.size()); 2141 if (!RegSuffix.empty()) { 2142 unsigned Num; 2143 // A single register with an index: rXX 2144 if (getRegNum(RegSuffix, Num)) 2145 return true; 2146 } else { 2147 // A range of registers: r[XX:YY]. 2148 if (NextToken.is(AsmToken::LBrac)) 2149 return true; 2150 } 2151 } 2152 2153 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2154 } 2155 2156 bool 2157 AMDGPUAsmParser::isRegister() 2158 { 2159 return isRegister(getToken(), peekToken()); 2160 } 2161 2162 unsigned 2163 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2164 unsigned RegNum, 2165 unsigned RegWidth) { 2166 2167 assert(isRegularReg(RegKind)); 2168 2169 unsigned AlignSize = 1; 2170 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2171 // SGPR and TTMP registers must be aligned. 2172 // Max required alignment is 4 dwords. 
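    // Illustrative examples (assuming the alignment rule above): s[2:3] is a
    // valid 64-bit pair because its start index is a multiple of AlignSize,
    // while s[1:2] is rejected below. VGPR ranges such as v[1:2] keep
    // AlignSize == 1 and are not affected.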
2173 AlignSize = std::min(RegWidth, 4u); 2174 } 2175 2176 if (RegNum % AlignSize != 0) 2177 return AMDGPU::NoRegister; 2178 2179 unsigned RegIdx = RegNum / AlignSize; 2180 int RCID = getRegClass(RegKind, RegWidth); 2181 if (RCID == -1) 2182 return AMDGPU::NoRegister; 2183 2184 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2185 const MCRegisterClass RC = TRI->getRegClass(RCID); 2186 if (RegIdx >= RC.getNumRegs()) 2187 return AMDGPU::NoRegister; 2188 2189 return RC.getRegister(RegIdx); 2190 } 2191 2192 bool 2193 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2194 int64_t RegLo, RegHi; 2195 if (!trySkipToken(AsmToken::LBrac)) 2196 return false; 2197 2198 if (!parseExpr(RegLo)) 2199 return false; 2200 2201 if (trySkipToken(AsmToken::Colon)) { 2202 if (!parseExpr(RegHi)) 2203 return false; 2204 } else { 2205 RegHi = RegLo; 2206 } 2207 2208 if (!trySkipToken(AsmToken::RBrac)) 2209 return false; 2210 2211 if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi) 2212 return false; 2213 2214 Num = static_cast<unsigned>(RegLo); 2215 Width = (RegHi - RegLo) + 1; 2216 return true; 2217 } 2218 2219 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2220 unsigned &RegNum, unsigned &RegWidth, 2221 SmallVectorImpl<AsmToken> &Tokens) { 2222 assert(isToken(AsmToken::Identifier)); 2223 unsigned Reg = getSpecialRegForName(getTokenStr()); 2224 if (Reg) { 2225 RegNum = 0; 2226 RegWidth = 1; 2227 RegKind = IS_SPECIAL; 2228 Tokens.push_back(getToken()); 2229 lex(); // skip register name 2230 } 2231 return Reg; 2232 } 2233 2234 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2235 unsigned &RegNum, unsigned &RegWidth, 2236 SmallVectorImpl<AsmToken> &Tokens) { 2237 assert(isToken(AsmToken::Identifier)); 2238 StringRef RegName = getTokenStr(); 2239 2240 const RegInfo *RI = getRegularRegInfo(RegName); 2241 if (!RI) 2242 return AMDGPU::NoRegister; 2243 Tokens.push_back(getToken()); 2244 lex(); // skip register name 2245 2246 RegKind = RI->Kind; 2247 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2248 if (!RegSuffix.empty()) { 2249 // Single 32-bit register: vXX. 2250 if (!getRegNum(RegSuffix, RegNum)) 2251 return AMDGPU::NoRegister; 2252 RegWidth = 1; 2253 } else { 2254 // Range of registers: v[XX:YY]. ":YY" is optional. 
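    // Illustrative examples (hedged): "v[4:7]" yields RegNum = 4 and
    // RegWidth = 4, while "v[5]" is accepted as a single register because the
    // ":YY" part is optional.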
2255 if (!ParseRegRange(RegNum, RegWidth)) 2256 return AMDGPU::NoRegister; 2257 } 2258 2259 return getRegularReg(RegKind, RegNum, RegWidth); 2260 } 2261 2262 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2263 unsigned &RegWidth, 2264 SmallVectorImpl<AsmToken> &Tokens) { 2265 unsigned Reg = AMDGPU::NoRegister; 2266 2267 if (!trySkipToken(AsmToken::LBrac)) 2268 return AMDGPU::NoRegister; 2269 2270 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2271 2272 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2273 return AMDGPU::NoRegister; 2274 if (RegWidth != 1) 2275 return AMDGPU::NoRegister; 2276 2277 for (; trySkipToken(AsmToken::Comma); ) { 2278 RegisterKind NextRegKind; 2279 unsigned NextReg, NextRegNum, NextRegWidth; 2280 2281 if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth, 2282 Tokens)) 2283 return AMDGPU::NoRegister; 2284 if (NextRegWidth != 1) 2285 return AMDGPU::NoRegister; 2286 if (NextRegKind != RegKind) 2287 return AMDGPU::NoRegister; 2288 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg)) 2289 return AMDGPU::NoRegister; 2290 } 2291 2292 if (!trySkipToken(AsmToken::RBrac)) 2293 return AMDGPU::NoRegister; 2294 2295 if (isRegularReg(RegKind)) 2296 Reg = getRegularReg(RegKind, RegNum, RegWidth); 2297 2298 return Reg; 2299 } 2300 2301 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2302 unsigned &RegNum, unsigned &RegWidth, 2303 SmallVectorImpl<AsmToken> &Tokens) { 2304 Reg = AMDGPU::NoRegister; 2305 2306 if (isToken(AsmToken::Identifier)) { 2307 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2308 if (Reg == AMDGPU::NoRegister) 2309 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2310 } else { 2311 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2312 } 2313 2314 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2315 return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg); 2316 } 2317 2318 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2319 unsigned &RegNum, unsigned &RegWidth, 2320 bool RestoreOnFailure) { 2321 Reg = AMDGPU::NoRegister; 2322 2323 SmallVector<AsmToken, 1> Tokens; 2324 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2325 if (RestoreOnFailure) { 2326 while (!Tokens.empty()) { 2327 getLexer().UnLex(Tokens.pop_back_val()); 2328 } 2329 } 2330 return true; 2331 } 2332 return false; 2333 } 2334 2335 Optional<StringRef> 2336 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2337 switch (RegKind) { 2338 case IS_VGPR: 2339 return StringRef(".amdgcn.next_free_vgpr"); 2340 case IS_SGPR: 2341 return StringRef(".amdgcn.next_free_sgpr"); 2342 default: 2343 return None; 2344 } 2345 } 2346 2347 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2348 auto SymbolName = getGprCountSymbolName(RegKind); 2349 assert(SymbolName && "initializing invalid register kind"); 2350 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2351 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2352 } 2353 2354 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2355 unsigned DwordRegIndex, 2356 unsigned RegWidth) { 2357 // Symbols are only defined for GCN targets 2358 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2359 return true; 2360 2361 auto SymbolName = getGprCountSymbolName(RegKind); 2362 if (!SymbolName) 2363 return true; 2364 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2365 2366 int64_t NewMax 
= DwordRegIndex + RegWidth - 1; 2367 int64_t OldCount; 2368 2369 if (!Sym->isVariable()) 2370 return !Error(getParser().getTok().getLoc(), 2371 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2372 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2373 return !Error( 2374 getParser().getTok().getLoc(), 2375 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2376 2377 if (OldCount <= NewMax) 2378 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2379 2380 return true; 2381 } 2382 2383 std::unique_ptr<AMDGPUOperand> 2384 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2385 const auto &Tok = Parser.getTok(); 2386 SMLoc StartLoc = Tok.getLoc(); 2387 SMLoc EndLoc = Tok.getEndLoc(); 2388 RegisterKind RegKind; 2389 unsigned Reg, RegNum, RegWidth; 2390 2391 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2392 //FIXME: improve error messages (bug 41303). 2393 Error(StartLoc, "not a valid operand."); 2394 return nullptr; 2395 } 2396 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2397 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2398 return nullptr; 2399 } else 2400 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2401 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2402 } 2403 2404 OperandMatchResultTy 2405 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2406 // TODO: add syntactic sugar for 1/(2*PI) 2407 2408 assert(!isRegister()); 2409 assert(!isModifier()); 2410 2411 const auto& Tok = getToken(); 2412 const auto& NextTok = peekToken(); 2413 bool IsReal = Tok.is(AsmToken::Real); 2414 SMLoc S = getLoc(); 2415 bool Negate = false; 2416 2417 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2418 lex(); 2419 IsReal = true; 2420 Negate = true; 2421 } 2422 2423 if (IsReal) { 2424 // Floating-point expressions are not supported. 2425 // Can only allow floating-point literals with an 2426 // optional sign. 2427 2428 StringRef Num = getTokenStr(); 2429 lex(); 2430 2431 APFloat RealVal(APFloat::IEEEdouble()); 2432 auto roundMode = APFloat::rmNearestTiesToEven; 2433 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2434 return MatchOperand_ParseFail; 2435 } 2436 if (Negate) 2437 RealVal.changeSign(); 2438 2439 Operands.push_back( 2440 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2441 AMDGPUOperand::ImmTyNone, true)); 2442 2443 return MatchOperand_Success; 2444 2445 } else { 2446 int64_t IntVal; 2447 const MCExpr *Expr; 2448 SMLoc S = getLoc(); 2449 2450 if (HasSP3AbsModifier) { 2451 // This is a workaround for handling expressions 2452 // as arguments of SP3 'abs' modifier, for example: 2453 // |1.0| 2454 // |-1| 2455 // |1+x| 2456 // This syntax is not compatible with syntax of standard 2457 // MC expressions (due to the trailing '|'). 
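      // A hedged reading of the workaround: '|' is the bitwise-OR operator in
      // standard MC expressions, so a full expression parse could try to
      // consume the closing '|' of the SP3 modifier; parsePrimaryExpr is used
      // below to avoid that.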
2458 SMLoc EndLoc; 2459 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2460 return MatchOperand_ParseFail; 2461 } else { 2462 if (Parser.parseExpression(Expr)) 2463 return MatchOperand_ParseFail; 2464 } 2465 2466 if (Expr->evaluateAsAbsolute(IntVal)) { 2467 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2468 } else { 2469 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2470 } 2471 2472 return MatchOperand_Success; 2473 } 2474 2475 return MatchOperand_NoMatch; 2476 } 2477 2478 OperandMatchResultTy 2479 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2480 if (!isRegister()) 2481 return MatchOperand_NoMatch; 2482 2483 if (auto R = parseRegister()) { 2484 assert(R->isReg()); 2485 Operands.push_back(std::move(R)); 2486 return MatchOperand_Success; 2487 } 2488 return MatchOperand_ParseFail; 2489 } 2490 2491 OperandMatchResultTy 2492 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2493 auto res = parseReg(Operands); 2494 if (res != MatchOperand_NoMatch) { 2495 return res; 2496 } else if (isModifier()) { 2497 return MatchOperand_NoMatch; 2498 } else { 2499 return parseImm(Operands, HasSP3AbsMod); 2500 } 2501 } 2502 2503 bool 2504 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2505 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2506 const auto &str = Token.getString(); 2507 return str == "abs" || str == "neg" || str == "sext"; 2508 } 2509 return false; 2510 } 2511 2512 bool 2513 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2514 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2515 } 2516 2517 bool 2518 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2519 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2520 } 2521 2522 bool 2523 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2524 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2525 } 2526 2527 // Check if this is an operand modifier or an opcode modifier 2528 // which may look like an expression but it is not. We should 2529 // avoid parsing these modifiers as expressions. Currently 2530 // recognized sequences are: 2531 // |...| 2532 // abs(...) 2533 // neg(...) 2534 // sext(...) 2535 // -reg 2536 // -|...| 2537 // -abs(...) 2538 // name:... 2539 // Note that simple opcode modifiers like 'gds' may be parsed as 2540 // expressions; this is a special case. See getExpressionAsToken. 2541 // 2542 bool 2543 AMDGPUAsmParser::isModifier() { 2544 2545 AsmToken Tok = getToken(); 2546 AsmToken NextToken[2]; 2547 peekTokens(NextToken); 2548 2549 return isOperandModifier(Tok, NextToken[0]) || 2550 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2551 isOpcodeModifierWithVal(Tok, NextToken[0]); 2552 } 2553 2554 // Check if the current token is an SP3 'neg' modifier. 2555 // Currently this modifier is allowed in the following context: 2556 // 2557 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2558 // 2. Before an 'abs' modifier: -abs(...) 2559 // 3. Before an SP3 'abs' modifier: -|...| 2560 // 2561 // In all other cases "-" is handled as a part 2562 // of an expression that follows the sign. 
2563 // 2564 // Note: When "-" is followed by an integer literal, 2565 // this is interpreted as integer negation rather 2566 // than a floating-point NEG modifier applied to N. 2567 // Besides being counter-intuitive, such use of the floating-point 2568 // NEG modifier would have resulted in a different meaning 2569 // of integer literals used with VOP1/2/C and VOP3, 2570 // for example: 2571 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2572 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2573 // Negative fp literals with preceding "-" are 2574 // handled likewise for uniformity 2575 // 2576 bool 2577 AMDGPUAsmParser::parseSP3NegModifier() { 2578 2579 AsmToken NextToken[2]; 2580 peekTokens(NextToken); 2581 2582 if (isToken(AsmToken::Minus) && 2583 (isRegister(NextToken[0], NextToken[1]) || 2584 NextToken[0].is(AsmToken::Pipe) || 2585 isId(NextToken[0], "abs"))) { 2586 lex(); 2587 return true; 2588 } 2589 2590 return false; 2591 } 2592 2593 OperandMatchResultTy 2594 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2595 bool AllowImm) { 2596 bool Neg, SP3Neg; 2597 bool Abs, SP3Abs; 2598 SMLoc Loc; 2599 2600 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2601 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2602 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2603 return MatchOperand_ParseFail; 2604 } 2605 2606 SP3Neg = parseSP3NegModifier(); 2607 2608 Loc = getLoc(); 2609 Neg = trySkipId("neg"); 2610 if (Neg && SP3Neg) { 2611 Error(Loc, "expected register or immediate"); 2612 return MatchOperand_ParseFail; 2613 } 2614 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2615 return MatchOperand_ParseFail; 2616 2617 Abs = trySkipId("abs"); 2618 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2619 return MatchOperand_ParseFail; 2620 2621 Loc = getLoc(); 2622 SP3Abs = trySkipToken(AsmToken::Pipe); 2623 if (Abs && SP3Abs) { 2624 Error(Loc, "expected register or immediate"); 2625 return MatchOperand_ParseFail; 2626 } 2627 2628 OperandMatchResultTy Res; 2629 if (AllowImm) { 2630 Res = parseRegOrImm(Operands, SP3Abs); 2631 } else { 2632 Res = parseReg(Operands); 2633 } 2634 if (Res != MatchOperand_Success) { 2635 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2636 } 2637 2638 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2639 return MatchOperand_ParseFail; 2640 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2641 return MatchOperand_ParseFail; 2642 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2643 return MatchOperand_ParseFail; 2644 2645 AMDGPUOperand::Modifiers Mods; 2646 Mods.Abs = Abs || SP3Abs; 2647 Mods.Neg = Neg || SP3Neg; 2648 2649 if (Mods.hasFPModifiers()) { 2650 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2651 if (Op.isExpr()) { 2652 Error(Op.getStartLoc(), "expected an absolute expression"); 2653 return MatchOperand_ParseFail; 2654 } 2655 Op.setModifiers(Mods); 2656 } 2657 return MatchOperand_Success; 2658 } 2659 2660 OperandMatchResultTy 2661 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2662 bool AllowImm) { 2663 bool Sext = trySkipId("sext"); 2664 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2665 return MatchOperand_ParseFail; 2666 2667 OperandMatchResultTy Res; 2668 if (AllowImm) { 2669 Res = parseRegOrImm(Operands); 2670 } else { 2671 Res = parseReg(Operands); 2672 } 2673 if (Res != MatchOperand_Success) { 2674 return Sext? MatchOperand_ParseFail : Res; 2675 } 2676 2677 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2678 return MatchOperand_ParseFail; 2679 2680 AMDGPUOperand::Modifiers Mods; 2681 Mods.Sext = Sext; 2682 2683 if (Mods.hasIntModifiers()) { 2684 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2685 if (Op.isExpr()) { 2686 Error(Op.getStartLoc(), "expected an absolute expression"); 2687 return MatchOperand_ParseFail; 2688 } 2689 Op.setModifiers(Mods); 2690 } 2691 2692 return MatchOperand_Success; 2693 } 2694 2695 OperandMatchResultTy 2696 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2697 return parseRegOrImmWithFPInputMods(Operands, false); 2698 } 2699 2700 OperandMatchResultTy 2701 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2702 return parseRegOrImmWithIntInputMods(Operands, false); 2703 } 2704 2705 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2706 auto Loc = getLoc(); 2707 if (trySkipId("off")) { 2708 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2709 AMDGPUOperand::ImmTyOff, false)); 2710 return MatchOperand_Success; 2711 } 2712 2713 if (!isRegister()) 2714 return MatchOperand_NoMatch; 2715 2716 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2717 if (Reg) { 2718 Operands.push_back(std::move(Reg)); 2719 return MatchOperand_Success; 2720 } 2721 2722 return MatchOperand_ParseFail; 2723 2724 } 2725 2726 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2727 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2728 2729 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2730 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2731 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2732 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2733 return Match_InvalidOperand; 2734 2735 if ((TSFlags & SIInstrFlags::VOP3) && 2736 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2737 getForcedEncodingSize() != 64) 2738 return Match_PreferE32; 2739 2740 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2741 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2742 // v_mac_f32/16 allow only dst_sel == DWORD; 2743 auto OpNum = 2744 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2745 const auto &Op = Inst.getOperand(OpNum); 2746 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2747 return Match_InvalidOperand; 2748 } 2749 } 2750 2751 return Match_Success; 2752 } 2753 2754 // What asm variants we should check 2755 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2756 if (getForcedEncodingSize() == 32) { 2757 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2758 return makeArrayRef(Variants); 2759 } 2760 2761 if (isForcedVOP3()) { 2762 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2763 return makeArrayRef(Variants); 2764 } 2765 2766 if (isForcedSDWA()) { 2767 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2768 AMDGPUAsmVariants::SDWA9}; 2769 return makeArrayRef(Variants); 2770 } 2771 2772 if (isForcedDPP()) { 2773 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2774 return makeArrayRef(Variants); 2775 } 2776 2777 static const unsigned Variants[] = { 2778 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2779 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2780 }; 2781 2782 return makeArrayRef(Variants); 2783 } 2784 2785 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2786 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2787 const unsigned Num = Desc.getNumImplicitUses(); 2788 for (unsigned i = 0; i < Num; ++i) { 2789 unsigned Reg = Desc.ImplicitUses[i]; 2790 switch (Reg) { 2791 case AMDGPU::FLAT_SCR: 2792 case AMDGPU::VCC: 2793 case AMDGPU::VCC_LO: 2794 case AMDGPU::VCC_HI: 2795 case AMDGPU::M0: 2796 return Reg; 2797 default: 2798 break; 2799 } 2800 } 2801 return AMDGPU::NoRegister; 2802 } 2803 2804 // NB: This code is correct only when used to check constant 2805 // bus limitations because GFX7 supports no f16 inline constants. 2806 // Note that there are no cases when a GFX7 opcode violates 2807 // constant bus limitations due to the use of an f16 constant.
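// Illustrative examples for the size-based dispatch below (hedged): for a
// 4-byte operand, 0x3F000000 (the bit pattern of 0.5f) or any integer in
// [-16, 64] is an inline constant, whereas a value such as 0x3F000001 is not
// and would be counted against the constant bus as a literal.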
2808 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2809 unsigned OpIdx) const { 2810 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2811 2812 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2813 return false; 2814 } 2815 2816 const MCOperand &MO = Inst.getOperand(OpIdx); 2817 2818 int64_t Val = MO.getImm(); 2819 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2820 2821 switch (OpSize) { // expected operand size 2822 case 8: 2823 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2824 case 4: 2825 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2826 case 2: { 2827 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2828 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2829 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2830 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2831 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2832 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2833 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2834 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2835 } else { 2836 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2837 } 2838 } 2839 default: 2840 llvm_unreachable("invalid operand size"); 2841 } 2842 } 2843 2844 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2845 if (!isGFX10()) 2846 return 1; 2847 2848 switch (Opcode) { 2849 // 64-bit shift instructions can use only one scalar value input 2850 case AMDGPU::V_LSHLREV_B64: 2851 case AMDGPU::V_LSHLREV_B64_gfx10: 2852 case AMDGPU::V_LSHL_B64: 2853 case AMDGPU::V_LSHRREV_B64: 2854 case AMDGPU::V_LSHRREV_B64_gfx10: 2855 case AMDGPU::V_LSHR_B64: 2856 case AMDGPU::V_ASHRREV_I64: 2857 case AMDGPU::V_ASHRREV_I64_gfx10: 2858 case AMDGPU::V_ASHR_I64: 2859 return 1; 2860 default: 2861 return 2; 2862 } 2863 } 2864 2865 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2866 const MCOperand &MO = Inst.getOperand(OpIdx); 2867 if (MO.isImm()) { 2868 return !isInlineConstant(Inst, OpIdx); 2869 } else if (MO.isReg()) { 2870 auto Reg = MO.getReg(); 2871 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2872 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2873 } else { 2874 return true; 2875 } 2876 } 2877 2878 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2879 const unsigned Opcode = Inst.getOpcode(); 2880 const MCInstrDesc &Desc = MII.get(Opcode); 2881 unsigned ConstantBusUseCount = 0; 2882 unsigned NumLiterals = 0; 2883 unsigned LiteralSize; 2884 2885 if (Desc.TSFlags & 2886 (SIInstrFlags::VOPC | 2887 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2888 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2889 SIInstrFlags::SDWA)) { 2890 // Check special imm operands (used by madmk, etc) 2891 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2892 ++ConstantBusUseCount; 2893 } 2894 2895 SmallDenseSet<unsigned> SGPRsUsed; 2896 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2897 if (SGPRUsed != AMDGPU::NoRegister) { 2898 SGPRsUsed.insert(SGPRUsed); 2899 ++ConstantBusUseCount; 2900 } 2901 2902 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2903 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2904 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2905 2906 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2907 2908 for (int OpIdx : OpIndices) { 2909 if (OpIdx == -1) break; 2910 2911 const MCOperand &MO = 
Inst.getOperand(OpIdx); 2912 if (usesConstantBus(Inst, OpIdx)) { 2913 if (MO.isReg()) { 2914 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2915 // Pairs of registers with partial intersections like these 2916 // s0, s[0:1] 2917 // flat_scratch_lo, flat_scratch 2918 // flat_scratch_lo, flat_scratch_hi 2919 // are theoretically valid but they are disabled anyway. 2920 // Note that this code mimics SIInstrInfo::verifyInstruction 2921 if (!SGPRsUsed.count(Reg)) { 2922 SGPRsUsed.insert(Reg); 2923 ++ConstantBusUseCount; 2924 } 2925 } else { // Expression or a literal 2926 2927 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2928 continue; // special operand like VINTERP attr_chan 2929 2930 // An instruction may use only one literal. 2931 // This has been validated in the previous step. 2932 // See validateVOP3Literal. 2933 // This literal may be used as more than one operand. 2934 // If all these operands are of the same size, 2935 // this literal counts as one scalar value. 2936 // Otherwise it counts as 2 scalar values. 2937 // See "GFX10 Shader Programming", section 3.6.2.3. 2938 2939 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2940 if (Size < 4) Size = 4; 2941 2942 if (NumLiterals == 0) { 2943 NumLiterals = 1; 2944 LiteralSize = Size; 2945 } else if (LiteralSize != Size) { 2946 NumLiterals = 2; 2947 } 2948 } 2949 } 2950 } 2951 } 2952 ConstantBusUseCount += NumLiterals; 2953 2954 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 2955 } 2956 2957 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2958 const unsigned Opcode = Inst.getOpcode(); 2959 const MCInstrDesc &Desc = MII.get(Opcode); 2960 2961 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2962 if (DstIdx == -1 || 2963 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2964 return true; 2965 } 2966 2967 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2968 2969 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2970 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2971 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2972 2973 assert(DstIdx != -1); 2974 const MCOperand &Dst = Inst.getOperand(DstIdx); 2975 assert(Dst.isReg()); 2976 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2977 2978 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2979 2980 for (int SrcIdx : SrcIndices) { 2981 if (SrcIdx == -1) break; 2982 const MCOperand &Src = Inst.getOperand(SrcIdx); 2983 if (Src.isReg()) { 2984 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2985 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2986 return false; 2987 } 2988 } 2989 } 2990 2991 return true; 2992 } 2993 2994 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2995 2996 const unsigned Opc = Inst.getOpcode(); 2997 const MCInstrDesc &Desc = MII.get(Opc); 2998 2999 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3000 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3001 assert(ClampIdx != -1); 3002 return Inst.getOperand(ClampIdx).getImm() == 0; 3003 } 3004 3005 return true; 3006 } 3007 3008 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3009 3010 const unsigned Opc = Inst.getOpcode(); 3011 const MCInstrDesc &Desc = MII.get(Opc); 3012 3013 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3014 return true; 3015 3016 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3017 int DMaskIdx
= AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3018 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3019 3020 assert(VDataIdx != -1); 3021 assert(DMaskIdx != -1); 3022 assert(TFEIdx != -1); 3023 3024 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3025 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3026 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3027 if (DMask == 0) 3028 DMask = 1; 3029 3030 unsigned DataSize = 3031 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3032 if (hasPackedD16()) { 3033 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3034 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3035 DataSize = (DataSize + 1) / 2; 3036 } 3037 3038 return (VDataSize / 4) == DataSize + TFESize; 3039 } 3040 3041 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3042 const unsigned Opc = Inst.getOpcode(); 3043 const MCInstrDesc &Desc = MII.get(Opc); 3044 3045 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3046 return true; 3047 3048 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3049 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3050 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3051 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3052 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3053 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3054 3055 assert(VAddr0Idx != -1); 3056 assert(SrsrcIdx != -1); 3057 assert(DimIdx != -1); 3058 assert(SrsrcIdx > VAddr0Idx); 3059 3060 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3061 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3062 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3063 unsigned VAddrSize = 3064 IsNSA ? SrsrcIdx - VAddr0Idx 3065 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3066 3067 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3068 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3069 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3070 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3071 if (!IsNSA) { 3072 if (AddrSize > 8) 3073 AddrSize = 16; 3074 else if (AddrSize > 4) 3075 AddrSize = 8; 3076 } 3077 3078 return VAddrSize == AddrSize; 3079 } 3080 3081 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3082 3083 const unsigned Opc = Inst.getOpcode(); 3084 const MCInstrDesc &Desc = MII.get(Opc); 3085 3086 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3087 return true; 3088 if (!Desc.mayLoad() || !Desc.mayStore()) 3089 return true; // Not atomic 3090 3091 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3092 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3093 3094 // This is an incomplete check because image_atomic_cmpswap 3095 // may only use 0x3 and 0xf while other atomic operations 3096 // may use 0x1 and 0x3. However these limitations are 3097 // verified when we check that dmask matches dst size. 
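  // Hedged reading of the accepted values: ordinary atomics return a single
  // data element (dmask 0x1, or 0x3 for 64-bit data), while
  // image_atomic_cmpswap carries both the compare and the swap value and so
  // uses 0x3 or 0xf.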
3098 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3099 } 3100 3101 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3102 3103 const unsigned Opc = Inst.getOpcode(); 3104 const MCInstrDesc &Desc = MII.get(Opc); 3105 3106 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3107 return true; 3108 3109 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3110 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3111 3112 // GATHER4 instructions use dmask in a different fashion compared to 3113 // other MIMG instructions. The only useful DMASK values are 3114 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3115 // (red,red,red,red) etc.) The ISA document doesn't mention 3116 // this. 3117 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3118 } 3119 3120 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3121 { 3122 switch (Opcode) { 3123 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3124 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3125 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3126 return true; 3127 default: 3128 return false; 3129 } 3130 } 3131 3132 // movrels* opcodes should only allow VGPRS as src0. 3133 // This is specified in .td description for vop1/vop3, 3134 // but sdwa is handled differently. See isSDWAOperand. 3135 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3136 3137 const unsigned Opc = Inst.getOpcode(); 3138 const MCInstrDesc &Desc = MII.get(Opc); 3139 3140 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3141 return true; 3142 3143 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3144 assert(Src0Idx != -1); 3145 3146 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3147 if (!Src0.isReg()) 3148 return false; 3149 3150 auto Reg = Src0.getReg(); 3151 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3152 return !isSGPR(mc2PseudoReg(Reg), TRI); 3153 } 3154 3155 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) { 3156 3157 const unsigned Opc = Inst.getOpcode(); 3158 3159 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3160 return true; 3161 3162 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3163 assert(Src0Idx != -1); 3164 3165 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3166 if (!Src0.isReg()) 3167 return true; 3168 3169 auto Reg = Src0.getReg(); 3170 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3171 if (isSGPR(mc2PseudoReg(Reg), TRI)) { 3172 Error(getLoc(), "source operand must be either a VGPR or an inline constant"); 3173 return false; 3174 } 3175 3176 return true; 3177 } 3178 3179 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3180 3181 const unsigned Opc = Inst.getOpcode(); 3182 const MCInstrDesc &Desc = MII.get(Opc); 3183 3184 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3185 return true; 3186 3187 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3188 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3189 if (isCI() || isSI()) 3190 return false; 3191 } 3192 3193 return true; 3194 } 3195 3196 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3197 const unsigned Opc = Inst.getOpcode(); 3198 const MCInstrDesc &Desc = MII.get(Opc); 3199 3200 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3201 return true; 3202 3203 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3204 if (DimIdx < 0) 3205 return true; 3206 3207 long Imm = Inst.getOperand(DimIdx).getImm(); 3208 if (Imm < 0 || Imm >= 8) 3209 return false; 3210 3211 return 
true; 3212 } 3213 3214 static bool IsRevOpcode(const unsigned Opcode) 3215 { 3216 switch (Opcode) { 3217 case AMDGPU::V_SUBREV_F32_e32: 3218 case AMDGPU::V_SUBREV_F32_e64: 3219 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3220 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3221 case AMDGPU::V_SUBREV_F32_e32_vi: 3222 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3223 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3224 case AMDGPU::V_SUBREV_F32_e64_vi: 3225 3226 case AMDGPU::V_SUBREV_I32_e32: 3227 case AMDGPU::V_SUBREV_I32_e64: 3228 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3229 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3230 3231 case AMDGPU::V_SUBBREV_U32_e32: 3232 case AMDGPU::V_SUBBREV_U32_e64: 3233 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3234 case AMDGPU::V_SUBBREV_U32_e32_vi: 3235 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3236 case AMDGPU::V_SUBBREV_U32_e64_vi: 3237 3238 case AMDGPU::V_SUBREV_U32_e32: 3239 case AMDGPU::V_SUBREV_U32_e64: 3240 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3241 case AMDGPU::V_SUBREV_U32_e32_vi: 3242 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3243 case AMDGPU::V_SUBREV_U32_e64_vi: 3244 3245 case AMDGPU::V_SUBREV_F16_e32: 3246 case AMDGPU::V_SUBREV_F16_e64: 3247 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3248 case AMDGPU::V_SUBREV_F16_e32_vi: 3249 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3250 case AMDGPU::V_SUBREV_F16_e64_vi: 3251 3252 case AMDGPU::V_SUBREV_U16_e32: 3253 case AMDGPU::V_SUBREV_U16_e64: 3254 case AMDGPU::V_SUBREV_U16_e32_vi: 3255 case AMDGPU::V_SUBREV_U16_e64_vi: 3256 3257 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3258 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3259 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3260 3261 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3262 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3263 3264 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3265 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3266 3267 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3268 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3269 3270 case AMDGPU::V_LSHRREV_B32_e32: 3271 case AMDGPU::V_LSHRREV_B32_e64: 3272 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3273 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3274 case AMDGPU::V_LSHRREV_B32_e32_vi: 3275 case AMDGPU::V_LSHRREV_B32_e64_vi: 3276 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3277 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3278 3279 case AMDGPU::V_ASHRREV_I32_e32: 3280 case AMDGPU::V_ASHRREV_I32_e64: 3281 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3282 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3283 case AMDGPU::V_ASHRREV_I32_e32_vi: 3284 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3285 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3286 case AMDGPU::V_ASHRREV_I32_e64_vi: 3287 3288 case AMDGPU::V_LSHLREV_B32_e32: 3289 case AMDGPU::V_LSHLREV_B32_e64: 3290 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3291 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3292 case AMDGPU::V_LSHLREV_B32_e32_vi: 3293 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3294 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3295 case AMDGPU::V_LSHLREV_B32_e64_vi: 3296 3297 case AMDGPU::V_LSHLREV_B16_e32: 3298 case AMDGPU::V_LSHLREV_B16_e64: 3299 case AMDGPU::V_LSHLREV_B16_e32_vi: 3300 case AMDGPU::V_LSHLREV_B16_e64_vi: 3301 case AMDGPU::V_LSHLREV_B16_gfx10: 3302 3303 case AMDGPU::V_LSHRREV_B16_e32: 3304 case AMDGPU::V_LSHRREV_B16_e64: 3305 case AMDGPU::V_LSHRREV_B16_e32_vi: 3306 case AMDGPU::V_LSHRREV_B16_e64_vi: 3307 case AMDGPU::V_LSHRREV_B16_gfx10: 3308 3309 case AMDGPU::V_ASHRREV_I16_e32: 3310 case AMDGPU::V_ASHRREV_I16_e64: 3311 case AMDGPU::V_ASHRREV_I16_e32_vi: 3312 case AMDGPU::V_ASHRREV_I16_e64_vi: 3313 case AMDGPU::V_ASHRREV_I16_gfx10: 3314 3315 case 
AMDGPU::V_LSHLREV_B64: 3316 case AMDGPU::V_LSHLREV_B64_gfx10: 3317 case AMDGPU::V_LSHLREV_B64_vi: 3318 3319 case AMDGPU::V_LSHRREV_B64: 3320 case AMDGPU::V_LSHRREV_B64_gfx10: 3321 case AMDGPU::V_LSHRREV_B64_vi: 3322 3323 case AMDGPU::V_ASHRREV_I64: 3324 case AMDGPU::V_ASHRREV_I64_gfx10: 3325 case AMDGPU::V_ASHRREV_I64_vi: 3326 3327 case AMDGPU::V_PK_LSHLREV_B16: 3328 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3329 case AMDGPU::V_PK_LSHLREV_B16_vi: 3330 3331 case AMDGPU::V_PK_LSHRREV_B16: 3332 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3333 case AMDGPU::V_PK_LSHRREV_B16_vi: 3334 case AMDGPU::V_PK_ASHRREV_I16: 3335 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3336 case AMDGPU::V_PK_ASHRREV_I16_vi: 3337 return true; 3338 default: 3339 return false; 3340 } 3341 } 3342 3343 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3344 3345 using namespace SIInstrFlags; 3346 const unsigned Opcode = Inst.getOpcode(); 3347 const MCInstrDesc &Desc = MII.get(Opcode); 3348 3349 // lds_direct register is defined so that it can be used 3350 // with 9-bit operands only. Ignore encodings which do not accept these. 3351 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3352 return true; 3353 3354 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3355 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3356 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3357 3358 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3359 3360 // lds_direct cannot be specified as either src1 or src2. 3361 for (int SrcIdx : SrcIndices) { 3362 if (SrcIdx == -1) break; 3363 const MCOperand &Src = Inst.getOperand(SrcIdx); 3364 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3365 return false; 3366 } 3367 } 3368 3369 if (Src0Idx == -1) 3370 return true; 3371 3372 const MCOperand &Src = Inst.getOperand(Src0Idx); 3373 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3374 return true; 3375 3376 // lds_direct is specified as src0. Check additional limitations. 3377 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3378 } 3379 3380 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3381 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3382 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3383 if (Op.isFlatOffset()) 3384 return Op.getStartLoc(); 3385 } 3386 return getLoc(); 3387 } 3388 3389 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3390 const OperandVector &Operands) { 3391 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3392 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3393 return true; 3394 3395 auto Opcode = Inst.getOpcode(); 3396 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3397 assert(OpNum != -1); 3398 3399 const auto &Op = Inst.getOperand(OpNum); 3400 if (!hasFlatOffsets() && Op.getImm() != 0) { 3401 Error(getFlatOffsetLoc(Operands), 3402 "flat offset modifier is not supported on this GPU"); 3403 return false; 3404 } 3405 3406 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3407 // For FLAT segment the offset must be positive; 3408 // MSB is ignored and forced to zero. 3409 unsigned OffsetSize = isGFX9() ? 13 : 12; 3410 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3411 if (!isIntN(OffsetSize, Op.getImm())) { 3412 Error(getFlatOffsetLoc(Operands), 3413 isGFX9() ? 
"expected a 13-bit signed offset" : 3414 "expected a 12-bit signed offset"); 3415 return false; 3416 } 3417 } else { 3418 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3419 Error(getFlatOffsetLoc(Operands), 3420 isGFX9() ? "expected a 12-bit unsigned offset" : 3421 "expected an 11-bit unsigned offset"); 3422 return false; 3423 } 3424 } 3425 3426 return true; 3427 } 3428 3429 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3430 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3431 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3432 if (Op.isSMEMOffset()) 3433 return Op.getStartLoc(); 3434 } 3435 return getLoc(); 3436 } 3437 3438 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3439 const OperandVector &Operands) { 3440 if (isCI() || isSI()) 3441 return true; 3442 3443 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3444 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3445 return true; 3446 3447 auto Opcode = Inst.getOpcode(); 3448 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3449 if (OpNum == -1) 3450 return true; 3451 3452 const auto &Op = Inst.getOperand(OpNum); 3453 if (!Op.isImm()) 3454 return true; 3455 3456 uint64_t Offset = Op.getImm(); 3457 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3458 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3459 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3460 return true; 3461 3462 Error(getSMEMOffsetLoc(Operands), 3463 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3464 "expected a 21-bit signed offset"); 3465 3466 return false; 3467 } 3468 3469 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3470 unsigned Opcode = Inst.getOpcode(); 3471 const MCInstrDesc &Desc = MII.get(Opcode); 3472 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3473 return true; 3474 3475 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3476 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3477 3478 const int OpIndices[] = { Src0Idx, Src1Idx }; 3479 3480 unsigned NumExprs = 0; 3481 unsigned NumLiterals = 0; 3482 uint32_t LiteralValue; 3483 3484 for (int OpIdx : OpIndices) { 3485 if (OpIdx == -1) break; 3486 3487 const MCOperand &MO = Inst.getOperand(OpIdx); 3488 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3489 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3490 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3491 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3492 if (NumLiterals == 0 || LiteralValue != Value) { 3493 LiteralValue = Value; 3494 ++NumLiterals; 3495 } 3496 } else if (MO.isExpr()) { 3497 ++NumExprs; 3498 } 3499 } 3500 } 3501 3502 return NumLiterals + NumExprs <= 1; 3503 } 3504 3505 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3506 const unsigned Opc = Inst.getOpcode(); 3507 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3508 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3509 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3510 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3511 3512 if (OpSel & ~3) 3513 return false; 3514 } 3515 return true; 3516 } 3517 3518 // Check if VCC register matches wavefront size 3519 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3520 auto FB = getFeatureBits(); 3521 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3522 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3523 } 3524 3525 // 
VOP3 literal is only allowed in GFX10+ and only one can be used 3526 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3527 unsigned Opcode = Inst.getOpcode(); 3528 const MCInstrDesc &Desc = MII.get(Opcode); 3529 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3530 return true; 3531 3532 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3533 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3534 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3535 3536 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3537 3538 unsigned NumExprs = 0; 3539 unsigned NumLiterals = 0; 3540 uint32_t LiteralValue; 3541 3542 for (int OpIdx : OpIndices) { 3543 if (OpIdx == -1) break; 3544 3545 const MCOperand &MO = Inst.getOperand(OpIdx); 3546 if (!MO.isImm() && !MO.isExpr()) 3547 continue; 3548 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3549 continue; 3550 3551 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3552 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3553 return false; 3554 3555 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3556 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3557 if (NumLiterals == 0 || LiteralValue != Value) { 3558 LiteralValue = Value; 3559 ++NumLiterals; 3560 } 3561 } else if (MO.isExpr()) { 3562 ++NumExprs; 3563 } 3564 } 3565 NumLiterals += NumExprs; 3566 3567 return !NumLiterals || 3568 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3569 } 3570 3571 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3572 const SMLoc &IDLoc, 3573 const OperandVector &Operands) { 3574 if (!validateLdsDirect(Inst)) { 3575 Error(IDLoc, 3576 "invalid use of lds_direct"); 3577 return false; 3578 } 3579 if (!validateSOPLiteral(Inst)) { 3580 Error(IDLoc, 3581 "only one literal operand is allowed"); 3582 return false; 3583 } 3584 if (!validateVOP3Literal(Inst)) { 3585 Error(IDLoc, 3586 "invalid literal operand"); 3587 return false; 3588 } 3589 if (!validateConstantBusLimitations(Inst)) { 3590 Error(IDLoc, 3591 "invalid operand (violates constant bus restrictions)"); 3592 return false; 3593 } 3594 if (!validateEarlyClobberLimitations(Inst)) { 3595 Error(IDLoc, 3596 "destination must be different than all sources"); 3597 return false; 3598 } 3599 if (!validateIntClampSupported(Inst)) { 3600 Error(IDLoc, 3601 "integer clamping is not supported on this GPU"); 3602 return false; 3603 } 3604 if (!validateOpSel(Inst)) { 3605 Error(IDLoc, 3606 "invalid op_sel operand"); 3607 return false; 3608 } 3609 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
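  // MIMG instructions, by contrast, carry d16 as a separate operand, so it is
  // checked against the subtarget here.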
3610 if (!validateMIMGD16(Inst)) { 3611 Error(IDLoc, 3612 "d16 modifier is not supported on this GPU"); 3613 return false; 3614 } 3615 if (!validateMIMGDim(Inst)) { 3616 Error(IDLoc, "dim modifier is required on this GPU"); 3617 return false; 3618 } 3619 if (!validateMIMGDataSize(Inst)) { 3620 Error(IDLoc, 3621 "image data size does not match dmask and tfe"); 3622 return false; 3623 } 3624 if (!validateMIMGAddrSize(Inst)) { 3625 Error(IDLoc, 3626 "image address size does not match dim and a16"); 3627 return false; 3628 } 3629 if (!validateMIMGAtomicDMask(Inst)) { 3630 Error(IDLoc, 3631 "invalid atomic image dmask"); 3632 return false; 3633 } 3634 if (!validateMIMGGatherDMask(Inst)) { 3635 Error(IDLoc, 3636 "invalid image_gather dmask: only one bit must be set"); 3637 return false; 3638 } 3639 if (!validateMovrels(Inst)) { 3640 Error(IDLoc, "source operand must be a VGPR"); 3641 return false; 3642 } 3643 if (!validateFlatOffset(Inst, Operands)) { 3644 return false; 3645 } 3646 if (!validateSMEMOffset(Inst, Operands)) { 3647 return false; 3648 } 3649 if (!validateMAIAccWrite(Inst)) { 3650 return false; 3651 } 3652 3653 return true; 3654 } 3655 3656 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3657 const FeatureBitset &FBS, 3658 unsigned VariantID = 0); 3659 3660 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3661 OperandVector &Operands, 3662 MCStreamer &Out, 3663 uint64_t &ErrorInfo, 3664 bool MatchingInlineAsm) { 3665 MCInst Inst; 3666 unsigned Result = Match_Success; 3667 for (auto Variant : getMatchedVariants()) { 3668 uint64_t EI; 3669 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3670 Variant); 3671 // We order match statuses from least to most specific. We use most specific 3672 // status as resulting 3673 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3674 if ((R == Match_Success) || 3675 (R == Match_PreferE32) || 3676 (R == Match_MissingFeature && Result != Match_PreferE32) || 3677 (R == Match_InvalidOperand && Result != Match_MissingFeature 3678 && Result != Match_PreferE32) || 3679 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3680 && Result != Match_MissingFeature 3681 && Result != Match_PreferE32)) { 3682 Result = R; 3683 ErrorInfo = EI; 3684 } 3685 if (R == Match_Success) 3686 break; 3687 } 3688 3689 switch (Result) { 3690 default: break; 3691 case Match_Success: 3692 if (!validateInstruction(Inst, IDLoc, Operands)) { 3693 return true; 3694 } 3695 Inst.setLoc(IDLoc); 3696 Out.emitInstruction(Inst, getSTI()); 3697 return false; 3698 3699 case Match_MissingFeature: 3700 return Error(IDLoc, "instruction not supported on this GPU"); 3701 3702 case Match_MnemonicFail: { 3703 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3704 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3705 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3706 return Error(IDLoc, "invalid instruction" + Suggestion, 3707 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3708 } 3709 3710 case Match_InvalidOperand: { 3711 SMLoc ErrorLoc = IDLoc; 3712 if (ErrorInfo != ~0ULL) { 3713 if (ErrorInfo >= Operands.size()) { 3714 return Error(IDLoc, "too few operands for instruction"); 3715 } 3716 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3717 if (ErrorLoc == SMLoc()) 3718 ErrorLoc = IDLoc; 3719 } 3720 return Error(ErrorLoc, "invalid operand for instruction"); 3721 } 3722 3723 case Match_PreferE32: 3724 return Error(IDLoc, "internal error: instruction 
without _e64 suffix " 3725 "should be encoded as e32"); 3726 } 3727 llvm_unreachable("Implement any new match types added!"); 3728 } 3729 3730 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3731 int64_t Tmp = -1; 3732 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3733 return true; 3734 } 3735 if (getParser().parseAbsoluteExpression(Tmp)) { 3736 return true; 3737 } 3738 Ret = static_cast<uint32_t>(Tmp); 3739 return false; 3740 } 3741 3742 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3743 uint32_t &Minor) { 3744 if (ParseAsAbsoluteExpression(Major)) 3745 return TokError("invalid major version"); 3746 3747 if (getLexer().isNot(AsmToken::Comma)) 3748 return TokError("minor version number required, comma expected"); 3749 Lex(); 3750 3751 if (ParseAsAbsoluteExpression(Minor)) 3752 return TokError("invalid minor version"); 3753 3754 return false; 3755 } 3756 3757 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3758 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3759 return TokError("directive only supported for amdgcn architecture"); 3760 3761 std::string Target; 3762 3763 SMLoc TargetStart = getTok().getLoc(); 3764 if (getParser().parseEscapedString(Target)) 3765 return true; 3766 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3767 3768 std::string ExpectedTarget; 3769 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3770 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3771 3772 if (Target != ExpectedTargetOS.str()) 3773 return getParser().Error(TargetRange.Start, "target must match options", 3774 TargetRange); 3775 3776 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3777 return false; 3778 } 3779 3780 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3781 return getParser().Error(Range.Start, "value out of range", Range); 3782 } 3783 3784 bool AMDGPUAsmParser::calculateGPRBlocks( 3785 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3786 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3787 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3788 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3789 // TODO(scott.linder): These calculations are duplicated from 3790 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
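  // Register usage is reported to the kernel descriptor in hardware allocation
  // granules ("blocks") rather than as raw register counts; the helpers below
  // perform that conversion after accounting for extra SGPRs (VCC, flat
  // scratch, XNACK) and subtarget quirks such as the SGPR init bug.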
3791 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3792 3793 unsigned NumVGPRs = NextFreeVGPR; 3794 unsigned NumSGPRs = NextFreeSGPR; 3795 3796 if (Version.Major >= 10) 3797 NumSGPRs = 0; 3798 else { 3799 unsigned MaxAddressableNumSGPRs = 3800 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3801 3802 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3803 NumSGPRs > MaxAddressableNumSGPRs) 3804 return OutOfRangeError(SGPRRange); 3805 3806 NumSGPRs += 3807 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3808 3809 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3810 NumSGPRs > MaxAddressableNumSGPRs) 3811 return OutOfRangeError(SGPRRange); 3812 3813 if (Features.test(FeatureSGPRInitBug)) 3814 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3815 } 3816 3817 VGPRBlocks = 3818 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3819 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3820 3821 return false; 3822 } 3823 3824 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3825 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3826 return TokError("directive only supported for amdgcn architecture"); 3827 3828 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3829 return TokError("directive only supported for amdhsa OS"); 3830 3831 StringRef KernelName; 3832 if (getParser().parseIdentifier(KernelName)) 3833 return true; 3834 3835 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3836 3837 StringSet<> Seen; 3838 3839 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3840 3841 SMRange VGPRRange; 3842 uint64_t NextFreeVGPR = 0; 3843 SMRange SGPRRange; 3844 uint64_t NextFreeSGPR = 0; 3845 unsigned UserSGPRCount = 0; 3846 bool ReserveVCC = true; 3847 bool ReserveFlatScr = true; 3848 bool ReserveXNACK = hasXNACK(); 3849 Optional<bool> EnableWavefrontSize32; 3850 3851 while (true) { 3852 while (getLexer().is(AsmToken::EndOfStatement)) 3853 Lex(); 3854 3855 if (getLexer().isNot(AsmToken::Identifier)) 3856 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3857 3858 StringRef ID = getTok().getIdentifier(); 3859 SMRange IDRange = getTok().getLocRange(); 3860 Lex(); 3861 3862 if (ID == ".end_amdhsa_kernel") 3863 break; 3864 3865 if (Seen.find(ID) != Seen.end()) 3866 return TokError(".amdhsa_ directives cannot be repeated"); 3867 Seen.insert(ID); 3868 3869 SMLoc ValStart = getTok().getLoc(); 3870 int64_t IVal; 3871 if (getParser().parseAbsoluteExpression(IVal)) 3872 return true; 3873 SMLoc ValEnd = getTok().getLoc(); 3874 SMRange ValRange = SMRange(ValStart, ValEnd); 3875 3876 if (IVal < 0) 3877 return OutOfRangeError(ValRange); 3878 3879 uint64_t Val = IVal; 3880 3881 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3882 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3883 return OutOfRangeError(RANGE); \ 3884 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3885 3886 if (ID == ".amdhsa_group_segment_fixed_size") { 3887 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3888 return OutOfRangeError(ValRange); 3889 KD.group_segment_fixed_size = Val; 3890 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3891 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3892 return OutOfRangeError(ValRange); 3893 KD.private_segment_fixed_size = Val; 3894 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3895 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3896 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3897 Val, ValRange); 
3898 if (Val) 3899 UserSGPRCount += 4; 3900 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3901 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3902 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3903 ValRange); 3904 if (Val) 3905 UserSGPRCount += 2; 3906 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3907 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3908 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3909 ValRange); 3910 if (Val) 3911 UserSGPRCount += 2; 3912 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3913 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3914 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3915 Val, ValRange); 3916 if (Val) 3917 UserSGPRCount += 2; 3918 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3919 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3920 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3921 ValRange); 3922 if (Val) 3923 UserSGPRCount += 2; 3924 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3925 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3926 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3927 ValRange); 3928 if (Val) 3929 UserSGPRCount += 2; 3930 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3931 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3932 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3933 Val, ValRange); 3934 if (Val) 3935 UserSGPRCount += 1; 3936 } else if (ID == ".amdhsa_wavefront_size32") { 3937 if (IVersion.Major < 10) 3938 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3939 IDRange); 3940 EnableWavefrontSize32 = Val; 3941 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3942 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3943 Val, ValRange); 3944 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3945 PARSE_BITS_ENTRY( 3946 KD.compute_pgm_rsrc2, 3947 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3948 ValRange); 3949 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3950 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3951 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3952 ValRange); 3953 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3954 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3955 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3956 ValRange); 3957 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3958 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3959 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3960 ValRange); 3961 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3962 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3963 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3964 ValRange); 3965 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3966 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3967 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3968 ValRange); 3969 } else if (ID == ".amdhsa_next_free_vgpr") { 3970 VGPRRange = ValRange; 3971 NextFreeVGPR = Val; 3972 } else if (ID == ".amdhsa_next_free_sgpr") { 3973 SGPRRange = ValRange; 3974 NextFreeSGPR = Val; 3975 } else if (ID == ".amdhsa_reserve_vcc") { 3976 if (!isUInt<1>(Val)) 3977 return OutOfRangeError(ValRange); 3978 ReserveVCC = Val; 3979 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3980 if (IVersion.Major < 7) 3981 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3982 IDRange); 3983 if (!isUInt<1>(Val)) 3984 return OutOfRangeError(ValRange); 3985 ReserveFlatScr = Val; 3986 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3987 if (IVersion.Major < 8) 3988 return getParser().Error(IDRange.Start, 
"directive requires gfx8+", 3989 IDRange); 3990 if (!isUInt<1>(Val)) 3991 return OutOfRangeError(ValRange); 3992 ReserveXNACK = Val; 3993 } else if (ID == ".amdhsa_float_round_mode_32") { 3994 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3995 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3996 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3997 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3998 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3999 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4000 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4001 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4002 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4003 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4004 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4005 ValRange); 4006 } else if (ID == ".amdhsa_dx10_clamp") { 4007 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4008 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4009 } else if (ID == ".amdhsa_ieee_mode") { 4010 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4011 Val, ValRange); 4012 } else if (ID == ".amdhsa_fp16_overflow") { 4013 if (IVersion.Major < 9) 4014 return getParser().Error(IDRange.Start, "directive requires gfx9+", 4015 IDRange); 4016 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4017 ValRange); 4018 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4019 if (IVersion.Major < 10) 4020 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4021 IDRange); 4022 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4023 ValRange); 4024 } else if (ID == ".amdhsa_memory_ordered") { 4025 if (IVersion.Major < 10) 4026 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4027 IDRange); 4028 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4029 ValRange); 4030 } else if (ID == ".amdhsa_forward_progress") { 4031 if (IVersion.Major < 10) 4032 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4033 IDRange); 4034 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4035 ValRange); 4036 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4037 PARSE_BITS_ENTRY( 4038 KD.compute_pgm_rsrc2, 4039 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4040 ValRange); 4041 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4042 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4043 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4044 Val, ValRange); 4045 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4046 PARSE_BITS_ENTRY( 4047 KD.compute_pgm_rsrc2, 4048 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4049 ValRange); 4050 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4051 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4052 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4053 Val, ValRange); 4054 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4055 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4056 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4057 Val, ValRange); 4058 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4059 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4060 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4061 Val, ValRange); 4062 } else if (ID == ".amdhsa_exception_int_div_zero") { 4063 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4064 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4065 Val, ValRange); 4066 } else { 4067 return getParser().Error(IDRange.Start, 
4068 "unknown .amdhsa_kernel directive", IDRange); 4069 } 4070 4071 #undef PARSE_BITS_ENTRY 4072 } 4073 4074 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4075 return TokError(".amdhsa_next_free_vgpr directive is required"); 4076 4077 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4078 return TokError(".amdhsa_next_free_sgpr directive is required"); 4079 4080 unsigned VGPRBlocks; 4081 unsigned SGPRBlocks; 4082 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4083 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4084 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4085 SGPRBlocks)) 4086 return true; 4087 4088 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4089 VGPRBlocks)) 4090 return OutOfRangeError(VGPRRange); 4091 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4092 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4093 4094 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4095 SGPRBlocks)) 4096 return OutOfRangeError(SGPRRange); 4097 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4098 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4099 SGPRBlocks); 4100 4101 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4102 return TokError("too many user SGPRs enabled"); 4103 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4104 UserSGPRCount); 4105 4106 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4107 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4108 ReserveFlatScr, ReserveXNACK); 4109 return false; 4110 } 4111 4112 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4113 uint32_t Major; 4114 uint32_t Minor; 4115 4116 if (ParseDirectiveMajorMinor(Major, Minor)) 4117 return true; 4118 4119 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4120 return false; 4121 } 4122 4123 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4124 uint32_t Major; 4125 uint32_t Minor; 4126 uint32_t Stepping; 4127 StringRef VendorName; 4128 StringRef ArchName; 4129 4130 // If this directive has no arguments, then use the ISA version for the 4131 // targeted GPU. 
4132 if (getLexer().is(AsmToken::EndOfStatement)) { 4133 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4134 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4135 ISA.Stepping, 4136 "AMD", "AMDGPU"); 4137 return false; 4138 } 4139 4140 if (ParseDirectiveMajorMinor(Major, Minor)) 4141 return true; 4142 4143 if (getLexer().isNot(AsmToken::Comma)) 4144 return TokError("stepping version number required, comma expected"); 4145 Lex(); 4146 4147 if (ParseAsAbsoluteExpression(Stepping)) 4148 return TokError("invalid stepping version"); 4149 4150 if (getLexer().isNot(AsmToken::Comma)) 4151 return TokError("vendor name required, comma expected"); 4152 Lex(); 4153 4154 if (getLexer().isNot(AsmToken::String)) 4155 return TokError("invalid vendor name"); 4156 4157 VendorName = getLexer().getTok().getStringContents(); 4158 Lex(); 4159 4160 if (getLexer().isNot(AsmToken::Comma)) 4161 return TokError("arch name required, comma expected"); 4162 Lex(); 4163 4164 if (getLexer().isNot(AsmToken::String)) 4165 return TokError("invalid arch name"); 4166 4167 ArchName = getLexer().getTok().getStringContents(); 4168 Lex(); 4169 4170 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4171 VendorName, ArchName); 4172 return false; 4173 } 4174 4175 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4176 amd_kernel_code_t &Header) { 4177 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4178 // assembly for backwards compatibility. 4179 if (ID == "max_scratch_backing_memory_byte_size") { 4180 Parser.eatToEndOfStatement(); 4181 return false; 4182 } 4183 4184 SmallString<40> ErrStr; 4185 raw_svector_ostream Err(ErrStr); 4186 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4187 return TokError(Err.str()); 4188 } 4189 Lex(); 4190 4191 if (ID == "enable_wavefront_size32") { 4192 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4193 if (!isGFX10()) 4194 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4195 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4196 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4197 } else { 4198 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4199 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4200 } 4201 } 4202 4203 if (ID == "wavefront_size") { 4204 if (Header.wavefront_size == 5) { 4205 if (!isGFX10()) 4206 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4207 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4208 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4209 } else if (Header.wavefront_size == 6) { 4210 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4211 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4212 } 4213 } 4214 4215 if (ID == "enable_wgp_mode") { 4216 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4217 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4218 } 4219 4220 if (ID == "enable_mem_ordered") { 4221 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4222 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4223 } 4224 4225 if (ID == "enable_fwd_progress") { 4226 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4227 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4228 } 4229 4230 return false; 4231 } 4232 4233 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4234 amd_kernel_code_t Header; 4235 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4236 4237 while (true) { 4238 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4239 // will set the current token to EndOfStatement. 4240 while(getLexer().is(AsmToken::EndOfStatement)) 4241 Lex(); 4242 4243 if (getLexer().isNot(AsmToken::Identifier)) 4244 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4245 4246 StringRef ID = getLexer().getTok().getIdentifier(); 4247 Lex(); 4248 4249 if (ID == ".end_amd_kernel_code_t") 4250 break; 4251 4252 if (ParseAMDKernelCodeTValue(ID, Header)) 4253 return true; 4254 } 4255 4256 getTargetStreamer().EmitAMDKernelCodeT(Header); 4257 4258 return false; 4259 } 4260 4261 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4262 if (getLexer().isNot(AsmToken::Identifier)) 4263 return TokError("expected symbol name"); 4264 4265 StringRef KernelName = Parser.getTok().getString(); 4266 4267 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4268 ELF::STT_AMDGPU_HSA_KERNEL); 4269 Lex(); 4270 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4271 KernelScope.initialize(getContext()); 4272 return false; 4273 } 4274 4275 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4276 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4277 return Error(getParser().getTok().getLoc(), 4278 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4279 "architectures"); 4280 } 4281 4282 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4283 4284 std::string ISAVersionStringFromSTI; 4285 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4286 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4287 4288 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4289 return Error(getParser().getTok().getLoc(), 4290 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4291 "arguments specified through the command line"); 4292 } 4293 4294 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4295 Lex(); 4296 4297 return false; 4298 } 4299 4300 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4301 const char *AssemblerDirectiveBegin; 4302 const char *AssemblerDirectiveEnd; 4303 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4304 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4305 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4306 HSAMD::V3::AssemblerDirectiveEnd) 4307 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4308 HSAMD::AssemblerDirectiveEnd); 4309 4310 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4311 return Error(getParser().getTok().getLoc(), 4312 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4313 "not available on non-amdhsa OSes")).str()); 4314 } 4315 4316 std::string HSAMetadataString; 4317 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4318 HSAMetadataString)) 4319 return true; 4320 4321 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4322 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4323 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4324 } else { 4325 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4326 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4327 } 4328 4329 return false; 4330 } 4331 4332 /// Common code to parse out a block of text (typically YAML) between start and 4333 /// end directives. 
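/// The begin directive has already been consumed by the caller; everything up
/// to (but not including) the end directive is collected into CollectString,
/// with whitespace preserved and statements joined by the target's separator
/// string so the payload can be re-parsed later.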
4334 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4335 const char *AssemblerDirectiveEnd, 4336 std::string &CollectString) { 4337 4338 raw_string_ostream CollectStream(CollectString); 4339 4340 getLexer().setSkipSpace(false); 4341 4342 bool FoundEnd = false; 4343 while (!getLexer().is(AsmToken::Eof)) { 4344 while (getLexer().is(AsmToken::Space)) { 4345 CollectStream << getLexer().getTok().getString(); 4346 Lex(); 4347 } 4348 4349 if (getLexer().is(AsmToken::Identifier)) { 4350 StringRef ID = getLexer().getTok().getIdentifier(); 4351 if (ID == AssemblerDirectiveEnd) { 4352 Lex(); 4353 FoundEnd = true; 4354 break; 4355 } 4356 } 4357 4358 CollectStream << Parser.parseStringToEndOfStatement() 4359 << getContext().getAsmInfo()->getSeparatorString(); 4360 4361 Parser.eatToEndOfStatement(); 4362 } 4363 4364 getLexer().setSkipSpace(true); 4365 4366 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4367 return TokError(Twine("expected directive ") + 4368 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4369 } 4370 4371 CollectStream.flush(); 4372 return false; 4373 } 4374 4375 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4376 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4377 std::string String; 4378 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4379 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4380 return true; 4381 4382 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4383 if (!PALMetadata->setFromString(String)) 4384 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4385 return false; 4386 } 4387 4388 /// Parse the assembler directive for old linear-format PAL metadata. 4389 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4390 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4391 return Error(getParser().getTok().getLoc(), 4392 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4393 "not available on non-amdpal OSes")).str()); 4394 } 4395 4396 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4397 PALMetadata->setLegacy(); 4398 for (;;) { 4399 uint32_t Key, Value; 4400 if (ParseAsAbsoluteExpression(Key)) { 4401 return TokError(Twine("invalid value in ") + 4402 Twine(PALMD::AssemblerDirective)); 4403 } 4404 if (getLexer().isNot(AsmToken::Comma)) { 4405 return TokError(Twine("expected an even number of values in ") + 4406 Twine(PALMD::AssemblerDirective)); 4407 } 4408 Lex(); 4409 if (ParseAsAbsoluteExpression(Value)) { 4410 return TokError(Twine("invalid value in ") + 4411 Twine(PALMD::AssemblerDirective)); 4412 } 4413 PALMetadata->setRegister(Key, Value); 4414 if (getLexer().isNot(AsmToken::Comma)) 4415 break; 4416 Lex(); 4417 } 4418 return false; 4419 } 4420 4421 /// ParseDirectiveAMDGPULDS 4422 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4423 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4424 if (getParser().checkForValidSection()) 4425 return true; 4426 4427 StringRef Name; 4428 SMLoc NameLoc = getLexer().getLoc(); 4429 if (getParser().parseIdentifier(Name)) 4430 return TokError("expected identifier in directive"); 4431 4432 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4433 if (parseToken(AsmToken::Comma, "expected ','")) 4434 return true; 4435 4436 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4437 4438 int64_t Size; 4439 SMLoc SizeLoc = getLexer().getLoc(); 4440 if (getParser().parseAbsoluteExpression(Size)) 4441 return true; 4442 if (Size < 0) 4443 return 
Error(SizeLoc, "size must be non-negative"); 4444 if (Size > LocalMemorySize) 4445 return Error(SizeLoc, "size is too large"); 4446 4447 int64_t Align = 4; 4448 if (getLexer().is(AsmToken::Comma)) { 4449 Lex(); 4450 SMLoc AlignLoc = getLexer().getLoc(); 4451 if (getParser().parseAbsoluteExpression(Align)) 4452 return true; 4453 if (Align < 0 || !isPowerOf2_64(Align)) 4454 return Error(AlignLoc, "alignment must be a power of two"); 4455 4456 // Alignment larger than the size of LDS is possible in theory, as long 4457 // as the linker manages to place to symbol at address 0, but we do want 4458 // to make sure the alignment fits nicely into a 32-bit integer. 4459 if (Align >= 1u << 31) 4460 return Error(AlignLoc, "alignment is too large"); 4461 } 4462 4463 if (parseToken(AsmToken::EndOfStatement, 4464 "unexpected token in '.amdgpu_lds' directive")) 4465 return true; 4466 4467 Symbol->redefineIfPossible(); 4468 if (!Symbol->isUndefined()) 4469 return Error(NameLoc, "invalid symbol redefinition"); 4470 4471 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align); 4472 return false; 4473 } 4474 4475 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4476 StringRef IDVal = DirectiveID.getString(); 4477 4478 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4479 if (IDVal == ".amdgcn_target") 4480 return ParseDirectiveAMDGCNTarget(); 4481 4482 if (IDVal == ".amdhsa_kernel") 4483 return ParseDirectiveAMDHSAKernel(); 4484 4485 // TODO: Restructure/combine with PAL metadata directive. 4486 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4487 return ParseDirectiveHSAMetadata(); 4488 } else { 4489 if (IDVal == ".hsa_code_object_version") 4490 return ParseDirectiveHSACodeObjectVersion(); 4491 4492 if (IDVal == ".hsa_code_object_isa") 4493 return ParseDirectiveHSACodeObjectISA(); 4494 4495 if (IDVal == ".amd_kernel_code_t") 4496 return ParseDirectiveAMDKernelCodeT(); 4497 4498 if (IDVal == ".amdgpu_hsa_kernel") 4499 return ParseDirectiveAMDGPUHsaKernel(); 4500 4501 if (IDVal == ".amd_amdgpu_isa") 4502 return ParseDirectiveISAVersion(); 4503 4504 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4505 return ParseDirectiveHSAMetadata(); 4506 } 4507 4508 if (IDVal == ".amdgpu_lds") 4509 return ParseDirectiveAMDGPULDS(); 4510 4511 if (IDVal == PALMD::AssemblerDirectiveBegin) 4512 return ParseDirectivePALMetadataBegin(); 4513 4514 if (IDVal == PALMD::AssemblerDirective) 4515 return ParseDirectivePALMetadata(); 4516 4517 return true; 4518 } 4519 4520 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4521 unsigned RegNo) const { 4522 4523 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4524 R.isValid(); ++R) { 4525 if (*R == RegNo) 4526 return isGFX9() || isGFX10(); 4527 } 4528 4529 // GFX10 has 2 more SGPRs 104 and 105. 
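  // Any register aliasing s[104:105] is therefore only accepted when the
  // subtarget actually provides those SGPRs.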
4530 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4531 R.isValid(); ++R) { 4532 if (*R == RegNo) 4533 return hasSGPR104_SGPR105(); 4534 } 4535 4536 switch (RegNo) { 4537 case AMDGPU::SRC_SHARED_BASE: 4538 case AMDGPU::SRC_SHARED_LIMIT: 4539 case AMDGPU::SRC_PRIVATE_BASE: 4540 case AMDGPU::SRC_PRIVATE_LIMIT: 4541 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4542 return !isCI() && !isSI() && !isVI(); 4543 case AMDGPU::TBA: 4544 case AMDGPU::TBA_LO: 4545 case AMDGPU::TBA_HI: 4546 case AMDGPU::TMA: 4547 case AMDGPU::TMA_LO: 4548 case AMDGPU::TMA_HI: 4549 return !isGFX9() && !isGFX10(); 4550 case AMDGPU::XNACK_MASK: 4551 case AMDGPU::XNACK_MASK_LO: 4552 case AMDGPU::XNACK_MASK_HI: 4553 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4554 case AMDGPU::SGPR_NULL: 4555 return isGFX10(); 4556 default: 4557 break; 4558 } 4559 4560 if (isCI()) 4561 return true; 4562 4563 if (isSI() || isGFX10()) { 4564 // No flat_scr on SI. 4565 // On GFX10 flat scratch is not a valid register operand and can only be 4566 // accessed with s_setreg/s_getreg. 4567 switch (RegNo) { 4568 case AMDGPU::FLAT_SCR: 4569 case AMDGPU::FLAT_SCR_LO: 4570 case AMDGPU::FLAT_SCR_HI: 4571 return false; 4572 default: 4573 return true; 4574 } 4575 } 4576 4577 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4578 // SI/CI have. 4579 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4580 R.isValid(); ++R) { 4581 if (*R == RegNo) 4582 return hasSGPR102_SGPR103(); 4583 } 4584 4585 return true; 4586 } 4587 4588 OperandMatchResultTy 4589 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4590 OperandMode Mode) { 4591 // Try to parse with a custom parser 4592 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4593 4594 // If we successfully parsed the operand or if there as an error parsing, 4595 // we are done. 4596 // 4597 // If we are parsing after we reach EndOfStatement then this means we 4598 // are appending default values to the Operands list. This is only done 4599 // by custom parser, so we shouldn't continue on to the generic parsing. 4600 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4601 getLexer().is(AsmToken::EndOfStatement)) 4602 return ResTy; 4603 4604 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4605 unsigned Prefix = Operands.size(); 4606 SMLoc LBraceLoc = getTok().getLoc(); 4607 Parser.Lex(); // eat the '[' 4608 4609 for (;;) { 4610 ResTy = parseReg(Operands); 4611 if (ResTy != MatchOperand_Success) 4612 return ResTy; 4613 4614 if (getLexer().is(AsmToken::RBrac)) 4615 break; 4616 4617 if (getLexer().isNot(AsmToken::Comma)) 4618 return MatchOperand_ParseFail; 4619 Parser.Lex(); 4620 } 4621 4622 if (Operands.size() - Prefix > 1) { 4623 Operands.insert(Operands.begin() + Prefix, 4624 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4625 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4626 getTok().getLoc())); 4627 } 4628 4629 Parser.Lex(); // eat the ']' 4630 return MatchOperand_Success; 4631 } 4632 4633 return parseRegOrImm(Operands); 4634 } 4635 4636 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4637 // Clear any forced encodings from the previous instruction. 
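  // A suffix such as _e32, _e64, _dpp or _sdwa on the incoming mnemonic sets a
  // new forced encoding below; e.g. "v_add_f32_e64" is matched as "v_add_f32"
  // with the 64-bit (VOP3) encoding forced.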
4638 setForcedEncodingSize(0); 4639 setForcedDPP(false); 4640 setForcedSDWA(false); 4641 4642 if (Name.endswith("_e64")) { 4643 setForcedEncodingSize(64); 4644 return Name.substr(0, Name.size() - 4); 4645 } else if (Name.endswith("_e32")) { 4646 setForcedEncodingSize(32); 4647 return Name.substr(0, Name.size() - 4); 4648 } else if (Name.endswith("_dpp")) { 4649 setForcedDPP(true); 4650 return Name.substr(0, Name.size() - 4); 4651 } else if (Name.endswith("_sdwa")) { 4652 setForcedSDWA(true); 4653 return Name.substr(0, Name.size() - 5); 4654 } 4655 return Name; 4656 } 4657 4658 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4659 StringRef Name, 4660 SMLoc NameLoc, OperandVector &Operands) { 4661 // Add the instruction mnemonic 4662 Name = parseMnemonicSuffix(Name); 4663 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4664 4665 bool IsMIMG = Name.startswith("image_"); 4666 4667 while (!getLexer().is(AsmToken::EndOfStatement)) { 4668 OperandMode Mode = OperandMode_Default; 4669 if (IsMIMG && isGFX10() && Operands.size() == 2) 4670 Mode = OperandMode_NSA; 4671 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4672 4673 // Eat the comma or space if there is one. 4674 if (getLexer().is(AsmToken::Comma)) 4675 Parser.Lex(); 4676 4677 switch (Res) { 4678 case MatchOperand_Success: break; 4679 case MatchOperand_ParseFail: 4680 // FIXME: use real operand location rather than the current location. 4681 Error(getLexer().getLoc(), "failed parsing operand."); 4682 while (!getLexer().is(AsmToken::EndOfStatement)) { 4683 Parser.Lex(); 4684 } 4685 return true; 4686 case MatchOperand_NoMatch: 4687 // FIXME: use real operand location rather than the current location. 4688 Error(getLexer().getLoc(), "not a valid operand."); 4689 while (!getLexer().is(AsmToken::EndOfStatement)) { 4690 Parser.Lex(); 4691 } 4692 return true; 4693 } 4694 } 4695 4696 return false; 4697 } 4698 4699 //===----------------------------------------------------------------------===// 4700 // Utility functions 4701 //===----------------------------------------------------------------------===// 4702 4703 OperandMatchResultTy 4704 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4705 4706 if (!trySkipId(Prefix, AsmToken::Colon)) 4707 return MatchOperand_NoMatch; 4708 4709 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4710 } 4711 4712 OperandMatchResultTy 4713 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4714 AMDGPUOperand::ImmTy ImmTy, 4715 bool (*ConvertResult)(int64_t&)) { 4716 SMLoc S = getLoc(); 4717 int64_t Value = 0; 4718 4719 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4720 if (Res != MatchOperand_Success) 4721 return Res; 4722 4723 if (ConvertResult && !ConvertResult(Value)) { 4724 Error(S, "invalid " + StringRef(Prefix) + " value."); 4725 } 4726 4727 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4728 return MatchOperand_Success; 4729 } 4730 4731 OperandMatchResultTy 4732 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4733 OperandVector &Operands, 4734 AMDGPUOperand::ImmTy ImmTy, 4735 bool (*ConvertResult)(int64_t&)) { 4736 SMLoc S = getLoc(); 4737 if (!trySkipId(Prefix, AsmToken::Colon)) 4738 return MatchOperand_NoMatch; 4739 4740 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4741 return MatchOperand_ParseFail; 4742 4743 unsigned Val = 0; 4744 const unsigned MaxSize = 4; 4745 4746 // FIXME: How to verify the number of elements matches the number of src 4747 // operands? 4748 for (int I = 0; ; ++I) { 4749 int64_t Op; 4750 SMLoc Loc = getLoc(); 4751 if (!parseExpr(Op)) 4752 return MatchOperand_ParseFail; 4753 4754 if (Op != 0 && Op != 1) { 4755 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4756 return MatchOperand_ParseFail; 4757 } 4758 4759 Val |= (Op << I); 4760 4761 if (trySkipToken(AsmToken::RBrac)) 4762 break; 4763 4764 if (I + 1 == MaxSize) { 4765 Error(getLoc(), "expected a closing square bracket"); 4766 return MatchOperand_ParseFail; 4767 } 4768 4769 if (!skipToken(AsmToken::Comma, "expected a comma")) 4770 return MatchOperand_ParseFail; 4771 } 4772 4773 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4774 return MatchOperand_Success; 4775 } 4776 4777 OperandMatchResultTy 4778 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4779 AMDGPUOperand::ImmTy ImmTy) { 4780 int64_t Bit = 0; 4781 SMLoc S = Parser.getTok().getLoc(); 4782 4783 // We are at the end of the statement, and this is a default argument, so 4784 // use a default value. 
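  // Otherwise the bit is written either as the bare name (e.g. "glc" sets it)
  // or with a "no" prefix (e.g. "noglc" clears it).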
4785 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4786 switch(getLexer().getKind()) { 4787 case AsmToken::Identifier: { 4788 StringRef Tok = Parser.getTok().getString(); 4789 if (Tok == Name) { 4790 if (Tok == "r128" && !hasMIMG_R128()) 4791 Error(S, "r128 modifier is not supported on this GPU"); 4792 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 4793 Error(S, "a16 modifier is not supported on this GPU"); 4794 Bit = 1; 4795 Parser.Lex(); 4796 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4797 Bit = 0; 4798 Parser.Lex(); 4799 } else { 4800 return MatchOperand_NoMatch; 4801 } 4802 break; 4803 } 4804 default: 4805 return MatchOperand_NoMatch; 4806 } 4807 } 4808 4809 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4810 return MatchOperand_ParseFail; 4811 4812 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 4813 ImmTy = AMDGPUOperand::ImmTyR128A16; 4814 4815 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4816 return MatchOperand_Success; 4817 } 4818 4819 static void addOptionalImmOperand( 4820 MCInst& Inst, const OperandVector& Operands, 4821 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4822 AMDGPUOperand::ImmTy ImmT, 4823 int64_t Default = 0) { 4824 auto i = OptionalIdx.find(ImmT); 4825 if (i != OptionalIdx.end()) { 4826 unsigned Idx = i->second; 4827 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4828 } else { 4829 Inst.addOperand(MCOperand::createImm(Default)); 4830 } 4831 } 4832 4833 OperandMatchResultTy 4834 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4835 if (getLexer().isNot(AsmToken::Identifier)) { 4836 return MatchOperand_NoMatch; 4837 } 4838 StringRef Tok = Parser.getTok().getString(); 4839 if (Tok != Prefix) { 4840 return MatchOperand_NoMatch; 4841 } 4842 4843 Parser.Lex(); 4844 if (getLexer().isNot(AsmToken::Colon)) { 4845 return MatchOperand_ParseFail; 4846 } 4847 4848 Parser.Lex(); 4849 if (getLexer().isNot(AsmToken::Identifier)) { 4850 return MatchOperand_ParseFail; 4851 } 4852 4853 Value = Parser.getTok().getString(); 4854 return MatchOperand_Success; 4855 } 4856 4857 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4858 // values to live in a joint format operand in the MCInst encoding. 4859 OperandMatchResultTy 4860 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4861 SMLoc S = Parser.getTok().getLoc(); 4862 int64_t Dfmt = 0, Nfmt = 0; 4863 // dfmt and nfmt can appear in either order, and each is optional. 
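  // For example "dfmt:1, nfmt:7" and "nfmt:7, dfmt:1" are equivalent; the two
  // values are packed into a single format operand as dfmt | (nfmt << 4).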
4864 bool GotDfmt = false, GotNfmt = false; 4865 while (!GotDfmt || !GotNfmt) { 4866 if (!GotDfmt) { 4867 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4868 if (Res != MatchOperand_NoMatch) { 4869 if (Res != MatchOperand_Success) 4870 return Res; 4871 if (Dfmt >= 16) { 4872 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4873 return MatchOperand_ParseFail; 4874 } 4875 GotDfmt = true; 4876 Parser.Lex(); 4877 continue; 4878 } 4879 } 4880 if (!GotNfmt) { 4881 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4882 if (Res != MatchOperand_NoMatch) { 4883 if (Res != MatchOperand_Success) 4884 return Res; 4885 if (Nfmt >= 8) { 4886 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4887 return MatchOperand_ParseFail; 4888 } 4889 GotNfmt = true; 4890 Parser.Lex(); 4891 continue; 4892 } 4893 } 4894 break; 4895 } 4896 if (!GotDfmt && !GotNfmt) 4897 return MatchOperand_NoMatch; 4898 auto Format = Dfmt | Nfmt << 4; 4899 Operands.push_back( 4900 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4901 return MatchOperand_Success; 4902 } 4903 4904 //===----------------------------------------------------------------------===// 4905 // ds 4906 //===----------------------------------------------------------------------===// 4907 4908 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4909 const OperandVector &Operands) { 4910 OptionalImmIndexMap OptionalIdx; 4911 4912 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4913 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4914 4915 // Add the register arguments 4916 if (Op.isReg()) { 4917 Op.addRegOperands(Inst, 1); 4918 continue; 4919 } 4920 4921 // Handle optional arguments 4922 OptionalIdx[Op.getImmTy()] = i; 4923 } 4924 4925 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4926 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4927 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4928 4929 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4930 } 4931 4932 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4933 bool IsGdsHardcoded) { 4934 OptionalImmIndexMap OptionalIdx; 4935 4936 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4937 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4938 4939 // Add the register arguments 4940 if (Op.isReg()) { 4941 Op.addRegOperands(Inst, 1); 4942 continue; 4943 } 4944 4945 if (Op.isToken() && Op.getToken() == "gds") { 4946 IsGdsHardcoded = true; 4947 continue; 4948 } 4949 4950 // Handle optional arguments 4951 OptionalIdx[Op.getImmTy()] = i; 4952 } 4953 4954 AMDGPUOperand::ImmTy OffsetType = 4955 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4956 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4957 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4958 AMDGPUOperand::ImmTyOffset; 4959 4960 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4961 4962 if (!IsGdsHardcoded) { 4963 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4964 } 4965 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4966 } 4967 4968 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4969 OptionalImmIndexMap OptionalIdx; 4970 4971 unsigned OperandIdx[4]; 4972 unsigned EnMask = 0; 4973 int SrcIdx = 0; 4974 4975 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4976 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4977 4978 // Add the register arguments 4979 if (Op.isReg()) { 4980 assert(SrcIdx < 4); 4981 OperandIdx[SrcIdx] = Inst.size(); 4982 Op.addRegOperands(Inst, 1); 4983 ++SrcIdx; 4984 continue; 4985 } 4986 4987 if (Op.isOff()) { 4988 assert(SrcIdx < 4); 4989 OperandIdx[SrcIdx] = Inst.size(); 4990 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4991 ++SrcIdx; 4992 continue; 4993 } 4994 4995 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4996 Op.addImmOperands(Inst, 1); 4997 continue; 4998 } 4999 5000 if (Op.isToken() && Op.getToken() == "done") 5001 continue; 5002 5003 // Handle optional arguments 5004 OptionalIdx[Op.getImmTy()] = i; 5005 } 5006 5007 assert(SrcIdx == 4); 5008 5009 bool Compr = false; 5010 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5011 Compr = true; 5012 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5013 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5014 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5015 } 5016 5017 for (auto i = 0; i < SrcIdx; ++i) { 5018 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5019 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5020 } 5021 } 5022 5023 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5024 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5025 5026 Inst.addOperand(MCOperand::createImm(EnMask)); 5027 } 5028 5029 //===----------------------------------------------------------------------===// 5030 // s_waitcnt 5031 //===----------------------------------------------------------------------===// 5032 5033 static bool 5034 encodeCnt( 5035 const AMDGPU::IsaVersion ISA, 5036 int64_t &IntVal, 5037 int64_t CntVal, 5038 bool Saturate, 5039 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5040 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5041 { 5042 bool Failed = false; 5043 5044 IntVal = encode(ISA, IntVal, CntVal); 5045 if (CntVal != decode(ISA, IntVal)) { 5046 if (Saturate) { 5047 IntVal = encode(ISA, IntVal, -1); 5048 } else { 5049 Failed = true; 5050 } 5051 } 5052 return Failed; 5053 } 5054 5055 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5056 5057 SMLoc CntLoc = getLoc(); 5058 StringRef CntName = getTokenStr(); 5059 5060 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5061 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5062 return false; 5063 5064 int64_t CntVal; 5065 SMLoc ValLoc = getLoc(); 5066 if (!parseExpr(CntVal)) 5067 return false; 5068 5069 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5070 5071 bool Failed = true; 5072 bool Sat = CntName.endswith("_sat"); 5073 5074 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5075 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5076 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5077 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5078 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5079 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5080 } else { 5081 Error(CntLoc, "invalid counter name " + CntName); 5082 return false; 5083 } 5084 5085 if (Failed) { 5086 Error(ValLoc, "too large value for " + CntName); 5087 return false; 5088 } 5089 5090 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5091 return false; 5092 5093 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5094 if (isToken(AsmToken::EndOfStatement)) { 5095 Error(getLoc(), "expected a counter name"); 5096 return false; 5097 } 5098 } 5099 5100 return true; 5101 } 5102 5103 OperandMatchResultTy 5104 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5105 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5106 int64_t Waitcnt = getWaitcntBitMask(ISA); 5107 SMLoc S = getLoc(); 5108 5109 // If parse failed, do not return error code 5110 // to avoid excessive error messages. 
5111 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5112 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 5113 } else { 5114 parseExpr(Waitcnt); 5115 } 5116 5117 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5118 return MatchOperand_Success; 5119 } 5120 5121 bool 5122 AMDGPUOperand::isSWaitCnt() const { 5123 return isImm(); 5124 } 5125 5126 //===----------------------------------------------------------------------===// 5127 // hwreg 5128 //===----------------------------------------------------------------------===// 5129 5130 bool 5131 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5132 int64_t &Offset, 5133 int64_t &Width) { 5134 using namespace llvm::AMDGPU::Hwreg; 5135 5136 // The register may be specified by name or using a numeric code 5137 if (isToken(AsmToken::Identifier) && 5138 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5139 HwReg.IsSymbolic = true; 5140 lex(); // skip message name 5141 } else if (!parseExpr(HwReg.Id)) { 5142 return false; 5143 } 5144 5145 if (trySkipToken(AsmToken::RParen)) 5146 return true; 5147 5148 // parse optional params 5149 return 5150 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5151 parseExpr(Offset) && 5152 skipToken(AsmToken::Comma, "expected a comma") && 5153 parseExpr(Width) && 5154 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5155 } 5156 5157 bool 5158 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5159 const int64_t Offset, 5160 const int64_t Width, 5161 const SMLoc Loc) { 5162 5163 using namespace llvm::AMDGPU::Hwreg; 5164 5165 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5166 Error(Loc, "specified hardware register is not supported on this GPU"); 5167 return false; 5168 } else if (!isValidHwreg(HwReg.Id)) { 5169 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5170 return false; 5171 } else if (!isValidHwregOffset(Offset)) { 5172 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5173 return false; 5174 } else if (!isValidHwregWidth(Width)) { 5175 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5176 return false; 5177 } 5178 return true; 5179 } 5180 5181 OperandMatchResultTy 5182 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5183 using namespace llvm::AMDGPU::Hwreg; 5184 5185 int64_t ImmVal = 0; 5186 SMLoc Loc = getLoc(); 5187 5188 // If parse failed, do not return error code 5189 // to avoid excessive error messages. 
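  // Accepts "hwreg(<reg>[, <offset>, <width>])" with a symbolic or numeric
  // register id, or a raw 16-bit immediate.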
5190 if (trySkipId("hwreg", AsmToken::LParen)) { 5191 OperandInfoTy HwReg(ID_UNKNOWN_); 5192 int64_t Offset = OFFSET_DEFAULT_; 5193 int64_t Width = WIDTH_DEFAULT_; 5194 if (parseHwregBody(HwReg, Offset, Width) && 5195 validateHwreg(HwReg, Offset, Width, Loc)) { 5196 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5197 } 5198 } else if (parseExpr(ImmVal)) { 5199 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5200 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5201 } 5202 5203 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5204 return MatchOperand_Success; 5205 } 5206 5207 bool AMDGPUOperand::isHwreg() const { 5208 return isImmTy(ImmTyHwreg); 5209 } 5210 5211 //===----------------------------------------------------------------------===// 5212 // sendmsg 5213 //===----------------------------------------------------------------------===// 5214 5215 bool 5216 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5217 OperandInfoTy &Op, 5218 OperandInfoTy &Stream) { 5219 using namespace llvm::AMDGPU::SendMsg; 5220 5221 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5222 Msg.IsSymbolic = true; 5223 lex(); // skip message name 5224 } else if (!parseExpr(Msg.Id)) { 5225 return false; 5226 } 5227 5228 if (trySkipToken(AsmToken::Comma)) { 5229 Op.IsDefined = true; 5230 if (isToken(AsmToken::Identifier) && 5231 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5232 lex(); // skip operation name 5233 } else if (!parseExpr(Op.Id)) { 5234 return false; 5235 } 5236 5237 if (trySkipToken(AsmToken::Comma)) { 5238 Stream.IsDefined = true; 5239 if (!parseExpr(Stream.Id)) 5240 return false; 5241 } 5242 } 5243 5244 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5245 } 5246 5247 bool 5248 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5249 const OperandInfoTy &Op, 5250 const OperandInfoTy &Stream, 5251 const SMLoc S) { 5252 using namespace llvm::AMDGPU::SendMsg; 5253 5254 // Validation strictness depends on whether message is specified 5255 // in a symbolc or in a numeric form. In the latter case 5256 // only encoding possibility is checked. 5257 bool Strict = Msg.IsSymbolic; 5258 5259 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5260 Error(S, "invalid message id"); 5261 return false; 5262 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5263 Error(S, Op.IsDefined ? 5264 "message does not support operations" : 5265 "missing message operation"); 5266 return false; 5267 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5268 Error(S, "invalid operation id"); 5269 return false; 5270 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5271 Error(S, "message operation does not support streams"); 5272 return false; 5273 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5274 Error(S, "invalid message stream id"); 5275 return false; 5276 } 5277 return true; 5278 } 5279 5280 OperandMatchResultTy 5281 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5282 using namespace llvm::AMDGPU::SendMsg; 5283 5284 int64_t ImmVal = 0; 5285 SMLoc Loc = getLoc(); 5286 5287 // If parse failed, do not return error code 5288 // to avoid excessive error messages. 
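  // Accepts "sendmsg(<msg>[, <op>[, <stream>]])" with symbolic or numeric
  // message and operation ids and a numeric stream id, or a raw 16-bit
  // immediate.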
5289 if (trySkipId("sendmsg", AsmToken::LParen)) { 5290 OperandInfoTy Msg(ID_UNKNOWN_); 5291 OperandInfoTy Op(OP_NONE_); 5292 OperandInfoTy Stream(STREAM_ID_NONE_); 5293 if (parseSendMsgBody(Msg, Op, Stream) && 5294 validateSendMsg(Msg, Op, Stream, Loc)) { 5295 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5296 } 5297 } else if (parseExpr(ImmVal)) { 5298 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5299 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5300 } 5301 5302 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5303 return MatchOperand_Success; 5304 } 5305 5306 bool AMDGPUOperand::isSendMsg() const { 5307 return isImmTy(ImmTySendMsg); 5308 } 5309 5310 //===----------------------------------------------------------------------===// 5311 // v_interp 5312 //===----------------------------------------------------------------------===// 5313 5314 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5315 if (getLexer().getKind() != AsmToken::Identifier) 5316 return MatchOperand_NoMatch; 5317 5318 StringRef Str = Parser.getTok().getString(); 5319 int Slot = StringSwitch<int>(Str) 5320 .Case("p10", 0) 5321 .Case("p20", 1) 5322 .Case("p0", 2) 5323 .Default(-1); 5324 5325 SMLoc S = Parser.getTok().getLoc(); 5326 if (Slot == -1) 5327 return MatchOperand_ParseFail; 5328 5329 Parser.Lex(); 5330 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5331 AMDGPUOperand::ImmTyInterpSlot)); 5332 return MatchOperand_Success; 5333 } 5334 5335 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5336 if (getLexer().getKind() != AsmToken::Identifier) 5337 return MatchOperand_NoMatch; 5338 5339 StringRef Str = Parser.getTok().getString(); 5340 if (!Str.startswith("attr")) 5341 return MatchOperand_NoMatch; 5342 5343 StringRef Chan = Str.take_back(2); 5344 int AttrChan = StringSwitch<int>(Chan) 5345 .Case(".x", 0) 5346 .Case(".y", 1) 5347 .Case(".z", 2) 5348 .Case(".w", 3) 5349 .Default(-1); 5350 if (AttrChan == -1) 5351 return MatchOperand_ParseFail; 5352 5353 Str = Str.drop_back(2).drop_front(4); 5354 5355 uint8_t Attr; 5356 if (Str.getAsInteger(10, Attr)) 5357 return MatchOperand_ParseFail; 5358 5359 SMLoc S = Parser.getTok().getLoc(); 5360 Parser.Lex(); 5361 if (Attr > 63) { 5362 Error(S, "out of bounds attr"); 5363 return MatchOperand_Success; 5364 } 5365 5366 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5367 5368 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5369 AMDGPUOperand::ImmTyInterpAttr)); 5370 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5371 AMDGPUOperand::ImmTyAttrChan)); 5372 return MatchOperand_Success; 5373 } 5374 5375 //===----------------------------------------------------------------------===// 5376 // exp 5377 //===----------------------------------------------------------------------===// 5378 5379 void AMDGPUAsmParser::errorExpTgt() { 5380 Error(Parser.getTok().getLoc(), "invalid exp target"); 5381 } 5382 5383 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5384 uint8_t &Val) { 5385 if (Str == "null") { 5386 Val = 9; 5387 return MatchOperand_Success; 5388 } 5389 5390 if (Str.startswith("mrt")) { 5391 Str = Str.drop_front(3); 5392 if (Str == "z") { // == mrtz 5393 Val = 8; 5394 return MatchOperand_Success; 5395 } 5396 5397 if (Str.getAsInteger(10, Val)) 5398 return MatchOperand_ParseFail; 5399 5400 if (Val > 7) 5401 errorExpTgt(); 5402 5403 return MatchOperand_Success; 5404 } 5405 5406 if (Str.startswith("pos")) 
{ 5407 Str = Str.drop_front(3); 5408 if (Str.getAsInteger(10, Val)) 5409 return MatchOperand_ParseFail; 5410 5411 if (Val > 4 || (Val == 4 && !isGFX10())) 5412 errorExpTgt(); 5413 5414 Val += 12; 5415 return MatchOperand_Success; 5416 } 5417 5418 if (isGFX10() && Str == "prim") { 5419 Val = 20; 5420 return MatchOperand_Success; 5421 } 5422 5423 if (Str.startswith("param")) { 5424 Str = Str.drop_front(5); 5425 if (Str.getAsInteger(10, Val)) 5426 return MatchOperand_ParseFail; 5427 5428 if (Val >= 32) 5429 errorExpTgt(); 5430 5431 Val += 32; 5432 return MatchOperand_Success; 5433 } 5434 5435 if (Str.startswith("invalid_target_")) { 5436 Str = Str.drop_front(15); 5437 if (Str.getAsInteger(10, Val)) 5438 return MatchOperand_ParseFail; 5439 5440 errorExpTgt(); 5441 return MatchOperand_Success; 5442 } 5443 5444 return MatchOperand_NoMatch; 5445 } 5446 5447 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5448 uint8_t Val; 5449 StringRef Str = Parser.getTok().getString(); 5450 5451 auto Res = parseExpTgtImpl(Str, Val); 5452 if (Res != MatchOperand_Success) 5453 return Res; 5454 5455 SMLoc S = Parser.getTok().getLoc(); 5456 Parser.Lex(); 5457 5458 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5459 AMDGPUOperand::ImmTyExpTgt)); 5460 return MatchOperand_Success; 5461 } 5462 5463 //===----------------------------------------------------------------------===// 5464 // parser helpers 5465 //===----------------------------------------------------------------------===// 5466 5467 bool 5468 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5469 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5470 } 5471 5472 bool 5473 AMDGPUAsmParser::isId(const StringRef Id) const { 5474 return isId(getToken(), Id); 5475 } 5476 5477 bool 5478 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5479 return getTokenKind() == Kind; 5480 } 5481 5482 bool 5483 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5484 if (isId(Id)) { 5485 lex(); 5486 return true; 5487 } 5488 return false; 5489 } 5490 5491 bool 5492 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5493 if (isId(Id) && peekToken().is(Kind)) { 5494 lex(); 5495 lex(); 5496 return true; 5497 } 5498 return false; 5499 } 5500 5501 bool 5502 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5503 if (isToken(Kind)) { 5504 lex(); 5505 return true; 5506 } 5507 return false; 5508 } 5509 5510 bool 5511 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5512 const StringRef ErrMsg) { 5513 if (!trySkipToken(Kind)) { 5514 Error(getLoc(), ErrMsg); 5515 return false; 5516 } 5517 return true; 5518 } 5519 5520 bool 5521 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5522 return !getParser().parseAbsoluteExpression(Imm); 5523 } 5524 5525 bool 5526 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5527 SMLoc S = getLoc(); 5528 5529 const MCExpr *Expr; 5530 if (Parser.parseExpression(Expr)) 5531 return false; 5532 5533 int64_t IntVal; 5534 if (Expr->evaluateAsAbsolute(IntVal)) { 5535 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5536 } else { 5537 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5538 } 5539 return true; 5540 } 5541 5542 bool 5543 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5544 if (isToken(AsmToken::String)) { 5545 Val = getToken().getStringContents(); 5546 lex(); 5547 return true; 5548 } else { 5549 Error(getLoc(), ErrMsg); 5550 return false; 5551 } 5552 } 5553 5554 
AsmToken 5555 AMDGPUAsmParser::getToken() const { 5556 return Parser.getTok(); 5557 } 5558 5559 AsmToken 5560 AMDGPUAsmParser::peekToken() { 5561 return getLexer().peekTok(); 5562 } 5563 5564 void 5565 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5566 auto TokCount = getLexer().peekTokens(Tokens); 5567 5568 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5569 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5570 } 5571 5572 AsmToken::TokenKind 5573 AMDGPUAsmParser::getTokenKind() const { 5574 return getLexer().getKind(); 5575 } 5576 5577 SMLoc 5578 AMDGPUAsmParser::getLoc() const { 5579 return getToken().getLoc(); 5580 } 5581 5582 StringRef 5583 AMDGPUAsmParser::getTokenStr() const { 5584 return getToken().getString(); 5585 } 5586 5587 void 5588 AMDGPUAsmParser::lex() { 5589 Parser.Lex(); 5590 } 5591 5592 //===----------------------------------------------------------------------===// 5593 // swizzle 5594 //===----------------------------------------------------------------------===// 5595 5596 LLVM_READNONE 5597 static unsigned 5598 encodeBitmaskPerm(const unsigned AndMask, 5599 const unsigned OrMask, 5600 const unsigned XorMask) { 5601 using namespace llvm::AMDGPU::Swizzle; 5602 5603 return BITMASK_PERM_ENC | 5604 (AndMask << BITMASK_AND_SHIFT) | 5605 (OrMask << BITMASK_OR_SHIFT) | 5606 (XorMask << BITMASK_XOR_SHIFT); 5607 } 5608 5609 bool 5610 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5611 const unsigned MinVal, 5612 const unsigned MaxVal, 5613 const StringRef ErrMsg) { 5614 for (unsigned i = 0; i < OpNum; ++i) { 5615 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5616 return false; 5617 } 5618 SMLoc ExprLoc = Parser.getTok().getLoc(); 5619 if (!parseExpr(Op[i])) { 5620 return false; 5621 } 5622 if (Op[i] < MinVal || Op[i] > MaxVal) { 5623 Error(ExprLoc, ErrMsg); 5624 return false; 5625 } 5626 } 5627 5628 return true; 5629 } 5630 5631 bool 5632 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5633 using namespace llvm::AMDGPU::Swizzle; 5634 5635 int64_t Lane[LANE_NUM]; 5636 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5637 "expected a 2-bit lane id")) { 5638 Imm = QUAD_PERM_ENC; 5639 for (unsigned I = 0; I < LANE_NUM; ++I) { 5640 Imm |= Lane[I] << (LANE_SHIFT * I); 5641 } 5642 return true; 5643 } 5644 return false; 5645 } 5646 5647 bool 5648 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5649 using namespace llvm::AMDGPU::Swizzle; 5650 5651 SMLoc S = Parser.getTok().getLoc(); 5652 int64_t GroupSize; 5653 int64_t LaneIdx; 5654 5655 if (!parseSwizzleOperands(1, &GroupSize, 5656 2, 32, 5657 "group size must be in the interval [2,32]")) { 5658 return false; 5659 } 5660 if (!isPowerOf2_64(GroupSize)) { 5661 Error(S, "group size must be a power of two"); 5662 return false; 5663 } 5664 if (parseSwizzleOperands(1, &LaneIdx, 5665 0, GroupSize - 1, 5666 "lane id must be in the interval [0,group size - 1]")) { 5667 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5668 return true; 5669 } 5670 return false; 5671 } 5672 5673 bool 5674 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5675 using namespace llvm::AMDGPU::Swizzle; 5676 5677 SMLoc S = Parser.getTok().getLoc(); 5678 int64_t GroupSize; 5679 5680 if (!parseSwizzleOperands(1, &GroupSize, 5681 2, 32, "group size must be in the interval [2,32]")) { 5682 return false; 5683 } 5684 if (!isPowerOf2_64(GroupSize)) { 5685 Error(S, "group size must be a power of two"); 5686 return false; 5687 } 5688 5689 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, 
GroupSize - 1);
5690 return true;
5691 }
5692
5693 bool
5694 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5695 using namespace llvm::AMDGPU::Swizzle;
5696
5697 SMLoc S = Parser.getTok().getLoc();
5698 int64_t GroupSize;
5699
5700 if (!parseSwizzleOperands(1, &GroupSize,
5701 1, 16, "group size must be in the interval [1,16]")) {
5702 return false;
5703 }
5704 if (!isPowerOf2_64(GroupSize)) {
5705 Error(S, "group size must be a power of two");
5706 return false;
5707 }
5708
5709 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5710 return true;
5711 }
5712
5713 bool
5714 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5715 using namespace llvm::AMDGPU::Swizzle;
5716
5717 if (!skipToken(AsmToken::Comma, "expected a comma")) {
5718 return false;
5719 }
5720
5721 StringRef Ctl;
5722 SMLoc StrLoc = Parser.getTok().getLoc();
5723 if (!parseString(Ctl)) {
5724 return false;
5725 }
5726 if (Ctl.size() != BITMASK_WIDTH) {
5727 Error(StrLoc, "expected a 5-character mask");
5728 return false;
5729 }
5730
5731 unsigned AndMask = 0;
5732 unsigned OrMask = 0;
5733 unsigned XorMask = 0;
5734
5735 for (size_t i = 0; i < Ctl.size(); ++i) {
5736 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5737 switch(Ctl[i]) {
5738 default:
5739 Error(StrLoc, "invalid mask");
5740 return false;
5741 case '0':
5742 break;
5743 case '1':
5744 OrMask |= Mask;
5745 break;
5746 case 'p':
5747 AndMask |= Mask;
5748 break;
5749 case 'i':
5750 AndMask |= Mask;
5751 XorMask |= Mask;
5752 break;
5753 }
5754 }
5755
5756 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5757 return true;
5758 }
5759
5760 bool
5761 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5762
5763 SMLoc OffsetLoc = Parser.getTok().getLoc();
5764
5765 if (!parseExpr(Imm)) {
5766 return false;
5767 }
5768 if (!isUInt<16>(Imm)) {
5769 Error(OffsetLoc, "expected a 16-bit offset");
5770 return false;
5771 }
5772 return true;
5773 }
5774
5775 bool
5776 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5777 using namespace llvm::AMDGPU::Swizzle;
5778
5779 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5780
5781 SMLoc ModeLoc = Parser.getTok().getLoc();
5782 bool Ok = false;
5783
5784 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5785 Ok = parseSwizzleQuadPerm(Imm);
5786 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5787 Ok = parseSwizzleBitmaskPerm(Imm);
5788 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5789 Ok = parseSwizzleBroadcast(Imm);
5790 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5791 Ok = parseSwizzleSwap(Imm);
5792 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5793 Ok = parseSwizzleReverse(Imm);
5794 } else {
5795 Error(ModeLoc, "expected a swizzle mode");
5796 }
5797
5798 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5799 }
5800
5801 return false;
5802 }
5803
5804 OperandMatchResultTy
5805 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5806 SMLoc S = Parser.getTok().getLoc();
5807 int64_t Imm = 0;
5808
5809 if (trySkipId("offset")) {
5810
5811 bool Ok = false;
5812 if (skipToken(AsmToken::Colon, "expected a colon")) {
5813 if (trySkipId("swizzle")) {
5814 Ok = parseSwizzleMacro(Imm);
5815 } else {
5816 Ok = parseSwizzleOffset(Imm);
5817 }
5818 }
5819
5820 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5821
5822 return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5823 } else {
5824 // Swizzle "offset" operand is optional.
5825 // If it is omitted, try parsing other optional operands.
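// For reference, when the operand is present it takes forms such as these
// (illustrative examples only):
//   ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v8, v2 offset:0xffff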
5826 return parseOptionalOpr(Operands); 5827 } 5828 } 5829 5830 bool 5831 AMDGPUOperand::isSwizzle() const { 5832 return isImmTy(ImmTySwizzle); 5833 } 5834 5835 //===----------------------------------------------------------------------===// 5836 // VGPR Index Mode 5837 //===----------------------------------------------------------------------===// 5838 5839 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5840 5841 using namespace llvm::AMDGPU::VGPRIndexMode; 5842 5843 if (trySkipToken(AsmToken::RParen)) { 5844 return OFF; 5845 } 5846 5847 int64_t Imm = 0; 5848 5849 while (true) { 5850 unsigned Mode = 0; 5851 SMLoc S = Parser.getTok().getLoc(); 5852 5853 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5854 if (trySkipId(IdSymbolic[ModeId])) { 5855 Mode = 1 << ModeId; 5856 break; 5857 } 5858 } 5859 5860 if (Mode == 0) { 5861 Error(S, (Imm == 0)? 5862 "expected a VGPR index mode or a closing parenthesis" : 5863 "expected a VGPR index mode"); 5864 break; 5865 } 5866 5867 if (Imm & Mode) { 5868 Error(S, "duplicate VGPR index mode"); 5869 break; 5870 } 5871 Imm |= Mode; 5872 5873 if (trySkipToken(AsmToken::RParen)) 5874 break; 5875 if (!skipToken(AsmToken::Comma, 5876 "expected a comma or a closing parenthesis")) 5877 break; 5878 } 5879 5880 return Imm; 5881 } 5882 5883 OperandMatchResultTy 5884 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5885 5886 int64_t Imm = 0; 5887 SMLoc S = Parser.getTok().getLoc(); 5888 5889 if (getLexer().getKind() == AsmToken::Identifier && 5890 Parser.getTok().getString() == "gpr_idx" && 5891 getLexer().peekTok().is(AsmToken::LParen)) { 5892 5893 Parser.Lex(); 5894 Parser.Lex(); 5895 5896 // If parse failed, trigger an error but do not return error code 5897 // to avoid excessive error messages. 5898 Imm = parseGPRIdxMacro(); 5899 5900 } else { 5901 if (getParser().parseAbsoluteExpression(Imm)) 5902 return MatchOperand_NoMatch; 5903 if (Imm < 0 || !isUInt<4>(Imm)) { 5904 Error(S, "invalid immediate: only 4-bit values are legal"); 5905 } 5906 } 5907 5908 Operands.push_back( 5909 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5910 return MatchOperand_Success; 5911 } 5912 5913 bool AMDGPUOperand::isGPRIdxMode() const { 5914 return isImmTy(ImmTyGprIdxMode); 5915 } 5916 5917 //===----------------------------------------------------------------------===// 5918 // sopp branch targets 5919 //===----------------------------------------------------------------------===// 5920 5921 OperandMatchResultTy 5922 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5923 5924 // Make sure we are not parsing something 5925 // that looks like a label or an expression but is not. 5926 // This will improve error messages. 5927 if (isRegister() || isModifier()) 5928 return MatchOperand_NoMatch; 5929 5930 if (parseExpr(Operands)) { 5931 5932 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 5933 assert(Opr.isImm() || Opr.isExpr()); 5934 SMLoc Loc = Opr.getStartLoc(); 5935 5936 // Currently we do not support arbitrary expressions as branch targets. 5937 // Only labels and absolute expressions are accepted. 
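// For example (illustrative): "s_branch loop_end" (a label) and "s_branch 8"
// (an absolute 16-bit signed value) are accepted, while an expression such as
// "s_branch loop_end+4" is diagnosed below.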
5938 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 5939 Error(Loc, "expected an absolute expression or a label"); 5940 } else if (Opr.isImm() && !Opr.isS16Imm()) { 5941 Error(Loc, "expected a 16-bit signed jump offset"); 5942 } 5943 } 5944 5945 return MatchOperand_Success; // avoid excessive error messages 5946 } 5947 5948 //===----------------------------------------------------------------------===// 5949 // Boolean holding registers 5950 //===----------------------------------------------------------------------===// 5951 5952 OperandMatchResultTy 5953 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5954 return parseReg(Operands); 5955 } 5956 5957 //===----------------------------------------------------------------------===// 5958 // mubuf 5959 //===----------------------------------------------------------------------===// 5960 5961 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5962 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5963 } 5964 5965 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5966 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5967 } 5968 5969 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5970 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5971 } 5972 5973 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5974 const OperandVector &Operands, 5975 bool IsAtomic, 5976 bool IsAtomicReturn, 5977 bool IsLds) { 5978 bool IsLdsOpcode = IsLds; 5979 bool HasLdsModifier = false; 5980 OptionalImmIndexMap OptionalIdx; 5981 assert(IsAtomicReturn ? IsAtomic : true); 5982 unsigned FirstOperandIdx = 1; 5983 5984 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5985 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5986 5987 // Add the register arguments 5988 if (Op.isReg()) { 5989 Op.addRegOperands(Inst, 1); 5990 // Insert a tied src for atomic return dst. 5991 // This cannot be postponed as subsequent calls to 5992 // addImmOperands rely on correct number of MC operands. 5993 if (IsAtomicReturn && i == FirstOperandIdx) 5994 Op.addRegOperands(Inst, 1); 5995 continue; 5996 } 5997 5998 // Handle the case where soffset is an immediate 5999 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6000 Op.addImmOperands(Inst, 1); 6001 continue; 6002 } 6003 6004 HasLdsModifier |= Op.isLDS(); 6005 6006 // Handle tokens like 'offen' which are sometimes hard-coded into the 6007 // asm string. There are no MCInst operands for these. 6008 if (Op.isToken()) { 6009 continue; 6010 } 6011 assert(Op.isImm()); 6012 6013 // Handle optional arguments 6014 OptionalIdx[Op.getImmTy()] = i; 6015 } 6016 6017 // This is a workaround for an llvm quirk which may result in an 6018 // incorrect instruction selection. Lds and non-lds versions of 6019 // MUBUF instructions are identical except that lds versions 6020 // have mandatory 'lds' modifier. However this modifier follows 6021 // optional modifiers and llvm asm matcher regards this 'lds' 6022 // modifier as an optional one. As a result, an lds version 6023 // of opcode may be selected even if it has no 'lds' modifier. 6024 if (IsLdsOpcode && !HasLdsModifier) { 6025 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6026 if (NoLdsOpcode != -1) { // Got lds version - correct it. 6027 Inst.setOpcode(NoLdsOpcode); 6028 IsLdsOpcode = false; 6029 } 6030 } 6031 6032 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6033 if (!IsAtomic) { // glc is hard-coded. 
6034 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6035 } 6036 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6037 6038 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6039 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6040 } 6041 6042 if (isGFX10()) 6043 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6044 } 6045 6046 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6047 OptionalImmIndexMap OptionalIdx; 6048 6049 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6050 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6051 6052 // Add the register arguments 6053 if (Op.isReg()) { 6054 Op.addRegOperands(Inst, 1); 6055 continue; 6056 } 6057 6058 // Handle the case where soffset is an immediate 6059 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6060 Op.addImmOperands(Inst, 1); 6061 continue; 6062 } 6063 6064 // Handle tokens like 'offen' which are sometimes hard-coded into the 6065 // asm string. There are no MCInst operands for these. 6066 if (Op.isToken()) { 6067 continue; 6068 } 6069 assert(Op.isImm()); 6070 6071 // Handle optional arguments 6072 OptionalIdx[Op.getImmTy()] = i; 6073 } 6074 6075 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6076 AMDGPUOperand::ImmTyOffset); 6077 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6078 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6079 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6080 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6081 6082 if (isGFX10()) 6083 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6084 } 6085 6086 //===----------------------------------------------------------------------===// 6087 // mimg 6088 //===----------------------------------------------------------------------===// 6089 6090 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6091 bool IsAtomic) { 6092 unsigned I = 1; 6093 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6094 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6095 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6096 } 6097 6098 if (IsAtomic) { 6099 // Add src, same as dst 6100 assert(Desc.getNumDefs() == 1); 6101 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6102 } 6103 6104 OptionalImmIndexMap OptionalIdx; 6105 6106 for (unsigned E = Operands.size(); I != E; ++I) { 6107 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6108 6109 // Add the register arguments 6110 if (Op.isReg()) { 6111 Op.addRegOperands(Inst, 1); 6112 } else if (Op.isImmModifier()) { 6113 OptionalIdx[Op.getImmTy()] = I; 6114 } else if (!Op.isToken()) { 6115 llvm_unreachable("unexpected operand type"); 6116 } 6117 } 6118 6119 bool IsGFX10 = isGFX10(); 6120 6121 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6122 if (IsGFX10) 6123 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6124 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6125 if (IsGFX10) 6126 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6127 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6128 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6129 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6130 if (IsGFX10) 6131 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6132 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6133 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6134 if (!IsGFX10) 6135 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6136 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6137 } 6138 6139 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6140 cvtMIMG(Inst, Operands, true); 6141 } 6142 6143 //===----------------------------------------------------------------------===// 6144 // smrd 6145 //===----------------------------------------------------------------------===// 6146 6147 bool AMDGPUOperand::isSMRDOffset8() const { 6148 return isImm() && isUInt<8>(getImm()); 6149 } 6150 6151 bool AMDGPUOperand::isSMEMOffset() const { 6152 return isImm(); // Offset range is checked later by validator. 6153 } 6154 6155 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6156 // 32-bit literals are only supported on CI and we only want to use them 6157 // when the offset is > 8-bits. 6158 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6159 } 6160 6161 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6162 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6163 } 6164 6165 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6166 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6167 } 6168 6169 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6170 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6171 } 6172 6173 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6174 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6175 } 6176 6177 //===----------------------------------------------------------------------===// 6178 // vop3 6179 //===----------------------------------------------------------------------===// 6180 6181 static bool ConvertOmodMul(int64_t &Mul) { 6182 if (Mul != 1 && Mul != 2 && Mul != 4) 6183 return false; 6184 6185 Mul >>= 1; 6186 return true; 6187 } 6188 6189 static bool ConvertOmodDiv(int64_t &Div) { 6190 if (Div == 1) { 6191 Div = 0; 6192 return true; 6193 } 6194 6195 if (Div == 2) { 6196 Div = 3; 6197 return true; 6198 } 6199 6200 return false; 6201 } 6202 6203 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6204 if (BoundCtrl == 0) { 6205 BoundCtrl = 1; 6206 return true; 6207 } 6208 6209 if (BoundCtrl == -1) { 6210 BoundCtrl = 0; 6211 return true; 6212 } 6213 6214 return false; 6215 } 6216 6217 // Note: the order in this table matches the order of operands in AsmString. 
6218 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6219 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6220 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6221 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6222 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6223 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6224 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6225 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6226 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6227 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6228 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6229 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 6230 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6231 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6232 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6233 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6234 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6235 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6236 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6237 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6238 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6239 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6240 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6241 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6242 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6243 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6244 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6245 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6246 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6247 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6248 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6249 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6250 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6251 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6252 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6253 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6254 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6255 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6256 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6257 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6258 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6259 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6260 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6261 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6262 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6263 }; 6264 6265 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6266 6267 OperandMatchResultTy res = parseOptionalOpr(Operands); 6268 6269 // This is a hack to enable hardcoded mandatory operands which follow 6270 // optional operands. 6271 // 6272 // Current design assumes that all operands after the first optional operand 6273 // are also optional. However implementation of some instructions violates 6274 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6275 // 6276 // To alleviate this problem, we have to (implicitly) parse extra operands 6277 // to make sure autogenerated parser of custom operands never hit hardcoded 6278 // mandatory operands. 
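// For example (illustrative), the returning form of a FLAT atomic spells a
// mandatory trailing 'glc' in its AsmString:
//   flat_atomic_add v0, v[1:2], v3 glc
// even though 'glc' is otherwise registered as an optional operand.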
6279 6280 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6281 if (res != MatchOperand_Success || 6282 isToken(AsmToken::EndOfStatement)) 6283 break; 6284 6285 trySkipToken(AsmToken::Comma); 6286 res = parseOptionalOpr(Operands); 6287 } 6288 6289 return res; 6290 } 6291 6292 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6293 OperandMatchResultTy res; 6294 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6295 // try to parse any optional operand here 6296 if (Op.IsBit) { 6297 res = parseNamedBit(Op.Name, Operands, Op.Type); 6298 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6299 res = parseOModOperand(Operands); 6300 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6301 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6302 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6303 res = parseSDWASel(Operands, Op.Name, Op.Type); 6304 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6305 res = parseSDWADstUnused(Operands); 6306 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6307 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6308 Op.Type == AMDGPUOperand::ImmTyNegLo || 6309 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6310 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6311 Op.ConvertResult); 6312 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6313 res = parseDim(Operands); 6314 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 6315 res = parseDfmtNfmt(Operands); 6316 } else { 6317 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6318 } 6319 if (res != MatchOperand_NoMatch) { 6320 return res; 6321 } 6322 } 6323 return MatchOperand_NoMatch; 6324 } 6325 6326 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6327 StringRef Name = Parser.getTok().getString(); 6328 if (Name == "mul") { 6329 return parseIntWithPrefix("mul", Operands, 6330 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6331 } 6332 6333 if (Name == "div") { 6334 return parseIntWithPrefix("div", Operands, 6335 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6336 } 6337 6338 return MatchOperand_NoMatch; 6339 } 6340 6341 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6342 cvtVOP3P(Inst, Operands); 6343 6344 int Opc = Inst.getOpcode(); 6345 6346 int SrcNum; 6347 const int Ops[] = { AMDGPU::OpName::src0, 6348 AMDGPU::OpName::src1, 6349 AMDGPU::OpName::src2 }; 6350 for (SrcNum = 0; 6351 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6352 ++SrcNum); 6353 assert(SrcNum > 0); 6354 6355 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6356 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6357 6358 if ((OpSel & (1 << SrcNum)) != 0) { 6359 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6360 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6361 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6362 } 6363 } 6364 6365 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6366 // 1. This operand is input modifiers 6367 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6368 // 2. This is not last operand 6369 && Desc.NumOperands > (OpNum + 1) 6370 // 3. Next operand is register class 6371 && Desc.OpInfo[OpNum + 1].RegClass != -1 6372 // 4. 
Next register is not tied to any other operand 6373 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6374 } 6375 6376 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6377 { 6378 OptionalImmIndexMap OptionalIdx; 6379 unsigned Opc = Inst.getOpcode(); 6380 6381 unsigned I = 1; 6382 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6383 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6384 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6385 } 6386 6387 for (unsigned E = Operands.size(); I != E; ++I) { 6388 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6389 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6390 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6391 } else if (Op.isInterpSlot() || 6392 Op.isInterpAttr() || 6393 Op.isAttrChan()) { 6394 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6395 } else if (Op.isImmModifier()) { 6396 OptionalIdx[Op.getImmTy()] = I; 6397 } else { 6398 llvm_unreachable("unhandled operand type"); 6399 } 6400 } 6401 6402 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6403 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6404 } 6405 6406 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6407 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6408 } 6409 6410 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6411 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6412 } 6413 } 6414 6415 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6416 OptionalImmIndexMap &OptionalIdx) { 6417 unsigned Opc = Inst.getOpcode(); 6418 6419 unsigned I = 1; 6420 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6421 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6422 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6423 } 6424 6425 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6426 // This instruction has src modifiers 6427 for (unsigned E = Operands.size(); I != E; ++I) { 6428 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6429 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6430 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6431 } else if (Op.isImmModifier()) { 6432 OptionalIdx[Op.getImmTy()] = I; 6433 } else if (Op.isRegOrImm()) { 6434 Op.addRegOrImmOperands(Inst, 1); 6435 } else { 6436 llvm_unreachable("unhandled operand type"); 6437 } 6438 } 6439 } else { 6440 // No src modifiers 6441 for (unsigned E = Operands.size(); I != E; ++I) { 6442 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6443 if (Op.isMod()) { 6444 OptionalIdx[Op.getImmTy()] = I; 6445 } else { 6446 Op.addRegOrImmOperands(Inst, 1); 6447 } 6448 } 6449 } 6450 6451 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6452 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6453 } 6454 6455 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6456 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6457 } 6458 6459 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6460 // it has src2 register operand that is tied to dst operand 6461 // we don't allow modifiers for this operand in assembler so src2_modifiers 6462 // should be 0. 
6463 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6464 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6465 Opc == AMDGPU::V_MAC_F32_e64_vi || 6466 Opc == AMDGPU::V_MAC_F16_e64_vi || 6467 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6468 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6469 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6470 auto it = Inst.begin(); 6471 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6472 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6473 ++it; 6474 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6475 } 6476 } 6477 6478 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6479 OptionalImmIndexMap OptionalIdx; 6480 cvtVOP3(Inst, Operands, OptionalIdx); 6481 } 6482 6483 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6484 const OperandVector &Operands) { 6485 OptionalImmIndexMap OptIdx; 6486 const int Opc = Inst.getOpcode(); 6487 const MCInstrDesc &Desc = MII.get(Opc); 6488 6489 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6490 6491 cvtVOP3(Inst, Operands, OptIdx); 6492 6493 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6494 assert(!IsPacked); 6495 Inst.addOperand(Inst.getOperand(0)); 6496 } 6497 6498 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6499 // instruction, and then figure out where to actually put the modifiers 6500 6501 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6502 6503 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6504 if (OpSelHiIdx != -1) { 6505 int DefaultVal = IsPacked ? -1 : 0; 6506 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6507 DefaultVal); 6508 } 6509 6510 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6511 if (NegLoIdx != -1) { 6512 assert(IsPacked); 6513 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6514 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6515 } 6516 6517 const int Ops[] = { AMDGPU::OpName::src0, 6518 AMDGPU::OpName::src1, 6519 AMDGPU::OpName::src2 }; 6520 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6521 AMDGPU::OpName::src1_modifiers, 6522 AMDGPU::OpName::src2_modifiers }; 6523 6524 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6525 6526 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6527 unsigned OpSelHi = 0; 6528 unsigned NegLo = 0; 6529 unsigned NegHi = 0; 6530 6531 if (OpSelHiIdx != -1) { 6532 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6533 } 6534 6535 if (NegLoIdx != -1) { 6536 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6537 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6538 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6539 } 6540 6541 for (int J = 0; J < 3; ++J) { 6542 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6543 if (OpIdx == -1) 6544 break; 6545 6546 uint32_t ModVal = 0; 6547 6548 if ((OpSel & (1 << J)) != 0) 6549 ModVal |= SISrcMods::OP_SEL_0; 6550 6551 if ((OpSelHi & (1 << J)) != 0) 6552 ModVal |= SISrcMods::OP_SEL_1; 6553 6554 if ((NegLo & (1 << J)) != 0) 6555 ModVal |= SISrcMods::NEG; 6556 6557 if ((NegHi & (1 << J)) != 0) 6558 ModVal |= SISrcMods::NEG_HI; 6559 6560 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6561 6562 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6563 } 6564 } 6565 6566 //===----------------------------------------------------------------------===// 6567 // dpp 6568 
//===----------------------------------------------------------------------===// 6569 6570 bool AMDGPUOperand::isDPP8() const { 6571 return isImmTy(ImmTyDPP8); 6572 } 6573 6574 bool AMDGPUOperand::isDPPCtrl() const { 6575 using namespace AMDGPU::DPP; 6576 6577 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6578 if (result) { 6579 int64_t Imm = getImm(); 6580 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6581 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6582 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6583 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6584 (Imm == DppCtrl::WAVE_SHL1) || 6585 (Imm == DppCtrl::WAVE_ROL1) || 6586 (Imm == DppCtrl::WAVE_SHR1) || 6587 (Imm == DppCtrl::WAVE_ROR1) || 6588 (Imm == DppCtrl::ROW_MIRROR) || 6589 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6590 (Imm == DppCtrl::BCAST15) || 6591 (Imm == DppCtrl::BCAST31) || 6592 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6593 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6594 } 6595 return false; 6596 } 6597 6598 //===----------------------------------------------------------------------===// 6599 // mAI 6600 //===----------------------------------------------------------------------===// 6601 6602 bool AMDGPUOperand::isBLGP() const { 6603 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6604 } 6605 6606 bool AMDGPUOperand::isCBSZ() const { 6607 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6608 } 6609 6610 bool AMDGPUOperand::isABID() const { 6611 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6612 } 6613 6614 bool AMDGPUOperand::isS16Imm() const { 6615 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6616 } 6617 6618 bool AMDGPUOperand::isU16Imm() const { 6619 return isImm() && isUInt<16>(getImm()); 6620 } 6621 6622 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6623 if (!isGFX10()) 6624 return MatchOperand_NoMatch; 6625 6626 SMLoc S = Parser.getTok().getLoc(); 6627 6628 if (getLexer().isNot(AsmToken::Identifier)) 6629 return MatchOperand_NoMatch; 6630 if (getLexer().getTok().getString() != "dim") 6631 return MatchOperand_NoMatch; 6632 6633 Parser.Lex(); 6634 if (getLexer().isNot(AsmToken::Colon)) 6635 return MatchOperand_ParseFail; 6636 6637 Parser.Lex(); 6638 6639 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6640 // integer. 
6641 std::string Token; 6642 if (getLexer().is(AsmToken::Integer)) { 6643 SMLoc Loc = getLexer().getTok().getEndLoc(); 6644 Token = std::string(getLexer().getTok().getString()); 6645 Parser.Lex(); 6646 if (getLexer().getTok().getLoc() != Loc) 6647 return MatchOperand_ParseFail; 6648 } 6649 if (getLexer().isNot(AsmToken::Identifier)) 6650 return MatchOperand_ParseFail; 6651 Token += getLexer().getTok().getString(); 6652 6653 StringRef DimId = Token; 6654 if (DimId.startswith("SQ_RSRC_IMG_")) 6655 DimId = DimId.substr(12); 6656 6657 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6658 if (!DimInfo) 6659 return MatchOperand_ParseFail; 6660 6661 Parser.Lex(); 6662 6663 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6664 AMDGPUOperand::ImmTyDim)); 6665 return MatchOperand_Success; 6666 } 6667 6668 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6669 SMLoc S = Parser.getTok().getLoc(); 6670 StringRef Prefix; 6671 6672 if (getLexer().getKind() == AsmToken::Identifier) { 6673 Prefix = Parser.getTok().getString(); 6674 } else { 6675 return MatchOperand_NoMatch; 6676 } 6677 6678 if (Prefix != "dpp8") 6679 return parseDPPCtrl(Operands); 6680 if (!isGFX10()) 6681 return MatchOperand_NoMatch; 6682 6683 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6684 6685 int64_t Sels[8]; 6686 6687 Parser.Lex(); 6688 if (getLexer().isNot(AsmToken::Colon)) 6689 return MatchOperand_ParseFail; 6690 6691 Parser.Lex(); 6692 if (getLexer().isNot(AsmToken::LBrac)) 6693 return MatchOperand_ParseFail; 6694 6695 Parser.Lex(); 6696 if (getParser().parseAbsoluteExpression(Sels[0])) 6697 return MatchOperand_ParseFail; 6698 if (0 > Sels[0] || 7 < Sels[0]) 6699 return MatchOperand_ParseFail; 6700 6701 for (size_t i = 1; i < 8; ++i) { 6702 if (getLexer().isNot(AsmToken::Comma)) 6703 return MatchOperand_ParseFail; 6704 6705 Parser.Lex(); 6706 if (getParser().parseAbsoluteExpression(Sels[i])) 6707 return MatchOperand_ParseFail; 6708 if (0 > Sels[i] || 7 < Sels[i]) 6709 return MatchOperand_ParseFail; 6710 } 6711 6712 if (getLexer().isNot(AsmToken::RBrac)) 6713 return MatchOperand_ParseFail; 6714 Parser.Lex(); 6715 6716 unsigned DPP8 = 0; 6717 for (size_t i = 0; i < 8; ++i) 6718 DPP8 |= (Sels[i] << (i * 3)); 6719 6720 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6721 return MatchOperand_Success; 6722 } 6723 6724 OperandMatchResultTy 6725 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6726 using namespace AMDGPU::DPP; 6727 6728 SMLoc S = Parser.getTok().getLoc(); 6729 StringRef Prefix; 6730 int64_t Int; 6731 6732 if (getLexer().getKind() == AsmToken::Identifier) { 6733 Prefix = Parser.getTok().getString(); 6734 } else { 6735 return MatchOperand_NoMatch; 6736 } 6737 6738 if (Prefix == "row_mirror") { 6739 Int = DppCtrl::ROW_MIRROR; 6740 Parser.Lex(); 6741 } else if (Prefix == "row_half_mirror") { 6742 Int = DppCtrl::ROW_HALF_MIRROR; 6743 Parser.Lex(); 6744 } else { 6745 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6746 if (Prefix != "quad_perm" 6747 && Prefix != "row_shl" 6748 && Prefix != "row_shr" 6749 && Prefix != "row_ror" 6750 && Prefix != "wave_shl" 6751 && Prefix != "wave_rol" 6752 && Prefix != "wave_shr" 6753 && Prefix != "wave_ror" 6754 && Prefix != "row_bcast" 6755 && Prefix != "row_share" 6756 && Prefix != "row_xmask") { 6757 return MatchOperand_NoMatch; 6758 } 6759 6760 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6761 return MatchOperand_NoMatch; 6762 6763 if 
(!isVI() && !isGFX9() && 6764 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6765 Prefix == "wave_rol" || Prefix == "wave_ror" || 6766 Prefix == "row_bcast")) 6767 return MatchOperand_NoMatch; 6768 6769 Parser.Lex(); 6770 if (getLexer().isNot(AsmToken::Colon)) 6771 return MatchOperand_ParseFail; 6772 6773 if (Prefix == "quad_perm") { 6774 // quad_perm:[%d,%d,%d,%d] 6775 Parser.Lex(); 6776 if (getLexer().isNot(AsmToken::LBrac)) 6777 return MatchOperand_ParseFail; 6778 Parser.Lex(); 6779 6780 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6781 return MatchOperand_ParseFail; 6782 6783 for (int i = 0; i < 3; ++i) { 6784 if (getLexer().isNot(AsmToken::Comma)) 6785 return MatchOperand_ParseFail; 6786 Parser.Lex(); 6787 6788 int64_t Temp; 6789 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6790 return MatchOperand_ParseFail; 6791 const int shift = i*2 + 2; 6792 Int += (Temp << shift); 6793 } 6794 6795 if (getLexer().isNot(AsmToken::RBrac)) 6796 return MatchOperand_ParseFail; 6797 Parser.Lex(); 6798 } else { 6799 // sel:%d 6800 Parser.Lex(); 6801 if (getParser().parseAbsoluteExpression(Int)) 6802 return MatchOperand_ParseFail; 6803 6804 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6805 Int |= DppCtrl::ROW_SHL0; 6806 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6807 Int |= DppCtrl::ROW_SHR0; 6808 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6809 Int |= DppCtrl::ROW_ROR0; 6810 } else if (Prefix == "wave_shl" && 1 == Int) { 6811 Int = DppCtrl::WAVE_SHL1; 6812 } else if (Prefix == "wave_rol" && 1 == Int) { 6813 Int = DppCtrl::WAVE_ROL1; 6814 } else if (Prefix == "wave_shr" && 1 == Int) { 6815 Int = DppCtrl::WAVE_SHR1; 6816 } else if (Prefix == "wave_ror" && 1 == Int) { 6817 Int = DppCtrl::WAVE_ROR1; 6818 } else if (Prefix == "row_bcast") { 6819 if (Int == 15) { 6820 Int = DppCtrl::BCAST15; 6821 } else if (Int == 31) { 6822 Int = DppCtrl::BCAST31; 6823 } else { 6824 return MatchOperand_ParseFail; 6825 } 6826 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6827 Int |= DppCtrl::ROW_SHARE_FIRST; 6828 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6829 Int |= DppCtrl::ROW_XMASK_FIRST; 6830 } else { 6831 return MatchOperand_ParseFail; 6832 } 6833 } 6834 } 6835 6836 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6837 return MatchOperand_Success; 6838 } 6839 6840 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6841 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6842 } 6843 6844 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6845 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6846 } 6847 6848 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6849 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6850 } 6851 6852 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6853 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6854 } 6855 6856 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6857 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6858 } 6859 6860 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6861 OptionalImmIndexMap OptionalIdx; 6862 6863 unsigned I = 1; 6864 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6865 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6866 
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6867 } 6868 6869 int Fi = 0; 6870 for (unsigned E = Operands.size(); I != E; ++I) { 6871 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6872 MCOI::TIED_TO); 6873 if (TiedTo != -1) { 6874 assert((unsigned)TiedTo < Inst.getNumOperands()); 6875 // handle tied old or src2 for MAC instructions 6876 Inst.addOperand(Inst.getOperand(TiedTo)); 6877 } 6878 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6879 // Add the register arguments 6880 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6881 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6882 // Skip it. 6883 continue; 6884 } 6885 6886 if (IsDPP8) { 6887 if (Op.isDPP8()) { 6888 Op.addImmOperands(Inst, 1); 6889 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6890 Op.addRegWithFPInputModsOperands(Inst, 2); 6891 } else if (Op.isFI()) { 6892 Fi = Op.getImm(); 6893 } else if (Op.isReg()) { 6894 Op.addRegOperands(Inst, 1); 6895 } else { 6896 llvm_unreachable("Invalid operand type"); 6897 } 6898 } else { 6899 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6900 Op.addRegWithFPInputModsOperands(Inst, 2); 6901 } else if (Op.isDPPCtrl()) { 6902 Op.addImmOperands(Inst, 1); 6903 } else if (Op.isImm()) { 6904 // Handle optional arguments 6905 OptionalIdx[Op.getImmTy()] = I; 6906 } else { 6907 llvm_unreachable("Invalid operand type"); 6908 } 6909 } 6910 } 6911 6912 if (IsDPP8) { 6913 using namespace llvm::AMDGPU::DPP; 6914 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6915 } else { 6916 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6917 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6918 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6919 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6920 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6921 } 6922 } 6923 } 6924 6925 //===----------------------------------------------------------------------===// 6926 // sdwa 6927 //===----------------------------------------------------------------------===// 6928 6929 OperandMatchResultTy 6930 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6931 AMDGPUOperand::ImmTy Type) { 6932 using namespace llvm::AMDGPU::SDWA; 6933 6934 SMLoc S = Parser.getTok().getLoc(); 6935 StringRef Value; 6936 OperandMatchResultTy res; 6937 6938 res = parseStringWithPrefix(Prefix, Value); 6939 if (res != MatchOperand_Success) { 6940 return res; 6941 } 6942 6943 int64_t Int; 6944 Int = StringSwitch<int64_t>(Value) 6945 .Case("BYTE_0", SdwaSel::BYTE_0) 6946 .Case("BYTE_1", SdwaSel::BYTE_1) 6947 .Case("BYTE_2", SdwaSel::BYTE_2) 6948 .Case("BYTE_3", SdwaSel::BYTE_3) 6949 .Case("WORD_0", SdwaSel::WORD_0) 6950 .Case("WORD_1", SdwaSel::WORD_1) 6951 .Case("DWORD", SdwaSel::DWORD) 6952 .Default(0xffffffff); 6953 Parser.Lex(); // eat last token 6954 6955 if (Int == 0xffffffff) { 6956 return MatchOperand_ParseFail; 6957 } 6958 6959 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6960 return MatchOperand_Success; 6961 } 6962 6963 OperandMatchResultTy 6964 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6965 using namespace llvm::AMDGPU::SDWA; 6966 6967 SMLoc S = Parser.getTok().getLoc(); 6968 StringRef Value; 6969 OperandMatchResultTy res; 6970 6971 res = parseStringWithPrefix("dst_unused", Value); 6972 if (res != 
MatchOperand_Success) { 6973 return res; 6974 } 6975 6976 int64_t Int; 6977 Int = StringSwitch<int64_t>(Value) 6978 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6979 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6980 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6981 .Default(0xffffffff); 6982 Parser.Lex(); // eat last token 6983 6984 if (Int == 0xffffffff) { 6985 return MatchOperand_ParseFail; 6986 } 6987 6988 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6989 return MatchOperand_Success; 6990 } 6991 6992 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6993 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6994 } 6995 6996 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6997 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6998 } 6999 7000 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 7001 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 7002 } 7003 7004 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 7005 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 7006 } 7007 7008 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 7009 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 7010 } 7011 7012 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 7013 uint64_t BasicInstType, 7014 bool SkipDstVcc, 7015 bool SkipSrcVcc) { 7016 using namespace llvm::AMDGPU::SDWA; 7017 7018 OptionalImmIndexMap OptionalIdx; 7019 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 7020 bool SkippedVcc = false; 7021 7022 unsigned I = 1; 7023 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7024 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7025 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7026 } 7027 7028 for (unsigned E = Operands.size(); I != E; ++I) { 7029 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7030 if (SkipVcc && !SkippedVcc && Op.isReg() && 7031 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 7032 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 7033 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 7034 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 7035 // Skip VCC only if we didn't skip it on previous iteration. 7036 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
7037 if (BasicInstType == SIInstrFlags::VOP2 &&
7038 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7039 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7040 SkippedVcc = true;
7041 continue;
7042 } else if (BasicInstType == SIInstrFlags::VOPC &&
7043 Inst.getNumOperands() == 0) {
7044 SkippedVcc = true;
7045 continue;
7046 }
7047 }
7048 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7049 Op.addRegOrImmWithInputModsOperands(Inst, 2);
7050 } else if (Op.isImm()) {
7051 // Handle optional arguments
7052 OptionalIdx[Op.getImmTy()] = I;
7053 } else {
7054 llvm_unreachable("Invalid operand type");
7055 }
7056 SkippedVcc = false;
7057 }
7058
7059 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7060 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7061 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7062 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
7063 switch (BasicInstType) {
7064 case SIInstrFlags::VOP1:
7065 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7066 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7067 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7068 }
7069 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7070 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7071 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7072 break;
7073
7074 case SIInstrFlags::VOP2:
7075 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7076 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7077 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7078 }
7079 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7080 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7081 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7082 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7083 break;
7084
7085 case SIInstrFlags::VOPC:
7086 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7087 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7088 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7089 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7090 break;
7091
7092 default:
7093 llvm_unreachable("Invalid instruction type.

  // Special case for v_mac_{f16, f32}: it has a src2 register operand that is
  // tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand but
  // expected the corresponding token.
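  //
  // For example (illustrative only): in "buffer_load_dword v0, off, s[0:3], 0 glc"
  // the trailing "glc" reaches the matcher as an immediate operand, and the
  // MCK_glc case below accepts it via isGLC().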
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() always tries to match an
    // operand as a token when isToken returns true, and when the name of the
    // expression is not a valid token the match fails, so we need to handle it
    // here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but it should also be enabled with
    // 64-bit operands. The following code enables it for SReg_64 operands used
    // as source and destination. Remaining source operands are handled in
    // isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
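
// Illustrative usage of the optional endpgm immediate handled by
// parseEndpgmOp above (example only): both "s_endpgm" and "s_endpgm 1" are
// accepted, with the immediate defaulting to 0 when it is omitted.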