//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

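    // Editor's note (descriptive comment, not upstream): the bits built below
    // are OR'ed into the companion "src_modifiers" immediate that precedes a
    // source operand in the MCInst (see addRegOrImmWithInputModsOperands).
    // Illustrative assembly: "-v0" sets NEG, "|v0|" sets ABS, "-|v0|" sets
    // both, and "sext(v0)" sets SEXT instead.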
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
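  // Editor's note (descriptive comment, not upstream): the *WithInputMods
  // parsers in this group accept SP3-style source modifiers, e.g. "-v1",
  // "|v2|" or "abs(v2)" for floating-point neg/abs and "sext(v3)" for
  // integer sign-extension (syntax shown is illustrative only); the parsed
  // flags are recorded in AMDGPUOperand::Modifiers on the resulting operand.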
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  bool validateMAIAccWrite(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

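  // Editor's note (descriptive comment, not upstream): the SDWA helpers below
  // handle the sub-dword selection syntax, e.g. "dst_sel:BYTE_0",
  // "src0_sel:WORD_1" and "dst_unused:UNUSED_PAD" (examples are illustrative);
  // each selector is parsed into an immediate operand of the matching
  // ImmTySdwa* type.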
  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 64 bits of the literal to zeros, but we accept such literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
         (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
sizeof(double) : getOperandSize(InstDesc, OpNum); 1727 Val = applyInputFPModifiers(Val, Size); 1728 } 1729 1730 APInt Literal(64, Val); 1731 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1732 1733 if (Imm.IsFPImm) { // We got fp literal token 1734 switch (OpTy) { 1735 case AMDGPU::OPERAND_REG_IMM_INT64: 1736 case AMDGPU::OPERAND_REG_IMM_FP64: 1737 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1738 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1739 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1740 AsmParser->hasInv2PiInlineImm())) { 1741 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1742 return; 1743 } 1744 1745 // Non-inlineable 1746 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1747 // For fp operands we check if low 32 bits are zeros 1748 if (Literal.getLoBits(32) != 0) { 1749 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1750 "Can't encode literal as exact 64-bit floating-point operand. " 1751 "Low 32-bits will be set to zero"); 1752 } 1753 1754 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1755 return; 1756 } 1757 1758 // We don't allow fp literals in 64-bit integer instructions. It is 1759 // unclear how we should encode them. This case should be checked earlier 1760 // in predicate methods (isLiteralImm()) 1761 llvm_unreachable("fp literal in 64-bit integer instruction."); 1762 1763 case AMDGPU::OPERAND_REG_IMM_INT32: 1764 case AMDGPU::OPERAND_REG_IMM_FP32: 1765 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1766 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1767 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1768 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1769 case AMDGPU::OPERAND_REG_IMM_INT16: 1770 case AMDGPU::OPERAND_REG_IMM_FP16: 1771 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1772 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1773 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1774 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1775 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1776 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1777 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1778 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1779 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1780 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1781 bool lost; 1782 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1783 // Convert literal to single precision 1784 FPLiteral.convert(*getOpFltSemantics(OpTy), 1785 APFloat::rmNearestTiesToEven, &lost); 1786 // We allow precision lost but not overflow or underflow. This should be 1787 // checked earlier in isLiteralImm() 1788 1789 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1790 Inst.addOperand(MCOperand::createImm(ImmVal)); 1791 return; 1792 } 1793 default: 1794 llvm_unreachable("invalid operand size"); 1795 } 1796 1797 return; 1798 } 1799 1800 // We got int literal token. 1801 // Only sign extend inline immediates. 
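// Added illustrative note (not part of the original source): for a 32-bit
// operand, the int literal -1 is an inline immediate and is emitted with its
// sign-extended 64-bit value, while -17 falls outside the inline range
// (-16..64) and is emitted below as the masked 32-bit literal 0xffffffef.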
1802 switch (OpTy) { 1803 case AMDGPU::OPERAND_REG_IMM_INT32: 1804 case AMDGPU::OPERAND_REG_IMM_FP32: 1805 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1806 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1807 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1808 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1809 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1810 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1811 if (isSafeTruncation(Val, 32) && 1812 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1813 AsmParser->hasInv2PiInlineImm())) { 1814 Inst.addOperand(MCOperand::createImm(Val)); 1815 return; 1816 } 1817 1818 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1819 return; 1820 1821 case AMDGPU::OPERAND_REG_IMM_INT64: 1822 case AMDGPU::OPERAND_REG_IMM_FP64: 1823 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1824 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1825 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1826 Inst.addOperand(MCOperand::createImm(Val)); 1827 return; 1828 } 1829 1830 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1831 return; 1832 1833 case AMDGPU::OPERAND_REG_IMM_INT16: 1834 case AMDGPU::OPERAND_REG_IMM_FP16: 1835 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1836 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1837 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1838 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1839 if (isSafeTruncation(Val, 16) && 1840 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1841 AsmParser->hasInv2PiInlineImm())) { 1842 Inst.addOperand(MCOperand::createImm(Val)); 1843 return; 1844 } 1845 1846 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1847 return; 1848 1849 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1850 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1851 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1852 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1853 assert(isSafeTruncation(Val, 16)); 1854 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1855 AsmParser->hasInv2PiInlineImm())); 1856 1857 Inst.addOperand(MCOperand::createImm(Val)); 1858 return; 1859 } 1860 default: 1861 llvm_unreachable("invalid operand size"); 1862 } 1863 } 1864 1865 template <unsigned Bitwidth> 1866 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1867 APInt Literal(64, Imm.Val); 1868 1869 if (!Imm.IsFPImm) { 1870 // We got int literal token. 
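// Added note (illustrative): only the low Bitwidth bits of the integer
// literal are encoded here, so e.g. a 16-bit k-immediate built from the
// token 0x12345 keeps just 0x2345.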
1871 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1872 return; 1873 } 1874 1875 bool Lost; 1876 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1877 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1878 APFloat::rmNearestTiesToEven, &Lost); 1879 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1880 } 1881 1882 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1883 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1884 } 1885 1886 static bool isInlineValue(unsigned Reg) { 1887 switch (Reg) { 1888 case AMDGPU::SRC_SHARED_BASE: 1889 case AMDGPU::SRC_SHARED_LIMIT: 1890 case AMDGPU::SRC_PRIVATE_BASE: 1891 case AMDGPU::SRC_PRIVATE_LIMIT: 1892 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1893 return true; 1894 case AMDGPU::SRC_VCCZ: 1895 case AMDGPU::SRC_EXECZ: 1896 case AMDGPU::SRC_SCC: 1897 return true; 1898 case AMDGPU::SGPR_NULL: 1899 return true; 1900 default: 1901 return false; 1902 } 1903 } 1904 1905 bool AMDGPUOperand::isInlineValue() const { 1906 return isRegKind() && ::isInlineValue(getReg()); 1907 } 1908 1909 //===----------------------------------------------------------------------===// 1910 // AsmParser 1911 //===----------------------------------------------------------------------===// 1912 1913 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1914 if (Is == IS_VGPR) { 1915 switch (RegWidth) { 1916 default: return -1; 1917 case 1: return AMDGPU::VGPR_32RegClassID; 1918 case 2: return AMDGPU::VReg_64RegClassID; 1919 case 3: return AMDGPU::VReg_96RegClassID; 1920 case 4: return AMDGPU::VReg_128RegClassID; 1921 case 5: return AMDGPU::VReg_160RegClassID; 1922 case 6: return AMDGPU::VReg_192RegClassID; 1923 case 8: return AMDGPU::VReg_256RegClassID; 1924 case 16: return AMDGPU::VReg_512RegClassID; 1925 case 32: return AMDGPU::VReg_1024RegClassID; 1926 } 1927 } else if (Is == IS_TTMP) { 1928 switch (RegWidth) { 1929 default: return -1; 1930 case 1: return AMDGPU::TTMP_32RegClassID; 1931 case 2: return AMDGPU::TTMP_64RegClassID; 1932 case 4: return AMDGPU::TTMP_128RegClassID; 1933 case 8: return AMDGPU::TTMP_256RegClassID; 1934 case 16: return AMDGPU::TTMP_512RegClassID; 1935 } 1936 } else if (Is == IS_SGPR) { 1937 switch (RegWidth) { 1938 default: return -1; 1939 case 1: return AMDGPU::SGPR_32RegClassID; 1940 case 2: return AMDGPU::SGPR_64RegClassID; 1941 case 3: return AMDGPU::SGPR_96RegClassID; 1942 case 4: return AMDGPU::SGPR_128RegClassID; 1943 case 5: return AMDGPU::SGPR_160RegClassID; 1944 case 6: return AMDGPU::SGPR_192RegClassID; 1945 case 8: return AMDGPU::SGPR_256RegClassID; 1946 case 16: return AMDGPU::SGPR_512RegClassID; 1947 } 1948 } else if (Is == IS_AGPR) { 1949 switch (RegWidth) { 1950 default: return -1; 1951 case 1: return AMDGPU::AGPR_32RegClassID; 1952 case 2: return AMDGPU::AReg_64RegClassID; 1953 case 3: return AMDGPU::AReg_96RegClassID; 1954 case 4: return AMDGPU::AReg_128RegClassID; 1955 case 5: return AMDGPU::AReg_160RegClassID; 1956 case 6: return AMDGPU::AReg_192RegClassID; 1957 case 8: return AMDGPU::AReg_256RegClassID; 1958 case 16: return AMDGPU::AReg_512RegClassID; 1959 case 32: return AMDGPU::AReg_1024RegClassID; 1960 } 1961 } 1962 return -1; 1963 } 1964 1965 static unsigned getSpecialRegForName(StringRef RegName) { 1966 return StringSwitch<unsigned>(RegName) 1967 .Case("exec", AMDGPU::EXEC) 1968 .Case("vcc", AMDGPU::VCC) 1969 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1970 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1971 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1972 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1973 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1974 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1975 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1976 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1977 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1978 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1979 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1980 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1981 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1982 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1983 .Case("m0", AMDGPU::M0) 1984 .Case("vccz", AMDGPU::SRC_VCCZ) 1985 .Case("src_vccz", AMDGPU::SRC_VCCZ) 1986 .Case("execz", AMDGPU::SRC_EXECZ) 1987 .Case("src_execz", AMDGPU::SRC_EXECZ) 1988 .Case("scc", AMDGPU::SRC_SCC) 1989 .Case("src_scc", AMDGPU::SRC_SCC) 1990 .Case("tba", AMDGPU::TBA) 1991 .Case("tma", AMDGPU::TMA) 1992 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1993 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1994 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1995 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1996 .Case("vcc_lo", AMDGPU::VCC_LO) 1997 .Case("vcc_hi", AMDGPU::VCC_HI) 1998 .Case("exec_lo", AMDGPU::EXEC_LO) 1999 .Case("exec_hi", AMDGPU::EXEC_HI) 2000 .Case("tma_lo", AMDGPU::TMA_LO) 2001 .Case("tma_hi", AMDGPU::TMA_HI) 2002 .Case("tba_lo", AMDGPU::TBA_LO) 2003 .Case("tba_hi", AMDGPU::TBA_HI) 2004 .Case("pc", AMDGPU::PC_REG) 2005 .Case("null", AMDGPU::SGPR_NULL) 2006 .Default(AMDGPU::NoRegister); 2007 } 2008 2009 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2010 SMLoc &EndLoc, bool RestoreOnFailure) { 2011 auto R = parseRegister(); 2012 if (!R) return true; 2013 assert(R->isReg()); 2014 RegNo = R->getReg(); 2015 StartLoc = R->getStartLoc(); 2016 EndLoc = R->getEndLoc(); 2017 return false; 2018 } 2019 2020 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2021 SMLoc &EndLoc) { 2022 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2023 } 2024 2025 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2026 SMLoc &StartLoc, 2027 SMLoc &EndLoc) { 2028 bool Result = 2029 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2030 bool PendingErrors = getParser().hasPendingError(); 2031 getParser().clearPendingErrors(); 2032 if (PendingErrors) 2033 return MatchOperand_ParseFail; 2034 if (Result) 2035 return MatchOperand_NoMatch; 2036 return MatchOperand_Success; 2037 } 2038 2039 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2040 RegisterKind RegKind, unsigned Reg1) { 2041 switch (RegKind) { 2042 case IS_SPECIAL: 2043 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2044 Reg = AMDGPU::EXEC; 2045 RegWidth = 2; 2046 return true; 2047 } 2048 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2049 Reg = AMDGPU::FLAT_SCR; 2050 RegWidth = 2; 2051 return true; 2052 } 2053 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2054 Reg = AMDGPU::XNACK_MASK; 2055 RegWidth = 2; 2056 return true; 2057 } 2058 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2059 Reg = AMDGPU::VCC; 2060 RegWidth = 2; 2061 return true; 2062 } 2063 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2064 Reg = AMDGPU::TBA; 2065 RegWidth = 2; 2066 return true; 2067 } 2068 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2069 Reg = AMDGPU::TMA; 2070 RegWidth = 2; 
2071 return true; 2072 } 2073 return false; 2074 case IS_VGPR: 2075 case IS_SGPR: 2076 case IS_AGPR: 2077 case IS_TTMP: 2078 if (Reg1 != Reg + RegWidth) { 2079 return false; 2080 } 2081 RegWidth++; 2082 return true; 2083 default: 2084 llvm_unreachable("unexpected register kind"); 2085 } 2086 } 2087 2088 struct RegInfo { 2089 StringLiteral Name; 2090 RegisterKind Kind; 2091 }; 2092 2093 static constexpr RegInfo RegularRegisters[] = { 2094 {{"v"}, IS_VGPR}, 2095 {{"s"}, IS_SGPR}, 2096 {{"ttmp"}, IS_TTMP}, 2097 {{"acc"}, IS_AGPR}, 2098 {{"a"}, IS_AGPR}, 2099 }; 2100 2101 static bool isRegularReg(RegisterKind Kind) { 2102 return Kind == IS_VGPR || 2103 Kind == IS_SGPR || 2104 Kind == IS_TTMP || 2105 Kind == IS_AGPR; 2106 } 2107 2108 static const RegInfo* getRegularRegInfo(StringRef Str) { 2109 for (const RegInfo &Reg : RegularRegisters) 2110 if (Str.startswith(Reg.Name)) 2111 return &Reg; 2112 return nullptr; 2113 } 2114 2115 static bool getRegNum(StringRef Str, unsigned& Num) { 2116 return !Str.getAsInteger(10, Num); 2117 } 2118 2119 bool 2120 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2121 const AsmToken &NextToken) const { 2122 2123 // A list of consecutive registers: [s0,s1,s2,s3] 2124 if (Token.is(AsmToken::LBrac)) 2125 return true; 2126 2127 if (!Token.is(AsmToken::Identifier)) 2128 return false; 2129 2130 // A single register like s0 or a range of registers like s[0:1] 2131 2132 StringRef Str = Token.getString(); 2133 const RegInfo *Reg = getRegularRegInfo(Str); 2134 if (Reg) { 2135 StringRef RegName = Reg->Name; 2136 StringRef RegSuffix = Str.substr(RegName.size()); 2137 if (!RegSuffix.empty()) { 2138 unsigned Num; 2139 // A single register with an index: rXX 2140 if (getRegNum(RegSuffix, Num)) 2141 return true; 2142 } else { 2143 // A range of registers: r[XX:YY]. 2144 if (NextToken.is(AsmToken::LBrac)) 2145 return true; 2146 } 2147 } 2148 2149 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2150 } 2151 2152 bool 2153 AMDGPUAsmParser::isRegister() 2154 { 2155 return isRegister(getToken(), peekToken()); 2156 } 2157 2158 unsigned 2159 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2160 unsigned RegNum, 2161 unsigned RegWidth) { 2162 2163 assert(isRegularReg(RegKind)); 2164 2165 unsigned AlignSize = 1; 2166 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2167 // SGPR and TTMP registers must be aligned. 2168 // Max required alignment is 4 dwords. 
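// Illustrative examples (added, not from the original source): s[2:3] is
// accepted because RegNum 2 satisfies the 2-dword alignment, while s[1:2]
// is rejected below; for widths of 4 dwords or more the starting index must
// be a multiple of 4, e.g. s[4:7] is valid but s[2:5] is not.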
2169 AlignSize = std::min(RegWidth, 4u); 2170 } 2171 2172 if (RegNum % AlignSize != 0) 2173 return AMDGPU::NoRegister; 2174 2175 unsigned RegIdx = RegNum / AlignSize; 2176 int RCID = getRegClass(RegKind, RegWidth); 2177 if (RCID == -1) 2178 return AMDGPU::NoRegister; 2179 2180 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2181 const MCRegisterClass RC = TRI->getRegClass(RCID); 2182 if (RegIdx >= RC.getNumRegs()) 2183 return AMDGPU::NoRegister; 2184 2185 return RC.getRegister(RegIdx); 2186 } 2187 2188 bool 2189 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2190 int64_t RegLo, RegHi; 2191 if (!trySkipToken(AsmToken::LBrac)) 2192 return false; 2193 2194 if (!parseExpr(RegLo)) 2195 return false; 2196 2197 if (trySkipToken(AsmToken::Colon)) { 2198 if (!parseExpr(RegHi)) 2199 return false; 2200 } else { 2201 RegHi = RegLo; 2202 } 2203 2204 if (!trySkipToken(AsmToken::RBrac)) 2205 return false; 2206 2207 if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi) 2208 return false; 2209 2210 Num = static_cast<unsigned>(RegLo); 2211 Width = (RegHi - RegLo) + 1; 2212 return true; 2213 } 2214 2215 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2216 unsigned &RegNum, unsigned &RegWidth, 2217 SmallVectorImpl<AsmToken> &Tokens) { 2218 assert(isToken(AsmToken::Identifier)); 2219 unsigned Reg = getSpecialRegForName(getTokenStr()); 2220 if (Reg) { 2221 RegNum = 0; 2222 RegWidth = 1; 2223 RegKind = IS_SPECIAL; 2224 Tokens.push_back(getToken()); 2225 lex(); // skip register name 2226 } 2227 return Reg; 2228 } 2229 2230 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2231 unsigned &RegNum, unsigned &RegWidth, 2232 SmallVectorImpl<AsmToken> &Tokens) { 2233 assert(isToken(AsmToken::Identifier)); 2234 StringRef RegName = getTokenStr(); 2235 2236 const RegInfo *RI = getRegularRegInfo(RegName); 2237 if (!RI) 2238 return AMDGPU::NoRegister; 2239 Tokens.push_back(getToken()); 2240 lex(); // skip register name 2241 2242 RegKind = RI->Kind; 2243 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2244 if (!RegSuffix.empty()) { 2245 // Single 32-bit register: vXX. 2246 if (!getRegNum(RegSuffix, RegNum)) 2247 return AMDGPU::NoRegister; 2248 RegWidth = 1; 2249 } else { 2250 // Range of registers: v[XX:YY]. ":YY" is optional. 
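// For example (added note): "v[0:3]" yields RegNum = 0 and RegWidth = 4,
// while "v[5]" (no ":YY" part) yields RegNum = 5 and RegWidth = 1.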
2251 if (!ParseRegRange(RegNum, RegWidth)) 2252 return AMDGPU::NoRegister; 2253 } 2254 2255 return getRegularReg(RegKind, RegNum, RegWidth); 2256 } 2257 2258 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2259 unsigned &RegWidth, 2260 SmallVectorImpl<AsmToken> &Tokens) { 2261 unsigned Reg = AMDGPU::NoRegister; 2262 2263 if (!trySkipToken(AsmToken::LBrac)) 2264 return AMDGPU::NoRegister; 2265 2266 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2267 2268 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2269 return AMDGPU::NoRegister; 2270 if (RegWidth != 1) 2271 return AMDGPU::NoRegister; 2272 2273 for (; trySkipToken(AsmToken::Comma); ) { 2274 RegisterKind NextRegKind; 2275 unsigned NextReg, NextRegNum, NextRegWidth; 2276 2277 if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth, 2278 Tokens)) 2279 return AMDGPU::NoRegister; 2280 if (NextRegWidth != 1) 2281 return AMDGPU::NoRegister; 2282 if (NextRegKind != RegKind) 2283 return AMDGPU::NoRegister; 2284 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg)) 2285 return AMDGPU::NoRegister; 2286 } 2287 2288 if (!trySkipToken(AsmToken::RBrac)) 2289 return AMDGPU::NoRegister; 2290 2291 if (isRegularReg(RegKind)) 2292 Reg = getRegularReg(RegKind, RegNum, RegWidth); 2293 2294 return Reg; 2295 } 2296 2297 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2298 unsigned &RegNum, unsigned &RegWidth, 2299 SmallVectorImpl<AsmToken> &Tokens) { 2300 Reg = AMDGPU::NoRegister; 2301 2302 if (isToken(AsmToken::Identifier)) { 2303 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2304 if (Reg == AMDGPU::NoRegister) 2305 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2306 } else { 2307 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2308 } 2309 2310 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2311 return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg); 2312 } 2313 2314 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2315 unsigned &RegNum, unsigned &RegWidth, 2316 bool RestoreOnFailure) { 2317 Reg = AMDGPU::NoRegister; 2318 2319 SmallVector<AsmToken, 1> Tokens; 2320 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2321 if (RestoreOnFailure) { 2322 while (!Tokens.empty()) { 2323 getLexer().UnLex(Tokens.pop_back_val()); 2324 } 2325 } 2326 return true; 2327 } 2328 return false; 2329 } 2330 2331 Optional<StringRef> 2332 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2333 switch (RegKind) { 2334 case IS_VGPR: 2335 return StringRef(".amdgcn.next_free_vgpr"); 2336 case IS_SGPR: 2337 return StringRef(".amdgcn.next_free_sgpr"); 2338 default: 2339 return None; 2340 } 2341 } 2342 2343 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2344 auto SymbolName = getGprCountSymbolName(RegKind); 2345 assert(SymbolName && "initializing invalid register kind"); 2346 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2347 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2348 } 2349 2350 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2351 unsigned DwordRegIndex, 2352 unsigned RegWidth) { 2353 // Symbols are only defined for GCN targets 2354 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2355 return true; 2356 2357 auto SymbolName = getGprCountSymbolName(RegKind); 2358 if (!SymbolName) 2359 return true; 2360 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2361 2362 int64_t NewMax 
= DwordRegIndex + RegWidth - 1; 2363 int64_t OldCount; 2364 2365 if (!Sym->isVariable()) 2366 return !Error(getParser().getTok().getLoc(), 2367 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2368 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2369 return !Error( 2370 getParser().getTok().getLoc(), 2371 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2372 2373 if (OldCount <= NewMax) 2374 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2375 2376 return true; 2377 } 2378 2379 std::unique_ptr<AMDGPUOperand> 2380 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2381 const auto &Tok = Parser.getTok(); 2382 SMLoc StartLoc = Tok.getLoc(); 2383 SMLoc EndLoc = Tok.getEndLoc(); 2384 RegisterKind RegKind; 2385 unsigned Reg, RegNum, RegWidth; 2386 2387 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2388 //FIXME: improve error messages (bug 41303). 2389 Error(StartLoc, "not a valid operand."); 2390 return nullptr; 2391 } 2392 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2393 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2394 return nullptr; 2395 } else 2396 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2397 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2398 } 2399 2400 OperandMatchResultTy 2401 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2402 // TODO: add syntactic sugar for 1/(2*PI) 2403 2404 assert(!isRegister()); 2405 assert(!isModifier()); 2406 2407 const auto& Tok = getToken(); 2408 const auto& NextTok = peekToken(); 2409 bool IsReal = Tok.is(AsmToken::Real); 2410 SMLoc S = getLoc(); 2411 bool Negate = false; 2412 2413 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2414 lex(); 2415 IsReal = true; 2416 Negate = true; 2417 } 2418 2419 if (IsReal) { 2420 // Floating-point expressions are not supported. 2421 // Can only allow floating-point literals with an 2422 // optional sign. 2423 2424 StringRef Num = getTokenStr(); 2425 lex(); 2426 2427 APFloat RealVal(APFloat::IEEEdouble()); 2428 auto roundMode = APFloat::rmNearestTiesToEven; 2429 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2430 return MatchOperand_ParseFail; 2431 } 2432 if (Negate) 2433 RealVal.changeSign(); 2434 2435 Operands.push_back( 2436 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2437 AMDGPUOperand::ImmTyNone, true)); 2438 2439 return MatchOperand_Success; 2440 2441 } else { 2442 int64_t IntVal; 2443 const MCExpr *Expr; 2444 SMLoc S = getLoc(); 2445 2446 if (HasSP3AbsModifier) { 2447 // This is a workaround for handling expressions 2448 // as arguments of SP3 'abs' modifier, for example: 2449 // |1.0| 2450 // |-1| 2451 // |1+x| 2452 // This syntax is not compatible with syntax of standard 2453 // MC expressions (due to the trailing '|'). 
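// Added note (an interpretation): parsePrimaryExpr() is used below so that
// the closing '|' is left for the caller; a full parseExpression() call
// would treat '|' as the bitwise-OR operator and try to consume it.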
2454 SMLoc EndLoc; 2455 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2456 return MatchOperand_ParseFail; 2457 } else { 2458 if (Parser.parseExpression(Expr)) 2459 return MatchOperand_ParseFail; 2460 } 2461 2462 if (Expr->evaluateAsAbsolute(IntVal)) { 2463 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2464 } else { 2465 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2466 } 2467 2468 return MatchOperand_Success; 2469 } 2470 2471 return MatchOperand_NoMatch; 2472 } 2473 2474 OperandMatchResultTy 2475 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2476 if (!isRegister()) 2477 return MatchOperand_NoMatch; 2478 2479 if (auto R = parseRegister()) { 2480 assert(R->isReg()); 2481 Operands.push_back(std::move(R)); 2482 return MatchOperand_Success; 2483 } 2484 return MatchOperand_ParseFail; 2485 } 2486 2487 OperandMatchResultTy 2488 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2489 auto res = parseReg(Operands); 2490 if (res != MatchOperand_NoMatch) { 2491 return res; 2492 } else if (isModifier()) { 2493 return MatchOperand_NoMatch; 2494 } else { 2495 return parseImm(Operands, HasSP3AbsMod); 2496 } 2497 } 2498 2499 bool 2500 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2501 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2502 const auto &str = Token.getString(); 2503 return str == "abs" || str == "neg" || str == "sext"; 2504 } 2505 return false; 2506 } 2507 2508 bool 2509 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2510 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2511 } 2512 2513 bool 2514 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2515 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2516 } 2517 2518 bool 2519 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2520 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2521 } 2522 2523 // Check if this is an operand modifier or an opcode modifier 2524 // which may look like an expression but it is not. We should 2525 // avoid parsing these modifiers as expressions. Currently 2526 // recognized sequences are: 2527 // |...| 2528 // abs(...) 2529 // neg(...) 2530 // sext(...) 2531 // -reg 2532 // -|...| 2533 // -abs(...) 2534 // name:... 2535 // Note that simple opcode modifiers like 'gds' may be parsed as 2536 // expressions; this is a special case. See getExpressionAsToken. 2537 // 2538 bool 2539 AMDGPUAsmParser::isModifier() { 2540 2541 AsmToken Tok = getToken(); 2542 AsmToken NextToken[2]; 2543 peekTokens(NextToken); 2544 2545 return isOperandModifier(Tok, NextToken[0]) || 2546 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2547 isOpcodeModifierWithVal(Tok, NextToken[0]); 2548 } 2549 2550 // Check if the current token is an SP3 'neg' modifier. 2551 // Currently this modifier is allowed in the following context: 2552 // 2553 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2554 // 2. Before an 'abs' modifier: -abs(...) 2555 // 3. Before an SP3 'abs' modifier: -|...| 2556 // 2557 // In all other cases "-" is handled as a part 2558 // of an expression that follows the sign. 
2559 //
2560 // Note: When "-" is followed by an integer literal,
2561 // this is interpreted as integer negation rather
2562 // than a floating-point NEG modifier applied to N.
2563 // Besides being counter-intuitive, such use of a floating-point
2564 // NEG modifier would have resulted in different meanings
2565 // of integer literals used with VOP1/2/C and VOP3,
2566 // for example:
2567 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2568 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2569 // Negative fp literals with a preceding "-" are
2570 // handled likewise for uniformity.
2571 //
2572 bool
2573 AMDGPUAsmParser::parseSP3NegModifier() {
2574
2575 AsmToken NextToken[2];
2576 peekTokens(NextToken);
2577
2578 if (isToken(AsmToken::Minus) &&
2579 (isRegister(NextToken[0], NextToken[1]) ||
2580 NextToken[0].is(AsmToken::Pipe) ||
2581 isId(NextToken[0], "abs"))) {
2582 lex();
2583 return true;
2584 }
2585
2586 return false;
2587 }
2588
2589 OperandMatchResultTy
2590 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2591 bool AllowImm) {
2592 bool Neg, SP3Neg;
2593 bool Abs, SP3Abs;
2594 SMLoc Loc;
2595
2596 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2597 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2598 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2599 return MatchOperand_ParseFail;
2600 }
2601
2602 SP3Neg = parseSP3NegModifier();
2603
2604 Loc = getLoc();
2605 Neg = trySkipId("neg");
2606 if (Neg && SP3Neg) {
2607 Error(Loc, "expected register or immediate");
2608 return MatchOperand_ParseFail;
2609 }
2610 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2611 return MatchOperand_ParseFail;
2612
2613 Abs = trySkipId("abs");
2614 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2615 return MatchOperand_ParseFail;
2616
2617 Loc = getLoc();
2618 SP3Abs = trySkipToken(AsmToken::Pipe);
2619 if (Abs && SP3Abs) {
2620 Error(Loc, "expected register or immediate");
2621 return MatchOperand_ParseFail;
2622 }
2623
2624 OperandMatchResultTy Res;
2625 if (AllowImm) {
2626 Res = parseRegOrImm(Operands, SP3Abs);
2627 } else {
2628 Res = parseReg(Operands);
2629 }
2630 if (Res != MatchOperand_Success) {
2631 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2632 } 2633 2634 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2635 return MatchOperand_ParseFail; 2636 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2637 return MatchOperand_ParseFail; 2638 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2639 return MatchOperand_ParseFail; 2640 2641 AMDGPUOperand::Modifiers Mods; 2642 Mods.Abs = Abs || SP3Abs; 2643 Mods.Neg = Neg || SP3Neg; 2644 2645 if (Mods.hasFPModifiers()) { 2646 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2647 if (Op.isExpr()) { 2648 Error(Op.getStartLoc(), "expected an absolute expression"); 2649 return MatchOperand_ParseFail; 2650 } 2651 Op.setModifiers(Mods); 2652 } 2653 return MatchOperand_Success; 2654 } 2655 2656 OperandMatchResultTy 2657 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2658 bool AllowImm) { 2659 bool Sext = trySkipId("sext"); 2660 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2661 return MatchOperand_ParseFail; 2662 2663 OperandMatchResultTy Res; 2664 if (AllowImm) { 2665 Res = parseRegOrImm(Operands); 2666 } else { 2667 Res = parseReg(Operands); 2668 } 2669 if (Res != MatchOperand_Success) { 2670 return Sext? MatchOperand_ParseFail : Res; 2671 } 2672 2673 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2674 return MatchOperand_ParseFail; 2675 2676 AMDGPUOperand::Modifiers Mods; 2677 Mods.Sext = Sext; 2678 2679 if (Mods.hasIntModifiers()) { 2680 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2681 if (Op.isExpr()) { 2682 Error(Op.getStartLoc(), "expected an absolute expression"); 2683 return MatchOperand_ParseFail; 2684 } 2685 Op.setModifiers(Mods); 2686 } 2687 2688 return MatchOperand_Success; 2689 } 2690 2691 OperandMatchResultTy 2692 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2693 return parseRegOrImmWithFPInputMods(Operands, false); 2694 } 2695 2696 OperandMatchResultTy 2697 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2698 return parseRegOrImmWithIntInputMods(Operands, false); 2699 } 2700 2701 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2702 auto Loc = getLoc(); 2703 if (trySkipId("off")) { 2704 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2705 AMDGPUOperand::ImmTyOff, false)); 2706 return MatchOperand_Success; 2707 } 2708 2709 if (!isRegister()) 2710 return MatchOperand_NoMatch; 2711 2712 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2713 if (Reg) { 2714 Operands.push_back(std::move(Reg)); 2715 return MatchOperand_Success; 2716 } 2717 2718 return MatchOperand_ParseFail; 2719 2720 } 2721 2722 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2723 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2724 2725 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2726 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2727 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2728 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2729 return Match_InvalidOperand; 2730 2731 if ((TSFlags & SIInstrFlags::VOP3) && 2732 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2733 getForcedEncodingSize() != 64) 2734 return Match_PreferE32; 2735 2736 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2737 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2738 // v_mac_f32/16 allow only dst_sel == DWORD; 2739 auto OpNum = 2740 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2741 const auto &Op = Inst.getOperand(OpNum);
2742 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2743 return Match_InvalidOperand;
2744 }
2745 }
2746
2747 return Match_Success;
2748 }
2749
2750 // Which asm variants we should check
2751 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2752 if (getForcedEncodingSize() == 32) {
2753 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2754 return makeArrayRef(Variants);
2755 }
2756
2757 if (isForcedVOP3()) {
2758 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2759 return makeArrayRef(Variants);
2760 }
2761
2762 if (isForcedSDWA()) {
2763 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2764 AMDGPUAsmVariants::SDWA9};
2765 return makeArrayRef(Variants);
2766 }
2767
2768 if (isForcedDPP()) {
2769 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2770 return makeArrayRef(Variants);
2771 }
2772
2773 static const unsigned Variants[] = {
2774 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2775 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2776 };
2777
2778 return makeArrayRef(Variants);
2779 }
2780
2781 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2782 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2783 const unsigned Num = Desc.getNumImplicitUses();
2784 for (unsigned i = 0; i < Num; ++i) {
2785 unsigned Reg = Desc.ImplicitUses[i];
2786 switch (Reg) {
2787 case AMDGPU::FLAT_SCR:
2788 case AMDGPU::VCC:
2789 case AMDGPU::VCC_LO:
2790 case AMDGPU::VCC_HI:
2791 case AMDGPU::M0:
2792 return Reg;
2793 default:
2794 break;
2795 }
2796 }
2797 return AMDGPU::NoRegister;
2798 }
2799
2800 // NB: This code is correct only when used to check constant
2801 // bus limitations because GFX7 supports no f16 inline constants.
2802 // Note that there are no cases when a GFX7 opcode violates
2803 // constant bus limitations due to the use of an f16 constant.
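// Reference note (added; values recalled from the ISA documentation rather
// than from this file): the inline constants are the integers -16..64 plus
// the fp values 0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0, and 1/(2*pi) on
// targets that support the inv2pi inline immediate.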
2804 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2805 unsigned OpIdx) const { 2806 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2807 2808 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2809 return false; 2810 } 2811 2812 const MCOperand &MO = Inst.getOperand(OpIdx); 2813 2814 int64_t Val = MO.getImm(); 2815 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2816 2817 switch (OpSize) { // expected operand size 2818 case 8: 2819 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2820 case 4: 2821 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2822 case 2: { 2823 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2824 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2825 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2826 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2827 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2828 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2829 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2830 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2831 } else { 2832 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2833 } 2834 } 2835 default: 2836 llvm_unreachable("invalid operand size"); 2837 } 2838 } 2839 2840 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2841 if (!isGFX10()) 2842 return 1; 2843 2844 switch (Opcode) { 2845 // 64-bit shift instructions can use only one scalar value input 2846 case AMDGPU::V_LSHLREV_B64: 2847 case AMDGPU::V_LSHLREV_B64_gfx10: 2848 case AMDGPU::V_LSHL_B64: 2849 case AMDGPU::V_LSHRREV_B64: 2850 case AMDGPU::V_LSHRREV_B64_gfx10: 2851 case AMDGPU::V_LSHR_B64: 2852 case AMDGPU::V_ASHRREV_I64: 2853 case AMDGPU::V_ASHRREV_I64_gfx10: 2854 case AMDGPU::V_ASHR_I64: 2855 return 1; 2856 default: 2857 return 2; 2858 } 2859 } 2860 2861 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2862 const MCOperand &MO = Inst.getOperand(OpIdx); 2863 if (MO.isImm()) { 2864 return !isInlineConstant(Inst, OpIdx); 2865 } else if (MO.isReg()) { 2866 auto Reg = MO.getReg(); 2867 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2868 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2869 } else { 2870 return true; 2871 } 2872 } 2873 2874 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2875 const unsigned Opcode = Inst.getOpcode(); 2876 const MCInstrDesc &Desc = MII.get(Opcode); 2877 unsigned ConstantBusUseCount = 0; 2878 unsigned NumLiterals = 0; 2879 unsigned LiteralSize; 2880 2881 if (Desc.TSFlags & 2882 (SIInstrFlags::VOPC | 2883 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2884 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2885 SIInstrFlags::SDWA)) { 2886 // Check special imm operands (used by madmk, etc) 2887 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2888 ++ConstantBusUseCount; 2889 } 2890 2891 SmallDenseSet<unsigned> SGPRsUsed; 2892 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2893 if (SGPRUsed != AMDGPU::NoRegister) { 2894 SGPRsUsed.insert(SGPRUsed); 2895 ++ConstantBusUseCount; 2896 } 2897 2898 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2899 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2900 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2901 2902 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2903 2904 for (int OpIdx : OpIndices) { 2905 if (OpIdx == -1) break; 2906 2907 const MCOperand &MO = 
Inst.getOperand(OpIdx); 2908 if (usesConstantBus(Inst, OpIdx)) { 2909 if (MO.isReg()) { 2910 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2911 // Pairs of registers with a partial intersections like these 2912 // s0, s[0:1] 2913 // flat_scratch_lo, flat_scratch 2914 // flat_scratch_lo, flat_scratch_hi 2915 // are theoretically valid but they are disabled anyway. 2916 // Note that this code mimics SIInstrInfo::verifyInstruction 2917 if (!SGPRsUsed.count(Reg)) { 2918 SGPRsUsed.insert(Reg); 2919 ++ConstantBusUseCount; 2920 } 2921 } else { // Expression or a literal 2922 2923 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2924 continue; // special operand like VINTERP attr_chan 2925 2926 // An instruction may use only one literal. 2927 // This has been validated on the previous step. 2928 // See validateVOP3Literal. 2929 // This literal may be used as more than one operand. 2930 // If all these operands are of the same size, 2931 // this literal counts as one scalar value. 2932 // Otherwise it counts as 2 scalar values. 2933 // See "GFX10 Shader Programming", section 3.6.2.3. 2934 2935 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2936 if (Size < 4) Size = 4; 2937 2938 if (NumLiterals == 0) { 2939 NumLiterals = 1; 2940 LiteralSize = Size; 2941 } else if (LiteralSize != Size) { 2942 NumLiterals = 2; 2943 } 2944 } 2945 } 2946 } 2947 } 2948 ConstantBusUseCount += NumLiterals; 2949 2950 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 2951 } 2952 2953 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2954 const unsigned Opcode = Inst.getOpcode(); 2955 const MCInstrDesc &Desc = MII.get(Opcode); 2956 2957 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2958 if (DstIdx == -1 || 2959 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2960 return true; 2961 } 2962 2963 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2964 2965 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2966 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2967 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2968 2969 assert(DstIdx != -1); 2970 const MCOperand &Dst = Inst.getOperand(DstIdx); 2971 assert(Dst.isReg()); 2972 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2973 2974 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2975 2976 for (int SrcIdx : SrcIndices) { 2977 if (SrcIdx == -1) break; 2978 const MCOperand &Src = Inst.getOperand(SrcIdx); 2979 if (Src.isReg()) { 2980 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2981 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2982 return false; 2983 } 2984 } 2985 } 2986 2987 return true; 2988 } 2989 2990 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2991 2992 const unsigned Opc = Inst.getOpcode(); 2993 const MCInstrDesc &Desc = MII.get(Opc); 2994 2995 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2996 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2997 assert(ClampIdx != -1); 2998 return Inst.getOperand(ClampIdx).getImm() == 0; 2999 } 3000 3001 return true; 3002 } 3003 3004 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3005 3006 const unsigned Opc = Inst.getOpcode(); 3007 const MCInstrDesc &Desc = MII.get(Opc); 3008 3009 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3010 return true; 3011 3012 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3013 int DMaskIdx 
= AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3014 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3015 3016 assert(VDataIdx != -1); 3017 assert(DMaskIdx != -1); 3018 assert(TFEIdx != -1); 3019 3020 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3021 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3022 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3023 if (DMask == 0) 3024 DMask = 1; 3025 3026 unsigned DataSize = 3027 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3028 if (hasPackedD16()) { 3029 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3030 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3031 DataSize = (DataSize + 1) / 2; 3032 } 3033 3034 return (VDataSize / 4) == DataSize + TFESize; 3035 } 3036 3037 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3038 const unsigned Opc = Inst.getOpcode(); 3039 const MCInstrDesc &Desc = MII.get(Opc); 3040 3041 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3042 return true; 3043 3044 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3045 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3046 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3047 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3048 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3049 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3050 3051 assert(VAddr0Idx != -1); 3052 assert(SrsrcIdx != -1); 3053 assert(DimIdx != -1); 3054 assert(SrsrcIdx > VAddr0Idx); 3055 3056 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3057 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3058 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3059 unsigned VAddrSize = 3060 IsNSA ? SrsrcIdx - VAddr0Idx 3061 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3062 3063 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3064 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3065 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3066 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3067 if (!IsNSA) { 3068 if (AddrSize > 8) 3069 AddrSize = 16; 3070 else if (AddrSize > 4) 3071 AddrSize = 8; 3072 } 3073 3074 return VAddrSize == AddrSize; 3075 } 3076 3077 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3078 3079 const unsigned Opc = Inst.getOpcode(); 3080 const MCInstrDesc &Desc = MII.get(Opc); 3081 3082 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3083 return true; 3084 if (!Desc.mayLoad() || !Desc.mayStore()) 3085 return true; // Not atomic 3086 3087 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3088 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3089 3090 // This is an incomplete check because image_atomic_cmpswap 3091 // may only use 0x3 and 0xf while other atomic operations 3092 // may use 0x1 and 0x3. However these limitations are 3093 // verified when we check that dmask matches dst size. 
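// Illustrative examples (added): a 32-bit image_atomic_add uses dmask 0x1
// and a 64-bit one uses 0x3; image_atomic_cmpswap carries both the compare
// and swap values, so it uses 0x3 (32-bit) or 0xf (64-bit).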
3094 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3095 } 3096 3097 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3098 3099 const unsigned Opc = Inst.getOpcode(); 3100 const MCInstrDesc &Desc = MII.get(Opc); 3101 3102 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3103 return true; 3104 3105 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3106 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3107 3108 // GATHER4 instructions use dmask in a different fashion compared to 3109 // other MIMG instructions. The only useful DMASK values are 3110 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3111 // (red,red,red,red) etc.) The ISA document doesn't mention 3112 // this. 3113 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3114 } 3115 3116 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3117 { 3118 switch (Opcode) { 3119 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3120 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3121 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3122 return true; 3123 default: 3124 return false; 3125 } 3126 } 3127 3128 // movrels* opcodes should only allow VGPRS as src0. 3129 // This is specified in .td description for vop1/vop3, 3130 // but sdwa is handled differently. See isSDWAOperand. 3131 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3132 3133 const unsigned Opc = Inst.getOpcode(); 3134 const MCInstrDesc &Desc = MII.get(Opc); 3135 3136 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3137 return true; 3138 3139 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3140 assert(Src0Idx != -1); 3141 3142 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3143 if (!Src0.isReg()) 3144 return false; 3145 3146 auto Reg = Src0.getReg(); 3147 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3148 return !isSGPR(mc2PseudoReg(Reg), TRI); 3149 } 3150 3151 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) { 3152 3153 const unsigned Opc = Inst.getOpcode(); 3154 3155 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3156 return true; 3157 3158 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3159 assert(Src0Idx != -1); 3160 3161 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3162 if (!Src0.isReg()) 3163 return true; 3164 3165 auto Reg = Src0.getReg(); 3166 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3167 if (isSGPR(mc2PseudoReg(Reg), TRI)) { 3168 Error(getLoc(), "source operand must be either a VGPR or an inline constant"); 3169 return false; 3170 } 3171 3172 return true; 3173 } 3174 3175 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3176 3177 const unsigned Opc = Inst.getOpcode(); 3178 const MCInstrDesc &Desc = MII.get(Opc); 3179 3180 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3181 return true; 3182 3183 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3184 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3185 if (isCI() || isSI()) 3186 return false; 3187 } 3188 3189 return true; 3190 } 3191 3192 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3193 const unsigned Opc = Inst.getOpcode(); 3194 const MCInstrDesc &Desc = MII.get(Opc); 3195 3196 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3197 return true; 3198 3199 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3200 if (DimIdx < 0) 3201 return true; 3202 3203 long Imm = Inst.getOperand(DimIdx).getImm(); 3204 if (Imm < 0 || Imm >= 8) 3205 return false; 3206 3207 return 
true; 3208 } 3209 3210 static bool IsRevOpcode(const unsigned Opcode) 3211 { 3212 switch (Opcode) { 3213 case AMDGPU::V_SUBREV_F32_e32: 3214 case AMDGPU::V_SUBREV_F32_e64: 3215 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3216 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3217 case AMDGPU::V_SUBREV_F32_e32_vi: 3218 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3219 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3220 case AMDGPU::V_SUBREV_F32_e64_vi: 3221 3222 case AMDGPU::V_SUBREV_I32_e32: 3223 case AMDGPU::V_SUBREV_I32_e64: 3224 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3225 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3226 3227 case AMDGPU::V_SUBBREV_U32_e32: 3228 case AMDGPU::V_SUBBREV_U32_e64: 3229 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3230 case AMDGPU::V_SUBBREV_U32_e32_vi: 3231 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3232 case AMDGPU::V_SUBBREV_U32_e64_vi: 3233 3234 case AMDGPU::V_SUBREV_U32_e32: 3235 case AMDGPU::V_SUBREV_U32_e64: 3236 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3237 case AMDGPU::V_SUBREV_U32_e32_vi: 3238 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3239 case AMDGPU::V_SUBREV_U32_e64_vi: 3240 3241 case AMDGPU::V_SUBREV_F16_e32: 3242 case AMDGPU::V_SUBREV_F16_e64: 3243 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3244 case AMDGPU::V_SUBREV_F16_e32_vi: 3245 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3246 case AMDGPU::V_SUBREV_F16_e64_vi: 3247 3248 case AMDGPU::V_SUBREV_U16_e32: 3249 case AMDGPU::V_SUBREV_U16_e64: 3250 case AMDGPU::V_SUBREV_U16_e32_vi: 3251 case AMDGPU::V_SUBREV_U16_e64_vi: 3252 3253 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3254 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3255 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3256 3257 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3258 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3259 3260 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3261 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3262 3263 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3264 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3265 3266 case AMDGPU::V_LSHRREV_B32_e32: 3267 case AMDGPU::V_LSHRREV_B32_e64: 3268 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3269 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3270 case AMDGPU::V_LSHRREV_B32_e32_vi: 3271 case AMDGPU::V_LSHRREV_B32_e64_vi: 3272 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3273 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3274 3275 case AMDGPU::V_ASHRREV_I32_e32: 3276 case AMDGPU::V_ASHRREV_I32_e64: 3277 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3278 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3279 case AMDGPU::V_ASHRREV_I32_e32_vi: 3280 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3281 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3282 case AMDGPU::V_ASHRREV_I32_e64_vi: 3283 3284 case AMDGPU::V_LSHLREV_B32_e32: 3285 case AMDGPU::V_LSHLREV_B32_e64: 3286 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3287 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3288 case AMDGPU::V_LSHLREV_B32_e32_vi: 3289 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3290 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3291 case AMDGPU::V_LSHLREV_B32_e64_vi: 3292 3293 case AMDGPU::V_LSHLREV_B16_e32: 3294 case AMDGPU::V_LSHLREV_B16_e64: 3295 case AMDGPU::V_LSHLREV_B16_e32_vi: 3296 case AMDGPU::V_LSHLREV_B16_e64_vi: 3297 case AMDGPU::V_LSHLREV_B16_gfx10: 3298 3299 case AMDGPU::V_LSHRREV_B16_e32: 3300 case AMDGPU::V_LSHRREV_B16_e64: 3301 case AMDGPU::V_LSHRREV_B16_e32_vi: 3302 case AMDGPU::V_LSHRREV_B16_e64_vi: 3303 case AMDGPU::V_LSHRREV_B16_gfx10: 3304 3305 case AMDGPU::V_ASHRREV_I16_e32: 3306 case AMDGPU::V_ASHRREV_I16_e64: 3307 case AMDGPU::V_ASHRREV_I16_e32_vi: 3308 case AMDGPU::V_ASHRREV_I16_e64_vi: 3309 case AMDGPU::V_ASHRREV_I16_gfx10: 3310 3311 case 
AMDGPU::V_LSHLREV_B64: 3312 case AMDGPU::V_LSHLREV_B64_gfx10: 3313 case AMDGPU::V_LSHLREV_B64_vi: 3314 3315 case AMDGPU::V_LSHRREV_B64: 3316 case AMDGPU::V_LSHRREV_B64_gfx10: 3317 case AMDGPU::V_LSHRREV_B64_vi: 3318 3319 case AMDGPU::V_ASHRREV_I64: 3320 case AMDGPU::V_ASHRREV_I64_gfx10: 3321 case AMDGPU::V_ASHRREV_I64_vi: 3322 3323 case AMDGPU::V_PK_LSHLREV_B16: 3324 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3325 case AMDGPU::V_PK_LSHLREV_B16_vi: 3326 3327 case AMDGPU::V_PK_LSHRREV_B16: 3328 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3329 case AMDGPU::V_PK_LSHRREV_B16_vi: 3330 case AMDGPU::V_PK_ASHRREV_I16: 3331 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3332 case AMDGPU::V_PK_ASHRREV_I16_vi: 3333 return true; 3334 default: 3335 return false; 3336 } 3337 } 3338 3339 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3340 3341 using namespace SIInstrFlags; 3342 const unsigned Opcode = Inst.getOpcode(); 3343 const MCInstrDesc &Desc = MII.get(Opcode); 3344 3345 // lds_direct register is defined so that it can be used 3346 // with 9-bit operands only. Ignore encodings which do not accept these. 3347 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3348 return true; 3349 3350 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3351 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3352 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3353 3354 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3355 3356 // lds_direct cannot be specified as either src1 or src2. 3357 for (int SrcIdx : SrcIndices) { 3358 if (SrcIdx == -1) break; 3359 const MCOperand &Src = Inst.getOperand(SrcIdx); 3360 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3361 return false; 3362 } 3363 } 3364 3365 if (Src0Idx == -1) 3366 return true; 3367 3368 const MCOperand &Src = Inst.getOperand(Src0Idx); 3369 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3370 return true; 3371 3372 // lds_direct is specified as src0. Check additional limitations. 3373 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3374 } 3375 3376 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3377 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3378 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3379 if (Op.isFlatOffset()) 3380 return Op.getStartLoc(); 3381 } 3382 return getLoc(); 3383 } 3384 3385 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3386 const OperandVector &Operands) { 3387 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3388 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3389 return true; 3390 3391 auto Opcode = Inst.getOpcode(); 3392 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3393 assert(OpNum != -1); 3394 3395 const auto &Op = Inst.getOperand(OpNum); 3396 if (!hasFlatOffsets() && Op.getImm() != 0) { 3397 Error(getFlatOffsetLoc(Operands), 3398 "flat offset modifier is not supported on this GPU"); 3399 return false; 3400 } 3401 3402 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3403 // For FLAT segment the offset must be positive; 3404 // MSB is ignored and forced to zero. 3405 unsigned OffsetSize = isGFX9() ? 13 : 12; 3406 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3407 if (!isIntN(OffsetSize, Op.getImm())) { 3408 Error(getFlatOffsetLoc(Operands), 3409 isGFX9() ? 
"expected a 13-bit signed offset" : 3410 "expected a 12-bit signed offset"); 3411 return false; 3412 } 3413 } else { 3414 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3415 Error(getFlatOffsetLoc(Operands), 3416 isGFX9() ? "expected a 12-bit unsigned offset" : 3417 "expected an 11-bit unsigned offset"); 3418 return false; 3419 } 3420 } 3421 3422 return true; 3423 } 3424 3425 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3426 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3427 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3428 if (Op.isSMEMOffset()) 3429 return Op.getStartLoc(); 3430 } 3431 return getLoc(); 3432 } 3433 3434 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3435 const OperandVector &Operands) { 3436 if (isCI() || isSI()) 3437 return true; 3438 3439 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3440 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3441 return true; 3442 3443 auto Opcode = Inst.getOpcode(); 3444 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3445 if (OpNum == -1) 3446 return true; 3447 3448 const auto &Op = Inst.getOperand(OpNum); 3449 if (!Op.isImm()) 3450 return true; 3451 3452 uint64_t Offset = Op.getImm(); 3453 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3454 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3455 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3456 return true; 3457 3458 Error(getSMEMOffsetLoc(Operands), 3459 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3460 "expected a 21-bit signed offset"); 3461 3462 return false; 3463 } 3464 3465 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3466 unsigned Opcode = Inst.getOpcode(); 3467 const MCInstrDesc &Desc = MII.get(Opcode); 3468 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3469 return true; 3470 3471 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3472 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3473 3474 const int OpIndices[] = { Src0Idx, Src1Idx }; 3475 3476 unsigned NumExprs = 0; 3477 unsigned NumLiterals = 0; 3478 uint32_t LiteralValue; 3479 3480 for (int OpIdx : OpIndices) { 3481 if (OpIdx == -1) break; 3482 3483 const MCOperand &MO = Inst.getOperand(OpIdx); 3484 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3485 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3486 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3487 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3488 if (NumLiterals == 0 || LiteralValue != Value) { 3489 LiteralValue = Value; 3490 ++NumLiterals; 3491 } 3492 } else if (MO.isExpr()) { 3493 ++NumExprs; 3494 } 3495 } 3496 } 3497 3498 return NumLiterals + NumExprs <= 1; 3499 } 3500 3501 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3502 const unsigned Opc = Inst.getOpcode(); 3503 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3504 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3505 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3506 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3507 3508 if (OpSel & ~3) 3509 return false; 3510 } 3511 return true; 3512 } 3513 3514 // Check if VCC register matches wavefront size 3515 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3516 auto FB = getFeatureBits(); 3517 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3518 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3519 } 3520 3521 // 
VOP3 literal is only allowed in GFX10+ and only one can be used 3522 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3523 unsigned Opcode = Inst.getOpcode(); 3524 const MCInstrDesc &Desc = MII.get(Opcode); 3525 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3526 return true; 3527 3528 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3529 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3530 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3531 3532 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3533 3534 unsigned NumExprs = 0; 3535 unsigned NumLiterals = 0; 3536 uint32_t LiteralValue; 3537 3538 for (int OpIdx : OpIndices) { 3539 if (OpIdx == -1) break; 3540 3541 const MCOperand &MO = Inst.getOperand(OpIdx); 3542 if (!MO.isImm() && !MO.isExpr()) 3543 continue; 3544 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3545 continue; 3546 3547 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3548 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3549 return false; 3550 3551 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3552 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3553 if (NumLiterals == 0 || LiteralValue != Value) { 3554 LiteralValue = Value; 3555 ++NumLiterals; 3556 } 3557 } else if (MO.isExpr()) { 3558 ++NumExprs; 3559 } 3560 } 3561 NumLiterals += NumExprs; 3562 3563 return !NumLiterals || 3564 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3565 } 3566 3567 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3568 const SMLoc &IDLoc, 3569 const OperandVector &Operands) { 3570 if (!validateLdsDirect(Inst)) { 3571 Error(IDLoc, 3572 "invalid use of lds_direct"); 3573 return false; 3574 } 3575 if (!validateSOPLiteral(Inst)) { 3576 Error(IDLoc, 3577 "only one literal operand is allowed"); 3578 return false; 3579 } 3580 if (!validateVOP3Literal(Inst)) { 3581 Error(IDLoc, 3582 "invalid literal operand"); 3583 return false; 3584 } 3585 if (!validateConstantBusLimitations(Inst)) { 3586 Error(IDLoc, 3587 "invalid operand (violates constant bus restrictions)"); 3588 return false; 3589 } 3590 if (!validateEarlyClobberLimitations(Inst)) { 3591 Error(IDLoc, 3592 "destination must be different than all sources"); 3593 return false; 3594 } 3595 if (!validateIntClampSupported(Inst)) { 3596 Error(IDLoc, 3597 "integer clamping is not supported on this GPU"); 3598 return false; 3599 } 3600 if (!validateOpSel(Inst)) { 3601 Error(IDLoc, 3602 "invalid op_sel operand"); 3603 return false; 3604 } 3605 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
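  // Note: the d16 modifier (illustrative example: appending 'd16' to an
  // image_load) requests 16-bit packed image data in half as many VGPRs;
  // the check below rejects it on subtargets without d16 support.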
3606 if (!validateMIMGD16(Inst)) { 3607 Error(IDLoc, 3608 "d16 modifier is not supported on this GPU"); 3609 return false; 3610 } 3611 if (!validateMIMGDim(Inst)) { 3612 Error(IDLoc, "dim modifier is required on this GPU"); 3613 return false; 3614 } 3615 if (!validateMIMGDataSize(Inst)) { 3616 Error(IDLoc, 3617 "image data size does not match dmask and tfe"); 3618 return false; 3619 } 3620 if (!validateMIMGAddrSize(Inst)) { 3621 Error(IDLoc, 3622 "image address size does not match dim and a16"); 3623 return false; 3624 } 3625 if (!validateMIMGAtomicDMask(Inst)) { 3626 Error(IDLoc, 3627 "invalid atomic image dmask"); 3628 return false; 3629 } 3630 if (!validateMIMGGatherDMask(Inst)) { 3631 Error(IDLoc, 3632 "invalid image_gather dmask: only one bit must be set"); 3633 return false; 3634 } 3635 if (!validateMovrels(Inst)) { 3636 Error(IDLoc, "source operand must be a VGPR"); 3637 return false; 3638 } 3639 if (!validateFlatOffset(Inst, Operands)) { 3640 return false; 3641 } 3642 if (!validateSMEMOffset(Inst, Operands)) { 3643 return false; 3644 } 3645 if (!validateMAIAccWrite(Inst)) { 3646 return false; 3647 } 3648 3649 return true; 3650 } 3651 3652 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3653 const FeatureBitset &FBS, 3654 unsigned VariantID = 0); 3655 3656 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3657 OperandVector &Operands, 3658 MCStreamer &Out, 3659 uint64_t &ErrorInfo, 3660 bool MatchingInlineAsm) { 3661 MCInst Inst; 3662 unsigned Result = Match_Success; 3663 for (auto Variant : getMatchedVariants()) { 3664 uint64_t EI; 3665 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3666 Variant); 3667 // We order match statuses from least to most specific. We use most specific 3668 // status as resulting 3669 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3670 if ((R == Match_Success) || 3671 (R == Match_PreferE32) || 3672 (R == Match_MissingFeature && Result != Match_PreferE32) || 3673 (R == Match_InvalidOperand && Result != Match_MissingFeature 3674 && Result != Match_PreferE32) || 3675 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3676 && Result != Match_MissingFeature 3677 && Result != Match_PreferE32)) { 3678 Result = R; 3679 ErrorInfo = EI; 3680 } 3681 if (R == Match_Success) 3682 break; 3683 } 3684 3685 switch (Result) { 3686 default: break; 3687 case Match_Success: 3688 if (!validateInstruction(Inst, IDLoc, Operands)) { 3689 return true; 3690 } 3691 Inst.setLoc(IDLoc); 3692 Out.emitInstruction(Inst, getSTI()); 3693 return false; 3694 3695 case Match_MissingFeature: 3696 return Error(IDLoc, "instruction not supported on this GPU"); 3697 3698 case Match_MnemonicFail: { 3699 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3700 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3701 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3702 return Error(IDLoc, "invalid instruction" + Suggestion, 3703 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3704 } 3705 3706 case Match_InvalidOperand: { 3707 SMLoc ErrorLoc = IDLoc; 3708 if (ErrorInfo != ~0ULL) { 3709 if (ErrorInfo >= Operands.size()) { 3710 return Error(IDLoc, "too few operands for instruction"); 3711 } 3712 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3713 if (ErrorLoc == SMLoc()) 3714 ErrorLoc = IDLoc; 3715 } 3716 return Error(ErrorLoc, "invalid operand for instruction"); 3717 } 3718 3719 case Match_PreferE32: 3720 return Error(IDLoc, "internal error: instruction 
without _e64 suffix " 3721 "should be encoded as e32"); 3722 } 3723 llvm_unreachable("Implement any new match types added!"); 3724 } 3725 3726 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3727 int64_t Tmp = -1; 3728 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3729 return true; 3730 } 3731 if (getParser().parseAbsoluteExpression(Tmp)) { 3732 return true; 3733 } 3734 Ret = static_cast<uint32_t>(Tmp); 3735 return false; 3736 } 3737 3738 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3739 uint32_t &Minor) { 3740 if (ParseAsAbsoluteExpression(Major)) 3741 return TokError("invalid major version"); 3742 3743 if (getLexer().isNot(AsmToken::Comma)) 3744 return TokError("minor version number required, comma expected"); 3745 Lex(); 3746 3747 if (ParseAsAbsoluteExpression(Minor)) 3748 return TokError("invalid minor version"); 3749 3750 return false; 3751 } 3752 3753 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3754 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3755 return TokError("directive only supported for amdgcn architecture"); 3756 3757 std::string Target; 3758 3759 SMLoc TargetStart = getTok().getLoc(); 3760 if (getParser().parseEscapedString(Target)) 3761 return true; 3762 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3763 3764 std::string ExpectedTarget; 3765 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3766 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3767 3768 if (Target != ExpectedTargetOS.str()) 3769 return getParser().Error(TargetRange.Start, "target must match options", 3770 TargetRange); 3771 3772 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3773 return false; 3774 } 3775 3776 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3777 return getParser().Error(Range.Start, "value out of range", Range); 3778 } 3779 3780 bool AMDGPUAsmParser::calculateGPRBlocks( 3781 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3782 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3783 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3784 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3785 // TODO(scott.linder): These calculations are duplicated from 3786 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
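  // Rough sketch of the encoding (illustrative; the exact allocation granule
  // is subtarget-dependent): the descriptor stores "granulated" counts, i.e.
  // the register count rounded up to the granule, divided by the granule,
  // minus one. For example, with a VGPR granule of 4, NextFreeVGPR = 10
  // would yield VGPRBlocks = 2.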
3787 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3788 3789 unsigned NumVGPRs = NextFreeVGPR; 3790 unsigned NumSGPRs = NextFreeSGPR; 3791 3792 if (Version.Major >= 10) 3793 NumSGPRs = 0; 3794 else { 3795 unsigned MaxAddressableNumSGPRs = 3796 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3797 3798 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3799 NumSGPRs > MaxAddressableNumSGPRs) 3800 return OutOfRangeError(SGPRRange); 3801 3802 NumSGPRs += 3803 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3804 3805 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3806 NumSGPRs > MaxAddressableNumSGPRs) 3807 return OutOfRangeError(SGPRRange); 3808 3809 if (Features.test(FeatureSGPRInitBug)) 3810 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3811 } 3812 3813 VGPRBlocks = 3814 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3815 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3816 3817 return false; 3818 } 3819 3820 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3821 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3822 return TokError("directive only supported for amdgcn architecture"); 3823 3824 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3825 return TokError("directive only supported for amdhsa OS"); 3826 3827 StringRef KernelName; 3828 if (getParser().parseIdentifier(KernelName)) 3829 return true; 3830 3831 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3832 3833 StringSet<> Seen; 3834 3835 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3836 3837 SMRange VGPRRange; 3838 uint64_t NextFreeVGPR = 0; 3839 SMRange SGPRRange; 3840 uint64_t NextFreeSGPR = 0; 3841 unsigned UserSGPRCount = 0; 3842 bool ReserveVCC = true; 3843 bool ReserveFlatScr = true; 3844 bool ReserveXNACK = hasXNACK(); 3845 Optional<bool> EnableWavefrontSize32; 3846 3847 while (true) { 3848 while (getLexer().is(AsmToken::EndOfStatement)) 3849 Lex(); 3850 3851 if (getLexer().isNot(AsmToken::Identifier)) 3852 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3853 3854 StringRef ID = getTok().getIdentifier(); 3855 SMRange IDRange = getTok().getLocRange(); 3856 Lex(); 3857 3858 if (ID == ".end_amdhsa_kernel") 3859 break; 3860 3861 if (Seen.find(ID) != Seen.end()) 3862 return TokError(".amdhsa_ directives cannot be repeated"); 3863 Seen.insert(ID); 3864 3865 SMLoc ValStart = getTok().getLoc(); 3866 int64_t IVal; 3867 if (getParser().parseAbsoluteExpression(IVal)) 3868 return true; 3869 SMLoc ValEnd = getTok().getLoc(); 3870 SMRange ValRange = SMRange(ValStart, ValEnd); 3871 3872 if (IVal < 0) 3873 return OutOfRangeError(ValRange); 3874 3875 uint64_t Val = IVal; 3876 3877 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3878 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3879 return OutOfRangeError(RANGE); \ 3880 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3881 3882 if (ID == ".amdhsa_group_segment_fixed_size") { 3883 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3884 return OutOfRangeError(ValRange); 3885 KD.group_segment_fixed_size = Val; 3886 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3887 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3888 return OutOfRangeError(ValRange); 3889 KD.private_segment_fixed_size = Val; 3890 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3891 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3892 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3893 Val, ValRange); 
3894 if (Val) 3895 UserSGPRCount += 4; 3896 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3897 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3898 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3899 ValRange); 3900 if (Val) 3901 UserSGPRCount += 2; 3902 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3903 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3904 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3905 ValRange); 3906 if (Val) 3907 UserSGPRCount += 2; 3908 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3909 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3910 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3911 Val, ValRange); 3912 if (Val) 3913 UserSGPRCount += 2; 3914 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3915 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3916 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3917 ValRange); 3918 if (Val) 3919 UserSGPRCount += 2; 3920 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3921 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3922 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3923 ValRange); 3924 if (Val) 3925 UserSGPRCount += 2; 3926 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3927 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3928 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3929 Val, ValRange); 3930 if (Val) 3931 UserSGPRCount += 1; 3932 } else if (ID == ".amdhsa_wavefront_size32") { 3933 if (IVersion.Major < 10) 3934 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3935 IDRange); 3936 EnableWavefrontSize32 = Val; 3937 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3938 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3939 Val, ValRange); 3940 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3941 PARSE_BITS_ENTRY( 3942 KD.compute_pgm_rsrc2, 3943 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3944 ValRange); 3945 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3946 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3947 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3948 ValRange); 3949 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3950 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3951 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3952 ValRange); 3953 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3954 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3955 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3956 ValRange); 3957 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3958 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3959 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3960 ValRange); 3961 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3962 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3963 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3964 ValRange); 3965 } else if (ID == ".amdhsa_next_free_vgpr") { 3966 VGPRRange = ValRange; 3967 NextFreeVGPR = Val; 3968 } else if (ID == ".amdhsa_next_free_sgpr") { 3969 SGPRRange = ValRange; 3970 NextFreeSGPR = Val; 3971 } else if (ID == ".amdhsa_reserve_vcc") { 3972 if (!isUInt<1>(Val)) 3973 return OutOfRangeError(ValRange); 3974 ReserveVCC = Val; 3975 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3976 if (IVersion.Major < 7) 3977 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3978 IDRange); 3979 if (!isUInt<1>(Val)) 3980 return OutOfRangeError(ValRange); 3981 ReserveFlatScr = Val; 3982 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3983 if (IVersion.Major < 8) 3984 return getParser().Error(IDRange.Start, 
"directive requires gfx8+", 3985 IDRange); 3986 if (!isUInt<1>(Val)) 3987 return OutOfRangeError(ValRange); 3988 ReserveXNACK = Val; 3989 } else if (ID == ".amdhsa_float_round_mode_32") { 3990 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3991 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3992 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3993 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3994 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3995 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3996 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3997 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3998 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3999 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4000 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4001 ValRange); 4002 } else if (ID == ".amdhsa_dx10_clamp") { 4003 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4004 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4005 } else if (ID == ".amdhsa_ieee_mode") { 4006 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4007 Val, ValRange); 4008 } else if (ID == ".amdhsa_fp16_overflow") { 4009 if (IVersion.Major < 9) 4010 return getParser().Error(IDRange.Start, "directive requires gfx9+", 4011 IDRange); 4012 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4013 ValRange); 4014 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4015 if (IVersion.Major < 10) 4016 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4017 IDRange); 4018 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4019 ValRange); 4020 } else if (ID == ".amdhsa_memory_ordered") { 4021 if (IVersion.Major < 10) 4022 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4023 IDRange); 4024 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4025 ValRange); 4026 } else if (ID == ".amdhsa_forward_progress") { 4027 if (IVersion.Major < 10) 4028 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4029 IDRange); 4030 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4031 ValRange); 4032 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4033 PARSE_BITS_ENTRY( 4034 KD.compute_pgm_rsrc2, 4035 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4036 ValRange); 4037 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4038 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4039 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4040 Val, ValRange); 4041 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4042 PARSE_BITS_ENTRY( 4043 KD.compute_pgm_rsrc2, 4044 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4045 ValRange); 4046 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4047 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4048 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4049 Val, ValRange); 4050 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4051 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4052 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4053 Val, ValRange); 4054 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4055 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4056 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4057 Val, ValRange); 4058 } else if (ID == ".amdhsa_exception_int_div_zero") { 4059 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4060 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4061 Val, ValRange); 4062 } else { 4063 return getParser().Error(IDRange.Start, 
4064 "unknown .amdhsa_kernel directive", IDRange); 4065 } 4066 4067 #undef PARSE_BITS_ENTRY 4068 } 4069 4070 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4071 return TokError(".amdhsa_next_free_vgpr directive is required"); 4072 4073 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4074 return TokError(".amdhsa_next_free_sgpr directive is required"); 4075 4076 unsigned VGPRBlocks; 4077 unsigned SGPRBlocks; 4078 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4079 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4080 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4081 SGPRBlocks)) 4082 return true; 4083 4084 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4085 VGPRBlocks)) 4086 return OutOfRangeError(VGPRRange); 4087 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4088 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4089 4090 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4091 SGPRBlocks)) 4092 return OutOfRangeError(SGPRRange); 4093 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4094 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4095 SGPRBlocks); 4096 4097 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4098 return TokError("too many user SGPRs enabled"); 4099 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4100 UserSGPRCount); 4101 4102 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4103 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4104 ReserveFlatScr, ReserveXNACK); 4105 return false; 4106 } 4107 4108 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4109 uint32_t Major; 4110 uint32_t Minor; 4111 4112 if (ParseDirectiveMajorMinor(Major, Minor)) 4113 return true; 4114 4115 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4116 return false; 4117 } 4118 4119 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4120 uint32_t Major; 4121 uint32_t Minor; 4122 uint32_t Stepping; 4123 StringRef VendorName; 4124 StringRef ArchName; 4125 4126 // If this directive has no arguments, then use the ISA version for the 4127 // targeted GPU. 
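  // Illustrative directive forms handled here (syntax assumed):
  //   .hsa_code_object_isa                        ; use the targeted GPU's ISA version
  //   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"   ; explicit major, minor, stepping, vendor, arch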
4128 if (getLexer().is(AsmToken::EndOfStatement)) { 4129 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4130 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4131 ISA.Stepping, 4132 "AMD", "AMDGPU"); 4133 return false; 4134 } 4135 4136 if (ParseDirectiveMajorMinor(Major, Minor)) 4137 return true; 4138 4139 if (getLexer().isNot(AsmToken::Comma)) 4140 return TokError("stepping version number required, comma expected"); 4141 Lex(); 4142 4143 if (ParseAsAbsoluteExpression(Stepping)) 4144 return TokError("invalid stepping version"); 4145 4146 if (getLexer().isNot(AsmToken::Comma)) 4147 return TokError("vendor name required, comma expected"); 4148 Lex(); 4149 4150 if (getLexer().isNot(AsmToken::String)) 4151 return TokError("invalid vendor name"); 4152 4153 VendorName = getLexer().getTok().getStringContents(); 4154 Lex(); 4155 4156 if (getLexer().isNot(AsmToken::Comma)) 4157 return TokError("arch name required, comma expected"); 4158 Lex(); 4159 4160 if (getLexer().isNot(AsmToken::String)) 4161 return TokError("invalid arch name"); 4162 4163 ArchName = getLexer().getTok().getStringContents(); 4164 Lex(); 4165 4166 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4167 VendorName, ArchName); 4168 return false; 4169 } 4170 4171 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4172 amd_kernel_code_t &Header) { 4173 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4174 // assembly for backwards compatibility. 4175 if (ID == "max_scratch_backing_memory_byte_size") { 4176 Parser.eatToEndOfStatement(); 4177 return false; 4178 } 4179 4180 SmallString<40> ErrStr; 4181 raw_svector_ostream Err(ErrStr); 4182 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4183 return TokError(Err.str()); 4184 } 4185 Lex(); 4186 4187 if (ID == "enable_wavefront_size32") { 4188 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4189 if (!isGFX10()) 4190 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4191 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4192 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4193 } else { 4194 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4195 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4196 } 4197 } 4198 4199 if (ID == "wavefront_size") { 4200 if (Header.wavefront_size == 5) { 4201 if (!isGFX10()) 4202 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4203 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4204 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4205 } else if (Header.wavefront_size == 6) { 4206 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4207 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4208 } 4209 } 4210 4211 if (ID == "enable_wgp_mode") { 4212 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4213 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4214 } 4215 4216 if (ID == "enable_mem_ordered") { 4217 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4218 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4219 } 4220 4221 if (ID == "enable_fwd_progress") { 4222 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4223 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4224 } 4225 4226 return false; 4227 } 4228 4229 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4230 amd_kernel_code_t Header; 4231 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4232 4233 while (true) { 4234 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4235 // will set the current token to EndOfStatement. 4236 while(getLexer().is(AsmToken::EndOfStatement)) 4237 Lex(); 4238 4239 if (getLexer().isNot(AsmToken::Identifier)) 4240 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4241 4242 StringRef ID = getLexer().getTok().getIdentifier(); 4243 Lex(); 4244 4245 if (ID == ".end_amd_kernel_code_t") 4246 break; 4247 4248 if (ParseAMDKernelCodeTValue(ID, Header)) 4249 return true; 4250 } 4251 4252 getTargetStreamer().EmitAMDKernelCodeT(Header); 4253 4254 return false; 4255 } 4256 4257 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4258 if (getLexer().isNot(AsmToken::Identifier)) 4259 return TokError("expected symbol name"); 4260 4261 StringRef KernelName = Parser.getTok().getString(); 4262 4263 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4264 ELF::STT_AMDGPU_HSA_KERNEL); 4265 Lex(); 4266 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4267 KernelScope.initialize(getContext()); 4268 return false; 4269 } 4270 4271 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4272 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4273 return Error(getParser().getTok().getLoc(), 4274 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4275 "architectures"); 4276 } 4277 4278 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4279 4280 std::string ISAVersionStringFromSTI; 4281 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4282 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4283 4284 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4285 return Error(getParser().getTok().getLoc(), 4286 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4287 "arguments specified through the command line"); 4288 } 4289 4290 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4291 Lex(); 4292 4293 return false; 4294 } 4295 4296 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4297 const char *AssemblerDirectiveBegin; 4298 const char *AssemblerDirectiveEnd; 4299 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4300 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4301 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4302 HSAMD::V3::AssemblerDirectiveEnd) 4303 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4304 HSAMD::AssemblerDirectiveEnd); 4305 4306 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4307 return Error(getParser().getTok().getLoc(), 4308 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4309 "not available on non-amdhsa OSes")).str()); 4310 } 4311 4312 std::string HSAMetadataString; 4313 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4314 HSAMetadataString)) 4315 return true; 4316 4317 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4318 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4319 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4320 } else { 4321 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4322 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4323 } 4324 4325 return false; 4326 } 4327 4328 /// Common code to parse out a block of text (typically YAML) between start and 4329 /// end directives. 
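/// For example, HSA metadata is collected this way: every statement between
/// the begin directive and its matching end directive is appended to
/// CollectString and later handed to the target streamer as one block.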
4330 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4331 const char *AssemblerDirectiveEnd, 4332 std::string &CollectString) { 4333 4334 raw_string_ostream CollectStream(CollectString); 4335 4336 getLexer().setSkipSpace(false); 4337 4338 bool FoundEnd = false; 4339 while (!getLexer().is(AsmToken::Eof)) { 4340 while (getLexer().is(AsmToken::Space)) { 4341 CollectStream << getLexer().getTok().getString(); 4342 Lex(); 4343 } 4344 4345 if (getLexer().is(AsmToken::Identifier)) { 4346 StringRef ID = getLexer().getTok().getIdentifier(); 4347 if (ID == AssemblerDirectiveEnd) { 4348 Lex(); 4349 FoundEnd = true; 4350 break; 4351 } 4352 } 4353 4354 CollectStream << Parser.parseStringToEndOfStatement() 4355 << getContext().getAsmInfo()->getSeparatorString(); 4356 4357 Parser.eatToEndOfStatement(); 4358 } 4359 4360 getLexer().setSkipSpace(true); 4361 4362 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4363 return TokError(Twine("expected directive ") + 4364 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4365 } 4366 4367 CollectStream.flush(); 4368 return false; 4369 } 4370 4371 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4372 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4373 std::string String; 4374 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4375 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4376 return true; 4377 4378 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4379 if (!PALMetadata->setFromString(String)) 4380 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4381 return false; 4382 } 4383 4384 /// Parse the assembler directive for old linear-format PAL metadata. 4385 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4386 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4387 return Error(getParser().getTok().getLoc(), 4388 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4389 "not available on non-amdpal OSes")).str()); 4390 } 4391 4392 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4393 PALMetadata->setLegacy(); 4394 for (;;) { 4395 uint32_t Key, Value; 4396 if (ParseAsAbsoluteExpression(Key)) { 4397 return TokError(Twine("invalid value in ") + 4398 Twine(PALMD::AssemblerDirective)); 4399 } 4400 if (getLexer().isNot(AsmToken::Comma)) { 4401 return TokError(Twine("expected an even number of values in ") + 4402 Twine(PALMD::AssemblerDirective)); 4403 } 4404 Lex(); 4405 if (ParseAsAbsoluteExpression(Value)) { 4406 return TokError(Twine("invalid value in ") + 4407 Twine(PALMD::AssemblerDirective)); 4408 } 4409 PALMetadata->setRegister(Key, Value); 4410 if (getLexer().isNot(AsmToken::Comma)) 4411 break; 4412 Lex(); 4413 } 4414 return false; 4415 } 4416 4417 /// ParseDirectiveAMDGPULDS 4418 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4419 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4420 if (getParser().checkForValidSection()) 4421 return true; 4422 4423 StringRef Name; 4424 SMLoc NameLoc = getLexer().getLoc(); 4425 if (getParser().parseIdentifier(Name)) 4426 return TokError("expected identifier in directive"); 4427 4428 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4429 if (parseToken(AsmToken::Comma, "expected ','")) 4430 return true; 4431 4432 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4433 4434 int64_t Size; 4435 SMLoc SizeLoc = getLexer().getLoc(); 4436 if (getParser().parseAbsoluteExpression(Size)) 4437 return true; 4438 if (Size < 0) 4439 return 
Error(SizeLoc, "size must be non-negative"); 4440 if (Size > LocalMemorySize) 4441 return Error(SizeLoc, "size is too large"); 4442 4443 int64_t Align = 4; 4444 if (getLexer().is(AsmToken::Comma)) { 4445 Lex(); 4446 SMLoc AlignLoc = getLexer().getLoc(); 4447 if (getParser().parseAbsoluteExpression(Align)) 4448 return true; 4449 if (Align < 0 || !isPowerOf2_64(Align)) 4450 return Error(AlignLoc, "alignment must be a power of two"); 4451 4452 // Alignment larger than the size of LDS is possible in theory, as long 4453 // as the linker manages to place the symbol at address 0, but we do want 4454 // to make sure the alignment fits nicely into a 32-bit integer. 4455 if (Align >= 1u << 31) 4456 return Error(AlignLoc, "alignment is too large"); 4457 } 4458 4459 if (parseToken(AsmToken::EndOfStatement, 4460 "unexpected token in '.amdgpu_lds' directive")) 4461 return true; 4462 4463 Symbol->redefineIfPossible(); 4464 if (!Symbol->isUndefined()) 4465 return Error(NameLoc, "invalid symbol redefinition"); 4466 4467 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align); 4468 return false; 4469 } 4470 4471 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4472 StringRef IDVal = DirectiveID.getString(); 4473 4474 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4475 if (IDVal == ".amdgcn_target") 4476 return ParseDirectiveAMDGCNTarget(); 4477 4478 if (IDVal == ".amdhsa_kernel") 4479 return ParseDirectiveAMDHSAKernel(); 4480 4481 // TODO: Restructure/combine with PAL metadata directive. 4482 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4483 return ParseDirectiveHSAMetadata(); 4484 } else { 4485 if (IDVal == ".hsa_code_object_version") 4486 return ParseDirectiveHSACodeObjectVersion(); 4487 4488 if (IDVal == ".hsa_code_object_isa") 4489 return ParseDirectiveHSACodeObjectISA(); 4490 4491 if (IDVal == ".amd_kernel_code_t") 4492 return ParseDirectiveAMDKernelCodeT(); 4493 4494 if (IDVal == ".amdgpu_hsa_kernel") 4495 return ParseDirectiveAMDGPUHsaKernel(); 4496 4497 if (IDVal == ".amd_amdgpu_isa") 4498 return ParseDirectiveISAVersion(); 4499 4500 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4501 return ParseDirectiveHSAMetadata(); 4502 } 4503 4504 if (IDVal == ".amdgpu_lds") 4505 return ParseDirectiveAMDGPULDS(); 4506 4507 if (IDVal == PALMD::AssemblerDirectiveBegin) 4508 return ParseDirectivePALMetadataBegin(); 4509 4510 if (IDVal == PALMD::AssemblerDirective) 4511 return ParseDirectivePALMetadata(); 4512 4513 return true; 4514 } 4515 4516 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4517 unsigned RegNo) const { 4518 4519 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4520 R.isValid(); ++R) { 4521 if (*R == RegNo) 4522 return isGFX9() || isGFX10(); 4523 } 4524 4525 // GFX10 has 2 more SGPRs 104 and 105.
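  // For example, a reference to s104 or s105 only assembles when
  // hasSGPR104_SGPR105() holds for the subtarget.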
4526 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4527 R.isValid(); ++R) { 4528 if (*R == RegNo) 4529 return hasSGPR104_SGPR105(); 4530 } 4531 4532 switch (RegNo) { 4533 case AMDGPU::SRC_SHARED_BASE: 4534 case AMDGPU::SRC_SHARED_LIMIT: 4535 case AMDGPU::SRC_PRIVATE_BASE: 4536 case AMDGPU::SRC_PRIVATE_LIMIT: 4537 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4538 return !isCI() && !isSI() && !isVI(); 4539 case AMDGPU::TBA: 4540 case AMDGPU::TBA_LO: 4541 case AMDGPU::TBA_HI: 4542 case AMDGPU::TMA: 4543 case AMDGPU::TMA_LO: 4544 case AMDGPU::TMA_HI: 4545 return !isGFX9() && !isGFX10(); 4546 case AMDGPU::XNACK_MASK: 4547 case AMDGPU::XNACK_MASK_LO: 4548 case AMDGPU::XNACK_MASK_HI: 4549 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4550 case AMDGPU::SGPR_NULL: 4551 return isGFX10(); 4552 default: 4553 break; 4554 } 4555 4556 if (isCI()) 4557 return true; 4558 4559 if (isSI() || isGFX10()) { 4560 // No flat_scr on SI. 4561 // On GFX10 flat scratch is not a valid register operand and can only be 4562 // accessed with s_setreg/s_getreg. 4563 switch (RegNo) { 4564 case AMDGPU::FLAT_SCR: 4565 case AMDGPU::FLAT_SCR_LO: 4566 case AMDGPU::FLAT_SCR_HI: 4567 return false; 4568 default: 4569 return true; 4570 } 4571 } 4572 4573 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4574 // SI/CI have. 4575 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4576 R.isValid(); ++R) { 4577 if (*R == RegNo) 4578 return hasSGPR102_SGPR103(); 4579 } 4580 4581 return true; 4582 } 4583 4584 OperandMatchResultTy 4585 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4586 OperandMode Mode) { 4587 // Try to parse with a custom parser 4588 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4589 4590 // If we successfully parsed the operand or if there was an error parsing, 4591 // we are done. 4592 // 4593 // If we are parsing after we reach EndOfStatement then this means we 4594 // are appending default values to the Operands list. This is only done 4595 // by a custom parser, so we shouldn't continue on to the generic parsing. 4596 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4597 getLexer().is(AsmToken::EndOfStatement)) 4598 return ResTy; 4599 4600 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4601 unsigned Prefix = Operands.size(); 4602 SMLoc LBraceLoc = getTok().getLoc(); 4603 Parser.Lex(); // eat the '[' 4604 4605 for (;;) { 4606 ResTy = parseReg(Operands); 4607 if (ResTy != MatchOperand_Success) 4608 return ResTy; 4609 4610 if (getLexer().is(AsmToken::RBrac)) 4611 break; 4612 4613 if (getLexer().isNot(AsmToken::Comma)) 4614 return MatchOperand_ParseFail; 4615 Parser.Lex(); 4616 } 4617 4618 if (Operands.size() - Prefix > 1) { 4619 Operands.insert(Operands.begin() + Prefix, 4620 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4621 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4622 getTok().getLoc())); 4623 } 4624 4625 Parser.Lex(); // eat the ']' 4626 return MatchOperand_Success; 4627 } 4628 4629 return parseRegOrImm(Operands); 4630 } 4631 4632 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4633 // Clear any forced encodings from the previous instruction.
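  // Illustrative mnemonics handled below: 'v_add_f32_e64' forces the 64-bit
  // (VOP3) encoding, 'v_add_f32_e32' the 32-bit one, and the '_dpp' and
  // '_sdwa' suffixes force those variants; the suffix is stripped from the
  // returned mnemonic.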
4634 setForcedEncodingSize(0); 4635 setForcedDPP(false); 4636 setForcedSDWA(false); 4637 4638 if (Name.endswith("_e64")) { 4639 setForcedEncodingSize(64); 4640 return Name.substr(0, Name.size() - 4); 4641 } else if (Name.endswith("_e32")) { 4642 setForcedEncodingSize(32); 4643 return Name.substr(0, Name.size() - 4); 4644 } else if (Name.endswith("_dpp")) { 4645 setForcedDPP(true); 4646 return Name.substr(0, Name.size() - 4); 4647 } else if (Name.endswith("_sdwa")) { 4648 setForcedSDWA(true); 4649 return Name.substr(0, Name.size() - 5); 4650 } 4651 return Name; 4652 } 4653 4654 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4655 StringRef Name, 4656 SMLoc NameLoc, OperandVector &Operands) { 4657 // Add the instruction mnemonic 4658 Name = parseMnemonicSuffix(Name); 4659 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4660 4661 bool IsMIMG = Name.startswith("image_"); 4662 4663 while (!getLexer().is(AsmToken::EndOfStatement)) { 4664 OperandMode Mode = OperandMode_Default; 4665 if (IsMIMG && isGFX10() && Operands.size() == 2) 4666 Mode = OperandMode_NSA; 4667 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4668 4669 // Eat the comma or space if there is one. 4670 if (getLexer().is(AsmToken::Comma)) 4671 Parser.Lex(); 4672 4673 switch (Res) { 4674 case MatchOperand_Success: break; 4675 case MatchOperand_ParseFail: 4676 // FIXME: use real operand location rather than the current location. 4677 Error(getLexer().getLoc(), "failed parsing operand."); 4678 while (!getLexer().is(AsmToken::EndOfStatement)) { 4679 Parser.Lex(); 4680 } 4681 return true; 4682 case MatchOperand_NoMatch: 4683 // FIXME: use real operand location rather than the current location. 4684 Error(getLexer().getLoc(), "not a valid operand."); 4685 while (!getLexer().is(AsmToken::EndOfStatement)) { 4686 Parser.Lex(); 4687 } 4688 return true; 4689 } 4690 } 4691 4692 return false; 4693 } 4694 4695 //===----------------------------------------------------------------------===// 4696 // Utility functions 4697 //===----------------------------------------------------------------------===// 4698 4699 OperandMatchResultTy 4700 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4701 4702 if (!trySkipId(Prefix, AsmToken::Colon)) 4703 return MatchOperand_NoMatch; 4704 4705 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4706 } 4707 4708 OperandMatchResultTy 4709 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4710 AMDGPUOperand::ImmTy ImmTy, 4711 bool (*ConvertResult)(int64_t&)) { 4712 SMLoc S = getLoc(); 4713 int64_t Value = 0; 4714 4715 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4716 if (Res != MatchOperand_Success) 4717 return Res; 4718 4719 if (ConvertResult && !ConvertResult(Value)) { 4720 Error(S, "invalid " + StringRef(Prefix) + " value."); 4721 } 4722 4723 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4724 return MatchOperand_Success; 4725 } 4726 4727 OperandMatchResultTy 4728 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4729 OperandVector &Operands, 4730 AMDGPUOperand::ImmTy ImmTy, 4731 bool (*ConvertResult)(int64_t&)) { 4732 SMLoc S = getLoc(); 4733 if (!trySkipId(Prefix, AsmToken::Colon)) 4734 return MatchOperand_NoMatch; 4735 4736 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4737 return MatchOperand_ParseFail; 4738 4739 unsigned Val = 0; 4740 const unsigned MaxSize = 4; 4741 4742 // FIXME: How to verify the number of elements matches the number of src 4743 // operands? 4744 for (int I = 0; ; ++I) { 4745 int64_t Op; 4746 SMLoc Loc = getLoc(); 4747 if (!parseExpr(Op)) 4748 return MatchOperand_ParseFail; 4749 4750 if (Op != 0 && Op != 1) { 4751 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4752 return MatchOperand_ParseFail; 4753 } 4754 4755 Val |= (Op << I); 4756 4757 if (trySkipToken(AsmToken::RBrac)) 4758 break; 4759 4760 if (I + 1 == MaxSize) { 4761 Error(getLoc(), "expected a closing square bracket"); 4762 return MatchOperand_ParseFail; 4763 } 4764 4765 if (!skipToken(AsmToken::Comma, "expected a comma")) 4766 return MatchOperand_ParseFail; 4767 } 4768 4769 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4770 return MatchOperand_Success; 4771 } 4772 4773 OperandMatchResultTy 4774 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4775 AMDGPUOperand::ImmTy ImmTy) { 4776 int64_t Bit = 0; 4777 SMLoc S = Parser.getTok().getLoc(); 4778 4779 // We are at the end of the statement, and this is a default argument, so 4780 // use a default value. 
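  // For example (illustrative), with Name == "glc" the token 'glc' sets the
  // bit and 'noglc' clears it; if the statement has already ended, the
  // default of 0 is kept.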
4781 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4782 switch(getLexer().getKind()) { 4783 case AsmToken::Identifier: { 4784 StringRef Tok = Parser.getTok().getString(); 4785 if (Tok == Name) { 4786 if (Tok == "r128" && !hasMIMG_R128()) 4787 Error(S, "r128 modifier is not supported on this GPU"); 4788 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 4789 Error(S, "a16 modifier is not supported on this GPU"); 4790 Bit = 1; 4791 Parser.Lex(); 4792 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4793 Bit = 0; 4794 Parser.Lex(); 4795 } else { 4796 return MatchOperand_NoMatch; 4797 } 4798 break; 4799 } 4800 default: 4801 return MatchOperand_NoMatch; 4802 } 4803 } 4804 4805 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4806 return MatchOperand_ParseFail; 4807 4808 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 4809 ImmTy = AMDGPUOperand::ImmTyR128A16; 4810 4811 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4812 return MatchOperand_Success; 4813 } 4814 4815 static void addOptionalImmOperand( 4816 MCInst& Inst, const OperandVector& Operands, 4817 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4818 AMDGPUOperand::ImmTy ImmT, 4819 int64_t Default = 0) { 4820 auto i = OptionalIdx.find(ImmT); 4821 if (i != OptionalIdx.end()) { 4822 unsigned Idx = i->second; 4823 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4824 } else { 4825 Inst.addOperand(MCOperand::createImm(Default)); 4826 } 4827 } 4828 4829 OperandMatchResultTy 4830 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4831 if (getLexer().isNot(AsmToken::Identifier)) { 4832 return MatchOperand_NoMatch; 4833 } 4834 StringRef Tok = Parser.getTok().getString(); 4835 if (Tok != Prefix) { 4836 return MatchOperand_NoMatch; 4837 } 4838 4839 Parser.Lex(); 4840 if (getLexer().isNot(AsmToken::Colon)) { 4841 return MatchOperand_ParseFail; 4842 } 4843 4844 Parser.Lex(); 4845 if (getLexer().isNot(AsmToken::Identifier)) { 4846 return MatchOperand_ParseFail; 4847 } 4848 4849 Value = Parser.getTok().getString(); 4850 return MatchOperand_Success; 4851 } 4852 4853 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4854 // values to live in a joint format operand in the MCInst encoding. 4855 OperandMatchResultTy 4856 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4857 SMLoc S = Parser.getTok().getLoc(); 4858 int64_t Dfmt = 0, Nfmt = 0; 4859 // dfmt and nfmt can appear in either order, and each is optional. 
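  // Illustrative operand forms accepted here: 'dfmt:1, nfmt:2' or
  // 'nfmt:2, dfmt:1'; the two values are merged below into a single format
  // operand as Dfmt | (Nfmt << 4).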
4860 bool GotDfmt = false, GotNfmt = false; 4861 while (!GotDfmt || !GotNfmt) { 4862 if (!GotDfmt) { 4863 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4864 if (Res != MatchOperand_NoMatch) { 4865 if (Res != MatchOperand_Success) 4866 return Res; 4867 if (Dfmt >= 16) { 4868 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4869 return MatchOperand_ParseFail; 4870 } 4871 GotDfmt = true; 4872 Parser.Lex(); 4873 continue; 4874 } 4875 } 4876 if (!GotNfmt) { 4877 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4878 if (Res != MatchOperand_NoMatch) { 4879 if (Res != MatchOperand_Success) 4880 return Res; 4881 if (Nfmt >= 8) { 4882 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4883 return MatchOperand_ParseFail; 4884 } 4885 GotNfmt = true; 4886 Parser.Lex(); 4887 continue; 4888 } 4889 } 4890 break; 4891 } 4892 if (!GotDfmt && !GotNfmt) 4893 return MatchOperand_NoMatch; 4894 auto Format = Dfmt | Nfmt << 4; 4895 Operands.push_back( 4896 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4897 return MatchOperand_Success; 4898 } 4899 4900 //===----------------------------------------------------------------------===// 4901 // ds 4902 //===----------------------------------------------------------------------===// 4903 4904 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4905 const OperandVector &Operands) { 4906 OptionalImmIndexMap OptionalIdx; 4907 4908 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4909 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4910 4911 // Add the register arguments 4912 if (Op.isReg()) { 4913 Op.addRegOperands(Inst, 1); 4914 continue; 4915 } 4916 4917 // Handle optional arguments 4918 OptionalIdx[Op.getImmTy()] = i; 4919 } 4920 4921 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4922 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4923 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4924 4925 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4926 } 4927 4928 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4929 bool IsGdsHardcoded) { 4930 OptionalImmIndexMap OptionalIdx; 4931 4932 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4933 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4934 4935 // Add the register arguments 4936 if (Op.isReg()) { 4937 Op.addRegOperands(Inst, 1); 4938 continue; 4939 } 4940 4941 if (Op.isToken() && Op.getToken() == "gds") { 4942 IsGdsHardcoded = true; 4943 continue; 4944 } 4945 4946 // Handle optional arguments 4947 OptionalIdx[Op.getImmTy()] = i; 4948 } 4949 4950 AMDGPUOperand::ImmTy OffsetType = 4951 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4952 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4953 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4954 AMDGPUOperand::ImmTyOffset; 4955 4956 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4957 4958 if (!IsGdsHardcoded) { 4959 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4960 } 4961 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4962 } 4963 4964 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4965 OptionalImmIndexMap OptionalIdx; 4966 4967 unsigned OperandIdx[4]; 4968 unsigned EnMask = 0; 4969 int SrcIdx = 0; 4970 4971 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4972 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4973 4974 // Add the register arguments 4975 if (Op.isReg()) { 4976 assert(SrcIdx < 4); 4977 OperandIdx[SrcIdx] = Inst.size(); 4978 Op.addRegOperands(Inst, 1); 4979 ++SrcIdx; 4980 continue; 4981 } 4982 4983 if (Op.isOff()) { 4984 assert(SrcIdx < 4); 4985 OperandIdx[SrcIdx] = Inst.size(); 4986 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4987 ++SrcIdx; 4988 continue; 4989 } 4990 4991 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4992 Op.addImmOperands(Inst, 1); 4993 continue; 4994 } 4995 4996 if (Op.isToken() && Op.getToken() == "done") 4997 continue; 4998 4999 // Handle optional arguments 5000 OptionalIdx[Op.getImmTy()] = i; 5001 } 5002 5003 assert(SrcIdx == 4); 5004 5005 bool Compr = false; 5006 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5007 Compr = true; 5008 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5009 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5010 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5011 } 5012 5013 for (auto i = 0; i < SrcIdx; ++i) { 5014 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5015 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5016 } 5017 } 5018 5019 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5020 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5021 5022 Inst.addOperand(MCOperand::createImm(EnMask)); 5023 } 5024 5025 //===----------------------------------------------------------------------===// 5026 // s_waitcnt 5027 //===----------------------------------------------------------------------===// 5028 5029 static bool 5030 encodeCnt( 5031 const AMDGPU::IsaVersion ISA, 5032 int64_t &IntVal, 5033 int64_t CntVal, 5034 bool Saturate, 5035 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5036 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5037 { 5038 bool Failed = false; 5039 5040 IntVal = encode(ISA, IntVal, CntVal); 5041 if (CntVal != decode(ISA, IntVal)) { 5042 if (Saturate) { 5043 IntVal = encode(ISA, IntVal, -1); 5044 } else { 5045 Failed = true; 5046 } 5047 } 5048 return Failed; 5049 } 5050 5051 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5052 5053 SMLoc CntLoc = getLoc(); 5054 StringRef CntName = getTokenStr(); 5055 5056 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5057 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5058 return false; 5059 5060 int64_t CntVal; 5061 SMLoc ValLoc = getLoc(); 5062 if (!parseExpr(CntVal)) 5063 return false; 5064 5065 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5066 5067 bool Failed = true; 5068 bool Sat = CntName.endswith("_sat"); 5069 5070 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5071 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5072 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5073 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5074 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5075 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5076 } else { 5077 Error(CntLoc, "invalid counter name " + CntName); 5078 return false; 5079 } 5080 5081 if (Failed) { 5082 Error(ValLoc, "too large value for " + CntName); 5083 return false; 5084 } 5085 5086 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5087 return false; 5088 5089 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5090 if (isToken(AsmToken::EndOfStatement)) { 5091 Error(getLoc(), "expected a counter name"); 5092 return false; 5093 } 5094 } 5095 5096 return true; 5097 } 5098 5099 OperandMatchResultTy 5100 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5101 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5102 int64_t Waitcnt = getWaitcntBitMask(ISA); 5103 SMLoc S = getLoc(); 5104 5105 // If parse failed, do not return error code 5106 // to avoid excessive error messages. 
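  // Illustrative source forms (assumed syntax): symbolic counters such as
  // 's_waitcnt vmcnt(0) lgkmcnt(0)', optionally separated by '&' or ',',
  // or a raw immediate such as 's_waitcnt 0'.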
5107 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5108 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 5109 } else { 5110 parseExpr(Waitcnt); 5111 } 5112 5113 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5114 return MatchOperand_Success; 5115 } 5116 5117 bool 5118 AMDGPUOperand::isSWaitCnt() const { 5119 return isImm(); 5120 } 5121 5122 //===----------------------------------------------------------------------===// 5123 // hwreg 5124 //===----------------------------------------------------------------------===// 5125 5126 bool 5127 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5128 int64_t &Offset, 5129 int64_t &Width) { 5130 using namespace llvm::AMDGPU::Hwreg; 5131 5132 // The register may be specified by name or using a numeric code 5133 if (isToken(AsmToken::Identifier) && 5134 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5135 HwReg.IsSymbolic = true; 5136 lex(); // skip message name 5137 } else if (!parseExpr(HwReg.Id)) { 5138 return false; 5139 } 5140 5141 if (trySkipToken(AsmToken::RParen)) 5142 return true; 5143 5144 // parse optional params 5145 return 5146 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5147 parseExpr(Offset) && 5148 skipToken(AsmToken::Comma, "expected a comma") && 5149 parseExpr(Width) && 5150 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5151 } 5152 5153 bool 5154 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5155 const int64_t Offset, 5156 const int64_t Width, 5157 const SMLoc Loc) { 5158 5159 using namespace llvm::AMDGPU::Hwreg; 5160 5161 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5162 Error(Loc, "specified hardware register is not supported on this GPU"); 5163 return false; 5164 } else if (!isValidHwreg(HwReg.Id)) { 5165 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5166 return false; 5167 } else if (!isValidHwregOffset(Offset)) { 5168 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5169 return false; 5170 } else if (!isValidHwregWidth(Width)) { 5171 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5172 return false; 5173 } 5174 return true; 5175 } 5176 5177 OperandMatchResultTy 5178 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5179 using namespace llvm::AMDGPU::Hwreg; 5180 5181 int64_t ImmVal = 0; 5182 SMLoc Loc = getLoc(); 5183 5184 // If parse failed, do not return error code 5185 // to avoid excessive error messages. 
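  // Illustrative source forms (assumed syntax): a symbolic specification
  // such as 'hwreg(HW_REG_MODE, 0, 4)', a numeric one such as
  // 'hwreg(1, 0, 32)', or a plain 16-bit immediate.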
5186 if (trySkipId("hwreg", AsmToken::LParen)) { 5187 OperandInfoTy HwReg(ID_UNKNOWN_); 5188 int64_t Offset = OFFSET_DEFAULT_; 5189 int64_t Width = WIDTH_DEFAULT_; 5190 if (parseHwregBody(HwReg, Offset, Width) && 5191 validateHwreg(HwReg, Offset, Width, Loc)) { 5192 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5193 } 5194 } else if (parseExpr(ImmVal)) { 5195 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5196 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5197 } 5198 5199 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5200 return MatchOperand_Success; 5201 } 5202 5203 bool AMDGPUOperand::isHwreg() const { 5204 return isImmTy(ImmTyHwreg); 5205 } 5206 5207 //===----------------------------------------------------------------------===// 5208 // sendmsg 5209 //===----------------------------------------------------------------------===// 5210 5211 bool 5212 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5213 OperandInfoTy &Op, 5214 OperandInfoTy &Stream) { 5215 using namespace llvm::AMDGPU::SendMsg; 5216 5217 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5218 Msg.IsSymbolic = true; 5219 lex(); // skip message name 5220 } else if (!parseExpr(Msg.Id)) { 5221 return false; 5222 } 5223 5224 if (trySkipToken(AsmToken::Comma)) { 5225 Op.IsDefined = true; 5226 if (isToken(AsmToken::Identifier) && 5227 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5228 lex(); // skip operation name 5229 } else if (!parseExpr(Op.Id)) { 5230 return false; 5231 } 5232 5233 if (trySkipToken(AsmToken::Comma)) { 5234 Stream.IsDefined = true; 5235 if (!parseExpr(Stream.Id)) 5236 return false; 5237 } 5238 } 5239 5240 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5241 } 5242 5243 bool 5244 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5245 const OperandInfoTy &Op, 5246 const OperandInfoTy &Stream, 5247 const SMLoc S) { 5248 using namespace llvm::AMDGPU::SendMsg; 5249 5250 // Validation strictness depends on whether the message is specified 5251 // in a symbolic or in a numeric form. In the latter case 5252 // only the encoding possibility is checked. 5253 bool Strict = Msg.IsSymbolic; 5254 5255 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5256 Error(S, "invalid message id"); 5257 return false; 5258 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5259 Error(S, Op.IsDefined ? 5260 "message does not support operations" : 5261 "missing message operation"); 5262 return false; 5263 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5264 Error(S, "invalid operation id"); 5265 return false; 5266 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5267 Error(S, "message operation does not support streams"); 5268 return false; 5269 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5270 Error(S, "invalid message stream id"); 5271 return false; 5272 } 5273 return true; 5274 } 5275 5276 OperandMatchResultTy 5277 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5278 using namespace llvm::AMDGPU::SendMsg; 5279 5280 int64_t ImmVal = 0; 5281 SMLoc Loc = getLoc(); 5282 5283 // If parse failed, do not return error code 5284 // to avoid excessive error messages.
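  // Illustrative source forms (assumed syntax): 'sendmsg(MSG_INTERRUPT)',
  // 'sendmsg(MSG_GS, GS_OP_EMIT, 0)', or a plain 16-bit immediate.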
5285 if (trySkipId("sendmsg", AsmToken::LParen)) { 5286 OperandInfoTy Msg(ID_UNKNOWN_); 5287 OperandInfoTy Op(OP_NONE_); 5288 OperandInfoTy Stream(STREAM_ID_NONE_); 5289 if (parseSendMsgBody(Msg, Op, Stream) && 5290 validateSendMsg(Msg, Op, Stream, Loc)) { 5291 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5292 } 5293 } else if (parseExpr(ImmVal)) { 5294 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5295 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5296 } 5297 5298 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5299 return MatchOperand_Success; 5300 } 5301 5302 bool AMDGPUOperand::isSendMsg() const { 5303 return isImmTy(ImmTySendMsg); 5304 } 5305 5306 //===----------------------------------------------------------------------===// 5307 // v_interp 5308 //===----------------------------------------------------------------------===// 5309 5310 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5311 if (getLexer().getKind() != AsmToken::Identifier) 5312 return MatchOperand_NoMatch; 5313 5314 StringRef Str = Parser.getTok().getString(); 5315 int Slot = StringSwitch<int>(Str) 5316 .Case("p10", 0) 5317 .Case("p20", 1) 5318 .Case("p0", 2) 5319 .Default(-1); 5320 5321 SMLoc S = Parser.getTok().getLoc(); 5322 if (Slot == -1) 5323 return MatchOperand_ParseFail; 5324 5325 Parser.Lex(); 5326 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5327 AMDGPUOperand::ImmTyInterpSlot)); 5328 return MatchOperand_Success; 5329 } 5330 5331 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5332 if (getLexer().getKind() != AsmToken::Identifier) 5333 return MatchOperand_NoMatch; 5334 5335 StringRef Str = Parser.getTok().getString(); 5336 if (!Str.startswith("attr")) 5337 return MatchOperand_NoMatch; 5338 5339 StringRef Chan = Str.take_back(2); 5340 int AttrChan = StringSwitch<int>(Chan) 5341 .Case(".x", 0) 5342 .Case(".y", 1) 5343 .Case(".z", 2) 5344 .Case(".w", 3) 5345 .Default(-1); 5346 if (AttrChan == -1) 5347 return MatchOperand_ParseFail; 5348 5349 Str = Str.drop_back(2).drop_front(4); 5350 5351 uint8_t Attr; 5352 if (Str.getAsInteger(10, Attr)) 5353 return MatchOperand_ParseFail; 5354 5355 SMLoc S = Parser.getTok().getLoc(); 5356 Parser.Lex(); 5357 if (Attr > 63) { 5358 Error(S, "out of bounds attr"); 5359 return MatchOperand_Success; 5360 } 5361 5362 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5363 5364 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5365 AMDGPUOperand::ImmTyInterpAttr)); 5366 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5367 AMDGPUOperand::ImmTyAttrChan)); 5368 return MatchOperand_Success; 5369 } 5370 5371 //===----------------------------------------------------------------------===// 5372 // exp 5373 //===----------------------------------------------------------------------===// 5374 5375 void AMDGPUAsmParser::errorExpTgt() { 5376 Error(Parser.getTok().getLoc(), "invalid exp target"); 5377 } 5378 5379 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5380 uint8_t &Val) { 5381 if (Str == "null") { 5382 Val = 9; 5383 return MatchOperand_Success; 5384 } 5385 5386 if (Str.startswith("mrt")) { 5387 Str = Str.drop_front(3); 5388 if (Str == "z") { // == mrtz 5389 Val = 8; 5390 return MatchOperand_Success; 5391 } 5392 5393 if (Str.getAsInteger(10, Val)) 5394 return MatchOperand_ParseFail; 5395 5396 if (Val > 7) 5397 errorExpTgt(); 5398 5399 return MatchOperand_Success; 5400 } 5401 5402 if (Str.startswith("pos")) 
{ 5403 Str = Str.drop_front(3); 5404 if (Str.getAsInteger(10, Val)) 5405 return MatchOperand_ParseFail; 5406 5407 if (Val > 4 || (Val == 4 && !isGFX10())) 5408 errorExpTgt(); 5409 5410 Val += 12; 5411 return MatchOperand_Success; 5412 } 5413 5414 if (isGFX10() && Str == "prim") { 5415 Val = 20; 5416 return MatchOperand_Success; 5417 } 5418 5419 if (Str.startswith("param")) { 5420 Str = Str.drop_front(5); 5421 if (Str.getAsInteger(10, Val)) 5422 return MatchOperand_ParseFail; 5423 5424 if (Val >= 32) 5425 errorExpTgt(); 5426 5427 Val += 32; 5428 return MatchOperand_Success; 5429 } 5430 5431 if (Str.startswith("invalid_target_")) { 5432 Str = Str.drop_front(15); 5433 if (Str.getAsInteger(10, Val)) 5434 return MatchOperand_ParseFail; 5435 5436 errorExpTgt(); 5437 return MatchOperand_Success; 5438 } 5439 5440 return MatchOperand_NoMatch; 5441 } 5442 5443 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5444 uint8_t Val; 5445 StringRef Str = Parser.getTok().getString(); 5446 5447 auto Res = parseExpTgtImpl(Str, Val); 5448 if (Res != MatchOperand_Success) 5449 return Res; 5450 5451 SMLoc S = Parser.getTok().getLoc(); 5452 Parser.Lex(); 5453 5454 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5455 AMDGPUOperand::ImmTyExpTgt)); 5456 return MatchOperand_Success; 5457 } 5458 5459 //===----------------------------------------------------------------------===// 5460 // parser helpers 5461 //===----------------------------------------------------------------------===// 5462 5463 bool 5464 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5465 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5466 } 5467 5468 bool 5469 AMDGPUAsmParser::isId(const StringRef Id) const { 5470 return isId(getToken(), Id); 5471 } 5472 5473 bool 5474 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5475 return getTokenKind() == Kind; 5476 } 5477 5478 bool 5479 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5480 if (isId(Id)) { 5481 lex(); 5482 return true; 5483 } 5484 return false; 5485 } 5486 5487 bool 5488 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5489 if (isId(Id) && peekToken().is(Kind)) { 5490 lex(); 5491 lex(); 5492 return true; 5493 } 5494 return false; 5495 } 5496 5497 bool 5498 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5499 if (isToken(Kind)) { 5500 lex(); 5501 return true; 5502 } 5503 return false; 5504 } 5505 5506 bool 5507 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5508 const StringRef ErrMsg) { 5509 if (!trySkipToken(Kind)) { 5510 Error(getLoc(), ErrMsg); 5511 return false; 5512 } 5513 return true; 5514 } 5515 5516 bool 5517 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5518 return !getParser().parseAbsoluteExpression(Imm); 5519 } 5520 5521 bool 5522 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5523 SMLoc S = getLoc(); 5524 5525 const MCExpr *Expr; 5526 if (Parser.parseExpression(Expr)) 5527 return false; 5528 5529 int64_t IntVal; 5530 if (Expr->evaluateAsAbsolute(IntVal)) { 5531 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5532 } else { 5533 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5534 } 5535 return true; 5536 } 5537 5538 bool 5539 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5540 if (isToken(AsmToken::String)) { 5541 Val = getToken().getStringContents(); 5542 lex(); 5543 return true; 5544 } else { 5545 Error(getLoc(), ErrMsg); 5546 return false; 5547 } 5548 } 5549 5550 
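// The token helpers that follow are thin wrappers around MCAsmParser and
// MCAsmLexer; they only centralize token access for the parsing routines above.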
AsmToken 5551 AMDGPUAsmParser::getToken() const { 5552 return Parser.getTok(); 5553 } 5554 5555 AsmToken 5556 AMDGPUAsmParser::peekToken() { 5557 return getLexer().peekTok(); 5558 } 5559 5560 void 5561 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5562 auto TokCount = getLexer().peekTokens(Tokens); 5563 5564 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5565 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5566 } 5567 5568 AsmToken::TokenKind 5569 AMDGPUAsmParser::getTokenKind() const { 5570 return getLexer().getKind(); 5571 } 5572 5573 SMLoc 5574 AMDGPUAsmParser::getLoc() const { 5575 return getToken().getLoc(); 5576 } 5577 5578 StringRef 5579 AMDGPUAsmParser::getTokenStr() const { 5580 return getToken().getString(); 5581 } 5582 5583 void 5584 AMDGPUAsmParser::lex() { 5585 Parser.Lex(); 5586 } 5587 5588 //===----------------------------------------------------------------------===// 5589 // swizzle 5590 //===----------------------------------------------------------------------===// 5591 5592 LLVM_READNONE 5593 static unsigned 5594 encodeBitmaskPerm(const unsigned AndMask, 5595 const unsigned OrMask, 5596 const unsigned XorMask) { 5597 using namespace llvm::AMDGPU::Swizzle; 5598 5599 return BITMASK_PERM_ENC | 5600 (AndMask << BITMASK_AND_SHIFT) | 5601 (OrMask << BITMASK_OR_SHIFT) | 5602 (XorMask << BITMASK_XOR_SHIFT); 5603 } 5604 5605 bool 5606 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5607 const unsigned MinVal, 5608 const unsigned MaxVal, 5609 const StringRef ErrMsg) { 5610 for (unsigned i = 0; i < OpNum; ++i) { 5611 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5612 return false; 5613 } 5614 SMLoc ExprLoc = Parser.getTok().getLoc(); 5615 if (!parseExpr(Op[i])) { 5616 return false; 5617 } 5618 if (Op[i] < MinVal || Op[i] > MaxVal) { 5619 Error(ExprLoc, ErrMsg); 5620 return false; 5621 } 5622 } 5623 5624 return true; 5625 } 5626 5627 bool 5628 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5629 using namespace llvm::AMDGPU::Swizzle; 5630 5631 int64_t Lane[LANE_NUM]; 5632 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5633 "expected a 2-bit lane id")) { 5634 Imm = QUAD_PERM_ENC; 5635 for (unsigned I = 0; I < LANE_NUM; ++I) { 5636 Imm |= Lane[I] << (LANE_SHIFT * I); 5637 } 5638 return true; 5639 } 5640 return false; 5641 } 5642 5643 bool 5644 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5645 using namespace llvm::AMDGPU::Swizzle; 5646 5647 SMLoc S = Parser.getTok().getLoc(); 5648 int64_t GroupSize; 5649 int64_t LaneIdx; 5650 5651 if (!parseSwizzleOperands(1, &GroupSize, 5652 2, 32, 5653 "group size must be in the interval [2,32]")) { 5654 return false; 5655 } 5656 if (!isPowerOf2_64(GroupSize)) { 5657 Error(S, "group size must be a power of two"); 5658 return false; 5659 } 5660 if (parseSwizzleOperands(1, &LaneIdx, 5661 0, GroupSize - 1, 5662 "lane id must be in the interval [0,group size - 1]")) { 5663 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5664 return true; 5665 } 5666 return false; 5667 } 5668 5669 bool 5670 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5671 using namespace llvm::AMDGPU::Swizzle; 5672 5673 SMLoc S = Parser.getTok().getLoc(); 5674 int64_t GroupSize; 5675 5676 if (!parseSwizzleOperands(1, &GroupSize, 5677 2, 32, "group size must be in the interval [2,32]")) { 5678 return false; 5679 } 5680 if (!isPowerOf2_64(GroupSize)) { 5681 Error(S, "group size must be a power of two"); 5682 return false; 5683 } 5684 5685 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, 
GroupSize - 1); 5686 return true; 5687 } 5688 5689 bool 5690 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5691 using namespace llvm::AMDGPU::Swizzle; 5692 5693 SMLoc S = Parser.getTok().getLoc(); 5694 int64_t GroupSize; 5695 5696 if (!parseSwizzleOperands(1, &GroupSize, 5697 1, 16, "group size must be in the interval [1,16]")) { 5698 return false; 5699 } 5700 if (!isPowerOf2_64(GroupSize)) { 5701 Error(S, "group size must be a power of two"); 5702 return false; 5703 } 5704 5705 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5706 return true; 5707 } 5708 5709 bool 5710 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5711 using namespace llvm::AMDGPU::Swizzle; 5712 5713 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5714 return false; 5715 } 5716 5717 StringRef Ctl; 5718 SMLoc StrLoc = Parser.getTok().getLoc(); 5719 if (!parseString(Ctl)) { 5720 return false; 5721 } 5722 if (Ctl.size() != BITMASK_WIDTH) { 5723 Error(StrLoc, "expected a 5-character mask"); 5724 return false; 5725 } 5726 5727 unsigned AndMask = 0; 5728 unsigned OrMask = 0; 5729 unsigned XorMask = 0; 5730 5731 for (size_t i = 0; i < Ctl.size(); ++i) { 5732 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5733 switch(Ctl[i]) { 5734 default: 5735 Error(StrLoc, "invalid mask"); 5736 return false; 5737 case '0': 5738 break; 5739 case '1': 5740 OrMask |= Mask; 5741 break; 5742 case 'p': 5743 AndMask |= Mask; 5744 break; 5745 case 'i': 5746 AndMask |= Mask; 5747 XorMask |= Mask; 5748 break; 5749 } 5750 } 5751 5752 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5753 return true; 5754 } 5755 5756 bool 5757 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5758 5759 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5760 5761 if (!parseExpr(Imm)) { 5762 return false; 5763 } 5764 if (!isUInt<16>(Imm)) { 5765 Error(OffsetLoc, "expected a 16-bit offset"); 5766 return false; 5767 } 5768 return true; 5769 } 5770 5771 bool 5772 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5773 using namespace llvm::AMDGPU::Swizzle; 5774 5775 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5776 5777 SMLoc ModeLoc = Parser.getTok().getLoc(); 5778 bool Ok = false; 5779 5780 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5781 Ok = parseSwizzleQuadPerm(Imm); 5782 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5783 Ok = parseSwizzleBitmaskPerm(Imm); 5784 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5785 Ok = parseSwizzleBroadcast(Imm); 5786 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5787 Ok = parseSwizzleSwap(Imm); 5788 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5789 Ok = parseSwizzleReverse(Imm); 5790 } else { 5791 Error(ModeLoc, "expected a swizzle mode"); 5792 } 5793 5794 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5795 } 5796 5797 return false; 5798 } 5799 5800 OperandMatchResultTy 5801 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5802 SMLoc S = Parser.getTok().getLoc(); 5803 int64_t Imm = 0; 5804 5805 if (trySkipId("offset")) { 5806 5807 bool Ok = false; 5808 if (skipToken(AsmToken::Colon, "expected a colon")) { 5809 if (trySkipId("swizzle")) { 5810 Ok = parseSwizzleMacro(Imm); 5811 } else { 5812 Ok = parseSwizzleOffset(Imm); 5813 } 5814 } 5815 5816 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5817 5818 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5819 } else { 5820 // Swizzle "offset" operand is optional. 5821 // If it is omitted, try parsing other optional operands. 
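// For reference, the branch above handles forms such as (illustrative only):
//   ds_swizzle_b32 v5, v1 offset:0xFFFF
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v5, v1 offset:swizzle(BITMASK_PERM, "01pi0")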
5822 return parseOptionalOpr(Operands); 5823 } 5824 } 5825 5826 bool 5827 AMDGPUOperand::isSwizzle() const { 5828 return isImmTy(ImmTySwizzle); 5829 } 5830 5831 //===----------------------------------------------------------------------===// 5832 // VGPR Index Mode 5833 //===----------------------------------------------------------------------===// 5834 5835 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5836 5837 using namespace llvm::AMDGPU::VGPRIndexMode; 5838 5839 if (trySkipToken(AsmToken::RParen)) { 5840 return OFF; 5841 } 5842 5843 int64_t Imm = 0; 5844 5845 while (true) { 5846 unsigned Mode = 0; 5847 SMLoc S = Parser.getTok().getLoc(); 5848 5849 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5850 if (trySkipId(IdSymbolic[ModeId])) { 5851 Mode = 1 << ModeId; 5852 break; 5853 } 5854 } 5855 5856 if (Mode == 0) { 5857 Error(S, (Imm == 0)? 5858 "expected a VGPR index mode or a closing parenthesis" : 5859 "expected a VGPR index mode"); 5860 break; 5861 } 5862 5863 if (Imm & Mode) { 5864 Error(S, "duplicate VGPR index mode"); 5865 break; 5866 } 5867 Imm |= Mode; 5868 5869 if (trySkipToken(AsmToken::RParen)) 5870 break; 5871 if (!skipToken(AsmToken::Comma, 5872 "expected a comma or a closing parenthesis")) 5873 break; 5874 } 5875 5876 return Imm; 5877 } 5878 5879 OperandMatchResultTy 5880 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5881 5882 int64_t Imm = 0; 5883 SMLoc S = Parser.getTok().getLoc(); 5884 5885 if (getLexer().getKind() == AsmToken::Identifier && 5886 Parser.getTok().getString() == "gpr_idx" && 5887 getLexer().peekTok().is(AsmToken::LParen)) { 5888 5889 Parser.Lex(); 5890 Parser.Lex(); 5891 5892 // If parse failed, trigger an error but do not return error code 5893 // to avoid excessive error messages. 5894 Imm = parseGPRIdxMacro(); 5895 5896 } else { 5897 if (getParser().parseAbsoluteExpression(Imm)) 5898 return MatchOperand_NoMatch; 5899 if (Imm < 0 || !isUInt<4>(Imm)) { 5900 Error(S, "invalid immediate: only 4-bit values are legal"); 5901 } 5902 } 5903 5904 Operands.push_back( 5905 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5906 return MatchOperand_Success; 5907 } 5908 5909 bool AMDGPUOperand::isGPRIdxMode() const { 5910 return isImmTy(ImmTyGprIdxMode); 5911 } 5912 5913 //===----------------------------------------------------------------------===// 5914 // sopp branch targets 5915 //===----------------------------------------------------------------------===// 5916 5917 OperandMatchResultTy 5918 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5919 5920 // Make sure we are not parsing something 5921 // that looks like a label or an expression but is not. 5922 // This will improve error messages. 5923 if (isRegister() || isModifier()) 5924 return MatchOperand_NoMatch; 5925 5926 if (parseExpr(Operands)) { 5927 5928 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 5929 assert(Opr.isImm() || Opr.isExpr()); 5930 SMLoc Loc = Opr.getStartLoc(); 5931 5932 // Currently we do not support arbitrary expressions as branch targets. 5933 // Only labels and absolute expressions are accepted. 
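// For example (illustrative): 's_branch loop_end' and 's_branch 8' are
// accepted here, whereas a composite expression such as 'loop_end+4' falls
// into the error path below.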
5934 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 5935 Error(Loc, "expected an absolute expression or a label"); 5936 } else if (Opr.isImm() && !Opr.isS16Imm()) { 5937 Error(Loc, "expected a 16-bit signed jump offset"); 5938 } 5939 } 5940 5941 return MatchOperand_Success; // avoid excessive error messages 5942 } 5943 5944 //===----------------------------------------------------------------------===// 5945 // Boolean holding registers 5946 //===----------------------------------------------------------------------===// 5947 5948 OperandMatchResultTy 5949 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5950 return parseReg(Operands); 5951 } 5952 5953 //===----------------------------------------------------------------------===// 5954 // mubuf 5955 //===----------------------------------------------------------------------===// 5956 5957 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5958 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5959 } 5960 5961 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5962 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5963 } 5964 5965 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5966 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5967 } 5968 5969 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5970 const OperandVector &Operands, 5971 bool IsAtomic, 5972 bool IsAtomicReturn, 5973 bool IsLds) { 5974 bool IsLdsOpcode = IsLds; 5975 bool HasLdsModifier = false; 5976 OptionalImmIndexMap OptionalIdx; 5977 assert(IsAtomicReturn ? IsAtomic : true); 5978 unsigned FirstOperandIdx = 1; 5979 5980 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5981 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5982 5983 // Add the register arguments 5984 if (Op.isReg()) { 5985 Op.addRegOperands(Inst, 1); 5986 // Insert a tied src for atomic return dst. 5987 // This cannot be postponed as subsequent calls to 5988 // addImmOperands rely on correct number of MC operands. 5989 if (IsAtomicReturn && i == FirstOperandIdx) 5990 Op.addRegOperands(Inst, 1); 5991 continue; 5992 } 5993 5994 // Handle the case where soffset is an immediate 5995 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5996 Op.addImmOperands(Inst, 1); 5997 continue; 5998 } 5999 6000 HasLdsModifier |= Op.isLDS(); 6001 6002 // Handle tokens like 'offen' which are sometimes hard-coded into the 6003 // asm string. There are no MCInst operands for these. 6004 if (Op.isToken()) { 6005 continue; 6006 } 6007 assert(Op.isImm()); 6008 6009 // Handle optional arguments 6010 OptionalIdx[Op.getImmTy()] = i; 6011 } 6012 6013 // This is a workaround for an llvm quirk which may result in an 6014 // incorrect instruction selection. Lds and non-lds versions of 6015 // MUBUF instructions are identical except that lds versions 6016 // have mandatory 'lds' modifier. However this modifier follows 6017 // optional modifiers and llvm asm matcher regards this 'lds' 6018 // modifier as an optional one. As a result, an lds version 6019 // of opcode may be selected even if it has no 'lds' modifier. 6020 if (IsLdsOpcode && !HasLdsModifier) { 6021 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6022 if (NoLdsOpcode != -1) { // Got lds version - correct it. 6023 Inst.setOpcode(NoLdsOpcode); 6024 IsLdsOpcode = false; 6025 } 6026 } 6027 6028 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6029 if (!IsAtomic) { // glc is hard-coded. 
6030 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6031 } 6032 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6033 6034 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6035 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6036 } 6037 6038 if (isGFX10()) 6039 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6040 } 6041 6042 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6043 OptionalImmIndexMap OptionalIdx; 6044 6045 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6046 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6047 6048 // Add the register arguments 6049 if (Op.isReg()) { 6050 Op.addRegOperands(Inst, 1); 6051 continue; 6052 } 6053 6054 // Handle the case where soffset is an immediate 6055 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6056 Op.addImmOperands(Inst, 1); 6057 continue; 6058 } 6059 6060 // Handle tokens like 'offen' which are sometimes hard-coded into the 6061 // asm string. There are no MCInst operands for these. 6062 if (Op.isToken()) { 6063 continue; 6064 } 6065 assert(Op.isImm()); 6066 6067 // Handle optional arguments 6068 OptionalIdx[Op.getImmTy()] = i; 6069 } 6070 6071 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6072 AMDGPUOperand::ImmTyOffset); 6073 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6074 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6075 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6076 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6077 6078 if (isGFX10()) 6079 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6080 } 6081 6082 //===----------------------------------------------------------------------===// 6083 // mimg 6084 //===----------------------------------------------------------------------===// 6085 6086 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6087 bool IsAtomic) { 6088 unsigned I = 1; 6089 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6090 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6091 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6092 } 6093 6094 if (IsAtomic) { 6095 // Add src, same as dst 6096 assert(Desc.getNumDefs() == 1); 6097 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6098 } 6099 6100 OptionalImmIndexMap OptionalIdx; 6101 6102 for (unsigned E = Operands.size(); I != E; ++I) { 6103 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6104 6105 // Add the register arguments 6106 if (Op.isReg()) { 6107 Op.addRegOperands(Inst, 1); 6108 } else if (Op.isImmModifier()) { 6109 OptionalIdx[Op.getImmTy()] = I; 6110 } else if (!Op.isToken()) { 6111 llvm_unreachable("unexpected operand type"); 6112 } 6113 } 6114 6115 bool IsGFX10 = isGFX10(); 6116 6117 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6118 if (IsGFX10) 6119 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6120 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6121 if (IsGFX10) 6122 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6123 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6124 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6125 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6126 if (IsGFX10) 6127 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6128 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6129 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6130 if (!IsGFX10) 6131 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6132 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6133 } 6134 6135 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6136 cvtMIMG(Inst, Operands, true); 6137 } 6138 6139 //===----------------------------------------------------------------------===// 6140 // smrd 6141 //===----------------------------------------------------------------------===// 6142 6143 bool AMDGPUOperand::isSMRDOffset8() const { 6144 return isImm() && isUInt<8>(getImm()); 6145 } 6146 6147 bool AMDGPUOperand::isSMEMOffset() const { 6148 return isImm(); // Offset range is checked later by validator. 6149 } 6150 6151 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6152 // 32-bit literals are only supported on CI and we only want to use them 6153 // when the offset is > 8-bits. 6154 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6155 } 6156 6157 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6158 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6159 } 6160 6161 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6162 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6163 } 6164 6165 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6166 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6167 } 6168 6169 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6170 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6171 } 6172 6173 //===----------------------------------------------------------------------===// 6174 // vop3 6175 //===----------------------------------------------------------------------===// 6176 6177 static bool ConvertOmodMul(int64_t &Mul) { 6178 if (Mul != 1 && Mul != 2 && Mul != 4) 6179 return false; 6180 6181 Mul >>= 1; 6182 return true; 6183 } 6184 6185 static bool ConvertOmodDiv(int64_t &Div) { 6186 if (Div == 1) { 6187 Div = 0; 6188 return true; 6189 } 6190 6191 if (Div == 2) { 6192 Div = 3; 6193 return true; 6194 } 6195 6196 return false; 6197 } 6198 6199 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6200 if (BoundCtrl == 0) { 6201 BoundCtrl = 1; 6202 return true; 6203 } 6204 6205 if (BoundCtrl == -1) { 6206 BoundCtrl = 0; 6207 return true; 6208 } 6209 6210 return false; 6211 } 6212 6213 // Note: the order in this table matches the order of operands in AsmString. 
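// Each entry below pairs an operand name with its immediate type, a flag
// telling whether it is a value-less bit operand (e.g. 'glc', 'tfe'), and an
// optional result converter (see ConvertOmodMul, ConvertOmodDiv and
// ConvertBoundCtrl above); parseOptionalOpr() dispatches on these fields.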
6214 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6215 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6216 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6217 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6218 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6219 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6220 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6221 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6222 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6223 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6224 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6225 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 6226 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6227 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6228 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6229 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6230 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6231 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6232 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6233 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6234 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6235 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6236 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6237 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6238 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6239 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6240 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6241 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6242 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6243 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6244 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6245 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6246 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6247 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6248 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6249 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6250 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6251 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6252 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6253 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6254 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6255 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6256 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6257 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6258 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6259 }; 6260 6261 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6262 6263 OperandMatchResultTy res = parseOptionalOpr(Operands); 6264 6265 // This is a hack to enable hardcoded mandatory operands which follow 6266 // optional operands. 6267 // 6268 // Current design assumes that all operands after the first optional operand 6269 // are also optional. However implementation of some instructions violates 6270 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6271 // 6272 // To alleviate this problem, we have to (implicitly) parse extra operands 6273 // to make sure autogenerated parser of custom operands never hit hardcoded 6274 // mandatory operands. 
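// The loop below keeps consuming optional operands (and the commas between
// them) until a parse attempt fails, the end of the statement is reached, or
// the MAX_OPR_LOOKAHEAD limit is hit.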
6275 6276 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6277 if (res != MatchOperand_Success || 6278 isToken(AsmToken::EndOfStatement)) 6279 break; 6280 6281 trySkipToken(AsmToken::Comma); 6282 res = parseOptionalOpr(Operands); 6283 } 6284 6285 return res; 6286 } 6287 6288 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6289 OperandMatchResultTy res; 6290 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6291 // try to parse any optional operand here 6292 if (Op.IsBit) { 6293 res = parseNamedBit(Op.Name, Operands, Op.Type); 6294 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6295 res = parseOModOperand(Operands); 6296 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6297 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6298 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6299 res = parseSDWASel(Operands, Op.Name, Op.Type); 6300 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6301 res = parseSDWADstUnused(Operands); 6302 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6303 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6304 Op.Type == AMDGPUOperand::ImmTyNegLo || 6305 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6306 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6307 Op.ConvertResult); 6308 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6309 res = parseDim(Operands); 6310 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 6311 res = parseDfmtNfmt(Operands); 6312 } else { 6313 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6314 } 6315 if (res != MatchOperand_NoMatch) { 6316 return res; 6317 } 6318 } 6319 return MatchOperand_NoMatch; 6320 } 6321 6322 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6323 StringRef Name = Parser.getTok().getString(); 6324 if (Name == "mul") { 6325 return parseIntWithPrefix("mul", Operands, 6326 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6327 } 6328 6329 if (Name == "div") { 6330 return parseIntWithPrefix("div", Operands, 6331 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6332 } 6333 6334 return MatchOperand_NoMatch; 6335 } 6336 6337 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6338 cvtVOP3P(Inst, Operands); 6339 6340 int Opc = Inst.getOpcode(); 6341 6342 int SrcNum; 6343 const int Ops[] = { AMDGPU::OpName::src0, 6344 AMDGPU::OpName::src1, 6345 AMDGPU::OpName::src2 }; 6346 for (SrcNum = 0; 6347 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6348 ++SrcNum); 6349 assert(SrcNum > 0); 6350 6351 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6352 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6353 6354 if ((OpSel & (1 << SrcNum)) != 0) { 6355 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6356 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6357 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6358 } 6359 } 6360 6361 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6362 // 1. This operand is input modifiers 6363 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6364 // 2. This is not last operand 6365 && Desc.NumOperands > (OpNum + 1) 6366 // 3. Next operand is register class 6367 && Desc.OpInfo[OpNum + 1].RegClass != -1 6368 // 4. 
Next register is not tied to any other operand 6369 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6370 } 6371 6372 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6373 { 6374 OptionalImmIndexMap OptionalIdx; 6375 unsigned Opc = Inst.getOpcode(); 6376 6377 unsigned I = 1; 6378 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6379 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6380 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6381 } 6382 6383 for (unsigned E = Operands.size(); I != E; ++I) { 6384 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6385 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6386 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6387 } else if (Op.isInterpSlot() || 6388 Op.isInterpAttr() || 6389 Op.isAttrChan()) { 6390 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6391 } else if (Op.isImmModifier()) { 6392 OptionalIdx[Op.getImmTy()] = I; 6393 } else { 6394 llvm_unreachable("unhandled operand type"); 6395 } 6396 } 6397 6398 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6399 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6400 } 6401 6402 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6403 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6404 } 6405 6406 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6407 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6408 } 6409 } 6410 6411 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6412 OptionalImmIndexMap &OptionalIdx) { 6413 unsigned Opc = Inst.getOpcode(); 6414 6415 unsigned I = 1; 6416 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6417 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6418 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6419 } 6420 6421 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6422 // This instruction has src modifiers 6423 for (unsigned E = Operands.size(); I != E; ++I) { 6424 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6425 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6426 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6427 } else if (Op.isImmModifier()) { 6428 OptionalIdx[Op.getImmTy()] = I; 6429 } else if (Op.isRegOrImm()) { 6430 Op.addRegOrImmOperands(Inst, 1); 6431 } else { 6432 llvm_unreachable("unhandled operand type"); 6433 } 6434 } 6435 } else { 6436 // No src modifiers 6437 for (unsigned E = Operands.size(); I != E; ++I) { 6438 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6439 if (Op.isMod()) { 6440 OptionalIdx[Op.getImmTy()] = I; 6441 } else { 6442 Op.addRegOrImmOperands(Inst, 1); 6443 } 6444 } 6445 } 6446 6447 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6448 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6449 } 6450 6451 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6452 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6453 } 6454 6455 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6456 // it has src2 register operand that is tied to dst operand 6457 // we don't allow modifiers for this operand in assembler so src2_modifiers 6458 // should be 0. 
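// For the opcodes listed below, a zero src2_modifiers operand is inserted and
// the dst operand is duplicated as src2 so that the tied-operand constraint
// is satisfied.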
6459 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6460 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6461 Opc == AMDGPU::V_MAC_F32_e64_vi || 6462 Opc == AMDGPU::V_MAC_F16_e64_vi || 6463 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6464 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6465 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6466 auto it = Inst.begin(); 6467 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6468 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6469 ++it; 6470 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6471 } 6472 } 6473 6474 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6475 OptionalImmIndexMap OptionalIdx; 6476 cvtVOP3(Inst, Operands, OptionalIdx); 6477 } 6478 6479 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6480 const OperandVector &Operands) { 6481 OptionalImmIndexMap OptIdx; 6482 const int Opc = Inst.getOpcode(); 6483 const MCInstrDesc &Desc = MII.get(Opc); 6484 6485 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6486 6487 cvtVOP3(Inst, Operands, OptIdx); 6488 6489 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6490 assert(!IsPacked); 6491 Inst.addOperand(Inst.getOperand(0)); 6492 } 6493 6494 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6495 // instruction, and then figure out where to actually put the modifiers 6496 6497 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6498 6499 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6500 if (OpSelHiIdx != -1) { 6501 int DefaultVal = IsPacked ? -1 : 0; 6502 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6503 DefaultVal); 6504 } 6505 6506 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6507 if (NegLoIdx != -1) { 6508 assert(IsPacked); 6509 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6510 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6511 } 6512 6513 const int Ops[] = { AMDGPU::OpName::src0, 6514 AMDGPU::OpName::src1, 6515 AMDGPU::OpName::src2 }; 6516 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6517 AMDGPU::OpName::src1_modifiers, 6518 AMDGPU::OpName::src2_modifiers }; 6519 6520 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6521 6522 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6523 unsigned OpSelHi = 0; 6524 unsigned NegLo = 0; 6525 unsigned NegHi = 0; 6526 6527 if (OpSelHiIdx != -1) { 6528 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6529 } 6530 6531 if (NegLoIdx != -1) { 6532 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6533 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6534 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6535 } 6536 6537 for (int J = 0; J < 3; ++J) { 6538 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6539 if (OpIdx == -1) 6540 break; 6541 6542 uint32_t ModVal = 0; 6543 6544 if ((OpSel & (1 << J)) != 0) 6545 ModVal |= SISrcMods::OP_SEL_0; 6546 6547 if ((OpSelHi & (1 << J)) != 0) 6548 ModVal |= SISrcMods::OP_SEL_1; 6549 6550 if ((NegLo & (1 << J)) != 0) 6551 ModVal |= SISrcMods::NEG; 6552 6553 if ((NegHi & (1 << J)) != 0) 6554 ModVal |= SISrcMods::NEG_HI; 6555 6556 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6557 6558 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6559 } 6560 } 6561 6562 //===----------------------------------------------------------------------===// 6563 // dpp 6564 
//===----------------------------------------------------------------------===// 6565 6566 bool AMDGPUOperand::isDPP8() const { 6567 return isImmTy(ImmTyDPP8); 6568 } 6569 6570 bool AMDGPUOperand::isDPPCtrl() const { 6571 using namespace AMDGPU::DPP; 6572 6573 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6574 if (result) { 6575 int64_t Imm = getImm(); 6576 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6577 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6578 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6579 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6580 (Imm == DppCtrl::WAVE_SHL1) || 6581 (Imm == DppCtrl::WAVE_ROL1) || 6582 (Imm == DppCtrl::WAVE_SHR1) || 6583 (Imm == DppCtrl::WAVE_ROR1) || 6584 (Imm == DppCtrl::ROW_MIRROR) || 6585 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6586 (Imm == DppCtrl::BCAST15) || 6587 (Imm == DppCtrl::BCAST31) || 6588 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6589 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6590 } 6591 return false; 6592 } 6593 6594 //===----------------------------------------------------------------------===// 6595 // mAI 6596 //===----------------------------------------------------------------------===// 6597 6598 bool AMDGPUOperand::isBLGP() const { 6599 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6600 } 6601 6602 bool AMDGPUOperand::isCBSZ() const { 6603 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6604 } 6605 6606 bool AMDGPUOperand::isABID() const { 6607 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6608 } 6609 6610 bool AMDGPUOperand::isS16Imm() const { 6611 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6612 } 6613 6614 bool AMDGPUOperand::isU16Imm() const { 6615 return isImm() && isUInt<16>(getImm()); 6616 } 6617 6618 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6619 if (!isGFX10()) 6620 return MatchOperand_NoMatch; 6621 6622 SMLoc S = Parser.getTok().getLoc(); 6623 6624 if (getLexer().isNot(AsmToken::Identifier)) 6625 return MatchOperand_NoMatch; 6626 if (getLexer().getTok().getString() != "dim") 6627 return MatchOperand_NoMatch; 6628 6629 Parser.Lex(); 6630 if (getLexer().isNot(AsmToken::Colon)) 6631 return MatchOperand_ParseFail; 6632 6633 Parser.Lex(); 6634 6635 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6636 // integer. 
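// Illustrative accepted forms: dim:1D, dim:2D_ARRAY, dim:SQ_RSRC_IMG_3D.
// The integer and identifier tokens are glued back together below, and an
// optional SQ_RSRC_IMG_ prefix is stripped before the dim lookup.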
6637 std::string Token; 6638 if (getLexer().is(AsmToken::Integer)) { 6639 SMLoc Loc = getLexer().getTok().getEndLoc(); 6640 Token = std::string(getLexer().getTok().getString()); 6641 Parser.Lex(); 6642 if (getLexer().getTok().getLoc() != Loc) 6643 return MatchOperand_ParseFail; 6644 } 6645 if (getLexer().isNot(AsmToken::Identifier)) 6646 return MatchOperand_ParseFail; 6647 Token += getLexer().getTok().getString(); 6648 6649 StringRef DimId = Token; 6650 if (DimId.startswith("SQ_RSRC_IMG_")) 6651 DimId = DimId.substr(12); 6652 6653 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6654 if (!DimInfo) 6655 return MatchOperand_ParseFail; 6656 6657 Parser.Lex(); 6658 6659 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6660 AMDGPUOperand::ImmTyDim)); 6661 return MatchOperand_Success; 6662 } 6663 6664 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6665 SMLoc S = Parser.getTok().getLoc(); 6666 StringRef Prefix; 6667 6668 if (getLexer().getKind() == AsmToken::Identifier) { 6669 Prefix = Parser.getTok().getString(); 6670 } else { 6671 return MatchOperand_NoMatch; 6672 } 6673 6674 if (Prefix != "dpp8") 6675 return parseDPPCtrl(Operands); 6676 if (!isGFX10()) 6677 return MatchOperand_NoMatch; 6678 6679 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6680 6681 int64_t Sels[8]; 6682 6683 Parser.Lex(); 6684 if (getLexer().isNot(AsmToken::Colon)) 6685 return MatchOperand_ParseFail; 6686 6687 Parser.Lex(); 6688 if (getLexer().isNot(AsmToken::LBrac)) 6689 return MatchOperand_ParseFail; 6690 6691 Parser.Lex(); 6692 if (getParser().parseAbsoluteExpression(Sels[0])) 6693 return MatchOperand_ParseFail; 6694 if (0 > Sels[0] || 7 < Sels[0]) 6695 return MatchOperand_ParseFail; 6696 6697 for (size_t i = 1; i < 8; ++i) { 6698 if (getLexer().isNot(AsmToken::Comma)) 6699 return MatchOperand_ParseFail; 6700 6701 Parser.Lex(); 6702 if (getParser().parseAbsoluteExpression(Sels[i])) 6703 return MatchOperand_ParseFail; 6704 if (0 > Sels[i] || 7 < Sels[i]) 6705 return MatchOperand_ParseFail; 6706 } 6707 6708 if (getLexer().isNot(AsmToken::RBrac)) 6709 return MatchOperand_ParseFail; 6710 Parser.Lex(); 6711 6712 unsigned DPP8 = 0; 6713 for (size_t i = 0; i < 8; ++i) 6714 DPP8 |= (Sels[i] << (i * 3)); 6715 6716 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6717 return MatchOperand_Success; 6718 } 6719 6720 OperandMatchResultTy 6721 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6722 using namespace AMDGPU::DPP; 6723 6724 SMLoc S = Parser.getTok().getLoc(); 6725 StringRef Prefix; 6726 int64_t Int; 6727 6728 if (getLexer().getKind() == AsmToken::Identifier) { 6729 Prefix = Parser.getTok().getString(); 6730 } else { 6731 return MatchOperand_NoMatch; 6732 } 6733 6734 if (Prefix == "row_mirror") { 6735 Int = DppCtrl::ROW_MIRROR; 6736 Parser.Lex(); 6737 } else if (Prefix == "row_half_mirror") { 6738 Int = DppCtrl::ROW_HALF_MIRROR; 6739 Parser.Lex(); 6740 } else { 6741 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6742 if (Prefix != "quad_perm" 6743 && Prefix != "row_shl" 6744 && Prefix != "row_shr" 6745 && Prefix != "row_ror" 6746 && Prefix != "wave_shl" 6747 && Prefix != "wave_rol" 6748 && Prefix != "wave_shr" 6749 && Prefix != "wave_ror" 6750 && Prefix != "row_bcast" 6751 && Prefix != "row_share" 6752 && Prefix != "row_xmask") { 6753 return MatchOperand_NoMatch; 6754 } 6755 6756 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6757 return MatchOperand_NoMatch; 6758 6759 if 
(!isVI() && !isGFX9() && 6760 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6761 Prefix == "wave_rol" || Prefix == "wave_ror" || 6762 Prefix == "row_bcast")) 6763 return MatchOperand_NoMatch; 6764 6765 Parser.Lex(); 6766 if (getLexer().isNot(AsmToken::Colon)) 6767 return MatchOperand_ParseFail; 6768 6769 if (Prefix == "quad_perm") { 6770 // quad_perm:[%d,%d,%d,%d] 6771 Parser.Lex(); 6772 if (getLexer().isNot(AsmToken::LBrac)) 6773 return MatchOperand_ParseFail; 6774 Parser.Lex(); 6775 6776 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6777 return MatchOperand_ParseFail; 6778 6779 for (int i = 0; i < 3; ++i) { 6780 if (getLexer().isNot(AsmToken::Comma)) 6781 return MatchOperand_ParseFail; 6782 Parser.Lex(); 6783 6784 int64_t Temp; 6785 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6786 return MatchOperand_ParseFail; 6787 const int shift = i*2 + 2; 6788 Int += (Temp << shift); 6789 } 6790 6791 if (getLexer().isNot(AsmToken::RBrac)) 6792 return MatchOperand_ParseFail; 6793 Parser.Lex(); 6794 } else { 6795 // sel:%d 6796 Parser.Lex(); 6797 if (getParser().parseAbsoluteExpression(Int)) 6798 return MatchOperand_ParseFail; 6799 6800 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6801 Int |= DppCtrl::ROW_SHL0; 6802 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6803 Int |= DppCtrl::ROW_SHR0; 6804 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6805 Int |= DppCtrl::ROW_ROR0; 6806 } else if (Prefix == "wave_shl" && 1 == Int) { 6807 Int = DppCtrl::WAVE_SHL1; 6808 } else if (Prefix == "wave_rol" && 1 == Int) { 6809 Int = DppCtrl::WAVE_ROL1; 6810 } else if (Prefix == "wave_shr" && 1 == Int) { 6811 Int = DppCtrl::WAVE_SHR1; 6812 } else if (Prefix == "wave_ror" && 1 == Int) { 6813 Int = DppCtrl::WAVE_ROR1; 6814 } else if (Prefix == "row_bcast") { 6815 if (Int == 15) { 6816 Int = DppCtrl::BCAST15; 6817 } else if (Int == 31) { 6818 Int = DppCtrl::BCAST31; 6819 } else { 6820 return MatchOperand_ParseFail; 6821 } 6822 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6823 Int |= DppCtrl::ROW_SHARE_FIRST; 6824 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6825 Int |= DppCtrl::ROW_XMASK_FIRST; 6826 } else { 6827 return MatchOperand_ParseFail; 6828 } 6829 } 6830 } 6831 6832 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6833 return MatchOperand_Success; 6834 } 6835 6836 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6837 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6838 } 6839 6840 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6841 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6842 } 6843 6844 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6845 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6846 } 6847 6848 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6849 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6850 } 6851 6852 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6853 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6854 } 6855 6856 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6857 OptionalImmIndexMap OptionalIdx; 6858 6859 unsigned I = 1; 6860 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6861 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6862 
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6863 } 6864 6865 int Fi = 0; 6866 for (unsigned E = Operands.size(); I != E; ++I) { 6867 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6868 MCOI::TIED_TO); 6869 if (TiedTo != -1) { 6870 assert((unsigned)TiedTo < Inst.getNumOperands()); 6871 // handle tied old or src2 for MAC instructions 6872 Inst.addOperand(Inst.getOperand(TiedTo)); 6873 } 6874 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6875 // Add the register arguments 6876 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6877 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6878 // Skip it. 6879 continue; 6880 } 6881 6882 if (IsDPP8) { 6883 if (Op.isDPP8()) { 6884 Op.addImmOperands(Inst, 1); 6885 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6886 Op.addRegWithFPInputModsOperands(Inst, 2); 6887 } else if (Op.isFI()) { 6888 Fi = Op.getImm(); 6889 } else if (Op.isReg()) { 6890 Op.addRegOperands(Inst, 1); 6891 } else { 6892 llvm_unreachable("Invalid operand type"); 6893 } 6894 } else { 6895 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6896 Op.addRegWithFPInputModsOperands(Inst, 2); 6897 } else if (Op.isDPPCtrl()) { 6898 Op.addImmOperands(Inst, 1); 6899 } else if (Op.isImm()) { 6900 // Handle optional arguments 6901 OptionalIdx[Op.getImmTy()] = I; 6902 } else { 6903 llvm_unreachable("Invalid operand type"); 6904 } 6905 } 6906 } 6907 6908 if (IsDPP8) { 6909 using namespace llvm::AMDGPU::DPP; 6910 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6911 } else { 6912 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6913 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6914 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6915 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6916 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6917 } 6918 } 6919 } 6920 6921 //===----------------------------------------------------------------------===// 6922 // sdwa 6923 //===----------------------------------------------------------------------===// 6924 6925 OperandMatchResultTy 6926 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6927 AMDGPUOperand::ImmTy Type) { 6928 using namespace llvm::AMDGPU::SDWA; 6929 6930 SMLoc S = Parser.getTok().getLoc(); 6931 StringRef Value; 6932 OperandMatchResultTy res; 6933 6934 res = parseStringWithPrefix(Prefix, Value); 6935 if (res != MatchOperand_Success) { 6936 return res; 6937 } 6938 6939 int64_t Int; 6940 Int = StringSwitch<int64_t>(Value) 6941 .Case("BYTE_0", SdwaSel::BYTE_0) 6942 .Case("BYTE_1", SdwaSel::BYTE_1) 6943 .Case("BYTE_2", SdwaSel::BYTE_2) 6944 .Case("BYTE_3", SdwaSel::BYTE_3) 6945 .Case("WORD_0", SdwaSel::WORD_0) 6946 .Case("WORD_1", SdwaSel::WORD_1) 6947 .Case("DWORD", SdwaSel::DWORD) 6948 .Default(0xffffffff); 6949 Parser.Lex(); // eat last token 6950 6951 if (Int == 0xffffffff) { 6952 return MatchOperand_ParseFail; 6953 } 6954 6955 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6956 return MatchOperand_Success; 6957 } 6958 6959 OperandMatchResultTy 6960 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6961 using namespace llvm::AMDGPU::SDWA; 6962 6963 SMLoc S = Parser.getTok().getLoc(); 6964 StringRef Value; 6965 OperandMatchResultTy res; 6966 6967 res = parseStringWithPrefix("dst_unused", Value); 6968 if (res != 
MatchOperand_Success) { 6969 return res; 6970 } 6971 6972 int64_t Int; 6973 Int = StringSwitch<int64_t>(Value) 6974 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6975 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6976 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6977 .Default(0xffffffff); 6978 Parser.Lex(); // eat last token 6979 6980 if (Int == 0xffffffff) { 6981 return MatchOperand_ParseFail; 6982 } 6983 6984 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6985 return MatchOperand_Success; 6986 } 6987 6988 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6989 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6990 } 6991 6992 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6993 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6994 } 6995 6996 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6997 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 6998 } 6999 7000 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 7001 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 7002 } 7003 7004 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 7005 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 7006 } 7007 7008 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 7009 uint64_t BasicInstType, 7010 bool SkipDstVcc, 7011 bool SkipSrcVcc) { 7012 using namespace llvm::AMDGPU::SDWA; 7013 7014 OptionalImmIndexMap OptionalIdx; 7015 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 7016 bool SkippedVcc = false; 7017 7018 unsigned I = 1; 7019 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7020 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7021 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7022 } 7023 7024 for (unsigned E = Operands.size(); I != E; ++I) { 7025 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7026 if (SkipVcc && !SkippedVcc && Op.isReg() && 7027 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 7028 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 7029 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 7030 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 7031 // Skip VCC only if we didn't skip it on previous iteration. 7032 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
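// With that layout, an operand count of 1 means only the dst has been added,
// so the register being inspected is the carry-out vcc; a count of 5 means
// dst plus src0/src1 (two slots each, with modifiers) have been added, so it
// is the carry-in vcc.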
7033 if (BasicInstType == SIInstrFlags::VOP2 && 7034 ((SkipDstVcc && Inst.getNumOperands() == 1) || 7035 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 7036 SkippedVcc = true; 7037 continue; 7038 } else if (BasicInstType == SIInstrFlags::VOPC && 7039 Inst.getNumOperands() == 0) { 7040 SkippedVcc = true; 7041 continue; 7042 } 7043 } 7044 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7045 Op.addRegOrImmWithInputModsOperands(Inst, 2); 7046 } else if (Op.isImm()) { 7047 // Handle optional arguments 7048 OptionalIdx[Op.getImmTy()] = I; 7049 } else { 7050 llvm_unreachable("Invalid operand type"); 7051 } 7052 SkippedVcc = false; 7053 } 7054 7055 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 7056 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 7057 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 7058 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 7059 switch (BasicInstType) { 7060 case SIInstrFlags::VOP1: 7061 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7062 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 7063 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 7064 } 7065 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 7066 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 7067 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7068 break; 7069 7070 case SIInstrFlags::VOP2: 7071 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7072 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 7073 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 7074 } 7075 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 7076 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 7077 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7078 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 7079 break; 7080 7081 case SIInstrFlags::VOPC: 7082 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 7083 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7084 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7085 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 7086 break; 7087 7088 default: 7089 llvm_unreachable("Invalid instruction type. 
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
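// Usage sketch (assumed invocation, shown for illustration): once the parsers
// above are registered, a standalone assemble step such as
//   llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj in.s -o out.o
// dispatches to AMDGPUAsmParser for the amdgcn target.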
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but
  // expected the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
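// Usage sketch (assumed syntax, on targets where s_endpgm carries an immediate):
// the operand parsed above is optional, so both of the following are accepted,
// with the omitted value defaulting to 0:
//   s_endpgm
//   s_endpgm 3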