1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/ErrorHandling.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

// Coarse classification of a parsed register, used while resolving a register
// name/range into a concrete register-class assignment.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

/// A single parsed AMDGPU assembly operand. An operand is one of:
///   - Token:      a bare identifier/keyword (e.g. an opcode suffix),
///   - Immediate:  an integer or FP literal, optionally tagged with a semantic
///                 ImmTy and carrying input modifiers,
///   - Register:   a resolved register number, optionally carrying input
///                 modifiers,
///   - Expression: an unresolved MCExpr (e.g. a symbol reference).
/// The large family of is*() predicates below implements the operand-class
/// checks referenced by the auto-generated matcher tables.
class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  // Owning parser; kept so predicates can query subtarget properties
  // (e.g. inline-constant legality) from a const operand.
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  /// Input modifiers that may decorate a source operand:
  /// abs/neg (floating point) or sext (integer). FP and integer modifiers are
  /// mutually exclusive on any single operand (see getModifiersOperand).
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    // Encode the FP modifiers into the SISrcMods immediate bits.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    // Encode the integer modifiers into the SISrcMods immediate bits.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    // Encode whichever modifier family is present (they cannot be combined).
    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  // Semantic tag for an immediate operand: either a named instruction field /
  // optional modifier (offsets, cache bits, DPP/SDWA controls, ...) or
  // ImmTyNone for a plain literal value.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Payload discriminated by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  // A "plain" register for the generic matcher: modifiers disqualify it so the
  // *WithInputMods operand classes are selected instead.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // Any vector-register class of any supported width.
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // One predicate per named immediate/modifier operand class; some also
  // enforce the field's bit-width (e.g. 16-bit DS offset, 8-bit offset0/1).
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  // SCSrc*: scalar register or inline constant (no literals, no modifiers).
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  // SSrc*: SCSrc plus 32-bit literal constants (and expressions for B32/F32).
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  // VCSrc*: VALU source — VGPR/SGPR register or inline constant, no literals.
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  // VSrc*: VCSrc plus literal constants (and expressions for 32-bit forms).
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  // VISrc*: VGPR-only register or inline constant.
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  // AISrc*: AGPR (accumulation register) source, by register-class width.
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  // KImm*: literal constant encoded directly in the instruction (e.g. s_movk).
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  // For an expression operand that is a plain symbol reference, return the
  // symbol name so it can be treated as a token (see isToken above).
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers live in the register or immediate payload; only those two kinds
  // (and only untyped immediates) may carry them.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  // addXXXOperands: append this operand to Inst in the encoding the matcher
  // expects for the given operand class. N is the expected operand count.
  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // Emits the modifier immediate first, then the source operand itself; the
  // immediate's own modifiers are already folded in, so it is added raw.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  // Debug helper: print the symbolic name of an ImmTy.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  // Factory helpers; each fully initializes the payload for its Kind.

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  // NOTE(review): HasExplicitEncodingSize is accepted but not consulted here —
  // presumably kept for interface compatibility with callers; confirm before
  // relying on it.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

// Debug printing for operand modifiers.
raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
944 class KernelScopeInfo { 945 int SgprIndexUnusedMin = -1; 946 int VgprIndexUnusedMin = -1; 947 MCContext *Ctx = nullptr; 948 949 void usesSgprAt(int i) { 950 if (i >= SgprIndexUnusedMin) { 951 SgprIndexUnusedMin = ++i; 952 if (Ctx) { 953 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 954 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 955 } 956 } 957 } 958 959 void usesVgprAt(int i) { 960 if (i >= VgprIndexUnusedMin) { 961 VgprIndexUnusedMin = ++i; 962 if (Ctx) { 963 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 964 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 965 } 966 } 967 } 968 969 public: 970 KernelScopeInfo() = default; 971 972 void initialize(MCContext &Context) { 973 Ctx = &Context; 974 usesSgprAt(SgprIndexUnusedMin = -1); 975 usesVgprAt(VgprIndexUnusedMin = -1); 976 } 977 978 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 979 switch (RegKind) { 980 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 981 case IS_AGPR: // fall through 982 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 983 default: break; 984 } 985 } 986 }; 987 988 class AMDGPUAsmParser : public MCTargetAsmParser { 989 MCAsmParser &Parser; 990 991 // Number of extra operands parsed after the first optional operand. 992 // This may be necessary to skip hardcoded mandatory operands. 
993 static const unsigned MAX_OPR_LOOKAHEAD = 8; 994 995 unsigned ForcedEncodingSize = 0; 996 bool ForcedDPP = false; 997 bool ForcedSDWA = false; 998 KernelScopeInfo KernelScope; 999 1000 /// @name Auto-generated Match Functions 1001 /// { 1002 1003 #define GET_ASSEMBLER_HEADER 1004 #include "AMDGPUGenAsmMatcher.inc" 1005 1006 /// } 1007 1008 private: 1009 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1010 bool OutOfRangeError(SMRange Range); 1011 /// Calculate VGPR/SGPR blocks required for given target, reserved 1012 /// registers, and user-specified NextFreeXGPR values. 1013 /// 1014 /// \param Features [in] Target features, used for bug corrections. 1015 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1016 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1017 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1018 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1019 /// descriptor field, if valid. 1020 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1021 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1022 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1023 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1024 /// \param VGPRBlocks [out] Result VGPR block count. 1025 /// \param SGPRBlocks [out] Result SGPR block count. 
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);

  // Target-specific assembler directive handlers (by MC parser convention
  // these return true on error).
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  // Register parsing helpers.
  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;

  // GPR-count symbol bookkeeping (used when targeting code object v3; see
  // the constructor below).
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);

  // Shared converters behind the public cvtMubuf*/cvtDS* wrappers, which
  // bind the boolean flags.
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  /// Selects the default feature set when none is given, then pre-defines
  /// ISA-version symbols in the MC context so assembly code can query the
  /// target generation.
  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific
      // target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        // Code object v3 spelling of the version symbols.
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        // Legacy spelling.
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  // Subtarget feature queries (thin wrappers over AMDGPUBaseInfo and the
  // subtarget feature bits).
  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not
    // const in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  // Forced-encoding state accessors.
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  // Lookahead classification helpers for telling modifiers apart from
  // registers and expressions.
  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  // Scratch record for symbolic operand fields (hwreg/sendmsg): the numeric
  // id plus whether it was spelled symbolically and whether it was present.
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;

  // Post-match validation of the encoded MCInst.
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // Low-level lexer utilities.
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  // MUBUF/MTBUF converters; wrappers select atomic/return/LDS variants.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  // Factories for operands that may be omitted in the source text.
  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType, bool skipVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

// Table entry describing an optional instruction-modifier operand.
struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

// Maps an AMDGPU operand type to the float semantics literals for that
// operand are converted to (half for 16-bit and packed 16-bit forms).
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
//===----------------------------------------------------------------------===// 1476 // Operand 1477 //===----------------------------------------------------------------------===// 1478 1479 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1480 bool Lost; 1481 1482 // Convert literal to single precision 1483 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1484 APFloat::rmNearestTiesToEven, 1485 &Lost); 1486 // We allow precision lost but not overflow or underflow 1487 if (Status != APFloat::opOK && 1488 Lost && 1489 ((Status & APFloat::opOverflow) != 0 || 1490 (Status & APFloat::opUnderflow) != 0)) { 1491 return false; 1492 } 1493 1494 return true; 1495 } 1496 1497 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1498 return isUIntN(Size, Val) || isIntN(Size, Val); 1499 } 1500 1501 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1502 1503 // This is a hack to enable named inline values like 1504 // shared_base with both 32-bit and 64-bit operands. 1505 // Note that these values are defined as 1506 // 32-bit operands only. 1507 if (isInlineValue()) { 1508 return true; 1509 } 1510 1511 if (!isImmTy(ImmTyNone)) { 1512 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1513 return false; 1514 } 1515 // TODO: We should avoid using host float here. It would be better to 1516 // check the float bit values which is what a few other places do. 1517 // We've had bot failures before due to weird NaN support on mips hosts. 
1518 1519 APInt Literal(64, Imm.Val); 1520 1521 if (Imm.IsFPImm) { // We got fp literal token 1522 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1523 return AMDGPU::isInlinableLiteral64(Imm.Val, 1524 AsmParser->hasInv2PiInlineImm()); 1525 } 1526 1527 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1528 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1529 return false; 1530 1531 if (type.getScalarSizeInBits() == 16) { 1532 return AMDGPU::isInlinableLiteral16( 1533 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1534 AsmParser->hasInv2PiInlineImm()); 1535 } 1536 1537 // Check if single precision literal is inlinable 1538 return AMDGPU::isInlinableLiteral32( 1539 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1540 AsmParser->hasInv2PiInlineImm()); 1541 } 1542 1543 // We got int literal token. 1544 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1545 return AMDGPU::isInlinableLiteral64(Imm.Val, 1546 AsmParser->hasInv2PiInlineImm()); 1547 } 1548 1549 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1550 return false; 1551 } 1552 1553 if (type.getScalarSizeInBits() == 16) { 1554 return AMDGPU::isInlinableLiteral16( 1555 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1556 AsmParser->hasInv2PiInlineImm()); 1557 } 1558 1559 return AMDGPU::isInlinableLiteral32( 1560 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1561 AsmParser->hasInv2PiInlineImm()); 1562 } 1563 1564 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1565 // Check that this immediate can be added as literal 1566 if (!isImmTy(ImmTyNone)) { 1567 return false; 1568 } 1569 1570 if (!Imm.IsFPImm) { 1571 // We got int literal token. 1572 1573 if (type == MVT::f64 && hasFPModifiers()) { 1574 // Cannot apply fp modifiers to int literals preserving the same semantics 1575 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1576 // disable these cases. 
1577 return false; 1578 } 1579 1580 unsigned Size = type.getSizeInBits(); 1581 if (Size == 64) 1582 Size = 32; 1583 1584 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1585 // types. 1586 return isSafeTruncation(Imm.Val, Size); 1587 } 1588 1589 // We got fp literal token 1590 if (type == MVT::f64) { // Expected 64-bit fp operand 1591 // We would set low 64-bits of literal to zeroes but we accept this literals 1592 return true; 1593 } 1594 1595 if (type == MVT::i64) { // Expected 64-bit int operand 1596 // We don't allow fp literals in 64-bit integer instructions. It is 1597 // unclear how we should encode them. 1598 return false; 1599 } 1600 1601 // We allow fp literals with f16x2 operands assuming that the specified 1602 // literal goes into the lower half and the upper half is zero. We also 1603 // require that the literal may be losslesly converted to f16. 1604 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1605 (type == MVT::v2i16)? MVT::i16 : type; 1606 1607 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1608 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1609 } 1610 1611 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1612 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1613 } 1614 1615 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1616 if (AsmParser->isVI()) 1617 return isVReg32(); 1618 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1619 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1620 else 1621 return false; 1622 } 1623 1624 bool AMDGPUOperand::isSDWAFP16Operand() const { 1625 return isSDWAOperand(MVT::f16); 1626 } 1627 1628 bool AMDGPUOperand::isSDWAFP32Operand() const { 1629 return isSDWAOperand(MVT::f32); 1630 } 1631 1632 bool AMDGPUOperand::isSDWAInt16Operand() const { 1633 return isSDWAOperand(MVT::i16); 1634 } 1635 1636 bool AMDGPUOperand::isSDWAInt32Operand() const { 1637 return isSDWAOperand(MVT::i32); 1638 } 
// Wave64 targets use a 64-bit SGPR pair as a lane mask; wave32 a single SGPR.
bool AMDGPUOperand::isBoolReg() const {
  return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
    isSCSrcB64() : isSCSrcB32();
}

// Applies the abs/neg source modifiers to the raw bit pattern of an fp
// literal. Size is the operand size in bytes; only the sign bit is touched.
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

// Appends this immediate to Inst. (N is unused; the insertion point is the
// instruction's current operand count.) Literal-capable source operands go
// through addLiteralImmOperand; everything else is added verbatim.
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    // Note: '&' binds tighter than '&&' here, so with bool operands this
    // still evaluates as the logical-and of all three conditions.
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

// Encodes Val as the literal/inline-constant operand at the instruction's
// current operand position, converting and truncating as the operand type
// requires. Out-of-range cases are expected to have been rejected earlier by
// the isLiteralImm()/isInlinableImm() predicates.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // FP literal tokens are held as a double's bit pattern, so the sign bit
    // to flip is the 64-bit one regardless of the operand size.
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to the semantics this operand type requires.
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    // Packed inline-only operand types: a non-inlinable value should have
    // been rejected earlier by the operand predicates.
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

// Encodes a K-immediate of the given bit width from either an int or fp
// literal token. (N is unused.)
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}

// Appends this operand's register, translated to the subtarget-specific
// MC register. (N is unused.)
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}

// Registers that are encoded as inline constant values.
static bool isInlineValue(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return true;
  case AMDGPU::SRC_VCCZ:
  case AMDGPU::SRC_EXECZ:
  case AMDGPU::SRC_SCC:
    return true;
  default:
    return false;
  }
}

bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Maps a register kind and width (in 32-bit registers) to a register class
// id, or -1 when no class of that width exists for the kind.
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::AGPR_32RegClassID;
      case 2: return AMDGPU::AReg_64RegClassID;
      case 4: return AMDGPU::AReg_128RegClassID;
      case 16: return AMDGPU::AReg_512RegClassID;
      case 32: return AMDGPU::AReg_1024RegClassID;
    }
  }
  return -1;
}

// Named (non-numbered) registers; returns 0 when the name is unknown.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("vccz", AMDGPU::SRC_VCCZ)
    .Case("src_vccz", AMDGPU::SRC_VCCZ)
    .Case("execz", AMDGPU::SRC_EXECZ)
    .Case("src_execz", AMDGPU::SRC_EXECZ)
    .Case("scc", AMDGPU::SRC_SCC)
    .Case("src_scc", AMDGPU::SRC_SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Case("null", AMDGPU::SGPR_NULL)
    .Default(0);
}

// MCTargetAsmParser entry point; returns true on failure.
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

// Grows the register span [Reg, Reg+RegWidth) of a bracketed register list
// with the next single register Reg1, fusing special lo/hi halves into their
// full-width registers. Returns false when Reg1 does not continue the span.
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            unsigned RegNum) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_AGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}

// Name prefixes of numbered registers.
static const StringRef Registers[] = {
  { "v" },
  { "s" },
  { "ttmp" },
  { "acc" },
  { "a" },
};

// Lookahead-only check (consumes no tokens): does Token, given one token of
// lookahead, start a register operand?
bool
AMDGPUAsmParser::isRegister(const AsmToken &Token,
                            const AsmToken &NextToken) const {

  // A list of consecutive registers: [s0,s1,s2,s3]
  if (Token.is(AsmToken::LBrac))
    return true;

  if (!Token.is(AsmToken::Identifier))
    return false;

  // A single register like s0 or a range of registers like s[0:1]

  StringRef RegName = Token.getString();

  for (StringRef Reg : Registers) {
    if (RegName.startswith(Reg)) {
      if (Reg.size() < RegName.size()) {
        unsigned RegNum;
        // A single register with an index: rXX
        if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
          return true;
      } else {
        // A range of registers: r[XX:YY].
        if (NextToken.is(AsmToken::LBrac))
          return true;
      }
    }
  }

  // A nonzero special-register id converts to true here.
  return getSpecialRegForName(RegName);
}

bool
AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      // Numbered register: split the name into its kind prefix and index.
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName[0] == 'a') {
        RegNumIndex = RegName.startswith("acc") ? 3 : 1;
        RegKind = IS_AGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
2095 Parser.Lex(); 2096 int64_t RegLo, RegHi; 2097 if (getLexer().isNot(AsmToken::LBrac)) 2098 return false; 2099 Parser.Lex(); 2100 2101 if (getParser().parseAbsoluteExpression(RegLo)) 2102 return false; 2103 2104 const bool isRBrace = getLexer().is(AsmToken::RBrac); 2105 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 2106 return false; 2107 Parser.Lex(); 2108 2109 if (isRBrace) { 2110 RegHi = RegLo; 2111 } else { 2112 if (getParser().parseAbsoluteExpression(RegHi)) 2113 return false; 2114 2115 if (getLexer().isNot(AsmToken::RBrac)) 2116 return false; 2117 Parser.Lex(); 2118 } 2119 RegNum = (unsigned) RegLo; 2120 RegWidth = (RegHi - RegLo) + 1; 2121 } 2122 } 2123 } else if (getLexer().is(AsmToken::LBrac)) { 2124 // List of consecutive registers: [s0,s1,s2,s3] 2125 Parser.Lex(); 2126 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 2127 return false; 2128 if (RegWidth != 1) 2129 return false; 2130 RegisterKind RegKind1; 2131 unsigned Reg1, RegNum1, RegWidth1; 2132 do { 2133 if (getLexer().is(AsmToken::Comma)) { 2134 Parser.Lex(); 2135 } else if (getLexer().is(AsmToken::RBrac)) { 2136 Parser.Lex(); 2137 break; 2138 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 2139 if (RegWidth1 != 1) { 2140 return false; 2141 } 2142 if (RegKind1 != RegKind) { 2143 return false; 2144 } 2145 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 2146 return false; 2147 } 2148 } else { 2149 return false; 2150 } 2151 } while (true); 2152 } else { 2153 return false; 2154 } 2155 switch (RegKind) { 2156 case IS_SPECIAL: 2157 RegNum = 0; 2158 RegWidth = 1; 2159 break; 2160 case IS_VGPR: 2161 case IS_SGPR: 2162 case IS_AGPR: 2163 case IS_TTMP: 2164 { 2165 unsigned Size = 1; 2166 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2167 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
2168 Size = std::min(RegWidth, 4u); 2169 } 2170 if (RegNum % Size != 0) 2171 return false; 2172 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 2173 RegNum = RegNum / Size; 2174 int RCID = getRegClass(RegKind, RegWidth); 2175 if (RCID == -1) 2176 return false; 2177 const MCRegisterClass RC = TRI->getRegClass(RCID); 2178 if (RegNum >= RC.getNumRegs()) 2179 return false; 2180 Reg = RC.getRegister(RegNum); 2181 break; 2182 } 2183 2184 default: 2185 llvm_unreachable("unexpected register kind"); 2186 } 2187 2188 if (!subtargetHasRegister(*TRI, Reg)) 2189 return false; 2190 return true; 2191 } 2192 2193 Optional<StringRef> 2194 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2195 switch (RegKind) { 2196 case IS_VGPR: 2197 return StringRef(".amdgcn.next_free_vgpr"); 2198 case IS_SGPR: 2199 return StringRef(".amdgcn.next_free_sgpr"); 2200 default: 2201 return None; 2202 } 2203 } 2204 2205 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2206 auto SymbolName = getGprCountSymbolName(RegKind); 2207 assert(SymbolName && "initializing invalid register kind"); 2208 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2209 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2210 } 2211 2212 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2213 unsigned DwordRegIndex, 2214 unsigned RegWidth) { 2215 // Symbols are only defined for GCN targets 2216 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2217 return true; 2218 2219 auto SymbolName = getGprCountSymbolName(RegKind); 2220 if (!SymbolName) 2221 return true; 2222 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2223 2224 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2225 int64_t OldCount; 2226 2227 if (!Sym->isVariable()) 2228 return !Error(getParser().getTok().getLoc(), 2229 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2230 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2231 return !Error( 2232 
getParser().getTok().getLoc(), 2233 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2234 2235 if (OldCount <= NewMax) 2236 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2237 2238 return true; 2239 } 2240 2241 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 2242 const auto &Tok = Parser.getTok(); 2243 SMLoc StartLoc = Tok.getLoc(); 2244 SMLoc EndLoc = Tok.getEndLoc(); 2245 RegisterKind RegKind; 2246 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 2247 2248 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 2249 //FIXME: improve error messages (bug 41303). 2250 Error(StartLoc, "not a valid operand."); 2251 return nullptr; 2252 } 2253 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2254 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 2255 return nullptr; 2256 } else 2257 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 2258 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2259 } 2260 2261 OperandMatchResultTy 2262 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2263 // TODO: add syntactic sugar for 1/(2*PI) 2264 2265 assert(!isRegister()); 2266 assert(!isModifier()); 2267 2268 const auto& Tok = getToken(); 2269 const auto& NextTok = peekToken(); 2270 bool IsReal = Tok.is(AsmToken::Real); 2271 SMLoc S = getLoc(); 2272 bool Negate = false; 2273 2274 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2275 lex(); 2276 IsReal = true; 2277 Negate = true; 2278 } 2279 2280 if (IsReal) { 2281 // Floating-point expressions are not supported. 2282 // Can only allow floating-point literals with an 2283 // optional sign. 
2284 2285 StringRef Num = getTokenStr(); 2286 lex(); 2287 2288 APFloat RealVal(APFloat::IEEEdouble()); 2289 auto roundMode = APFloat::rmNearestTiesToEven; 2290 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { 2291 return MatchOperand_ParseFail; 2292 } 2293 if (Negate) 2294 RealVal.changeSign(); 2295 2296 Operands.push_back( 2297 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2298 AMDGPUOperand::ImmTyNone, true)); 2299 2300 return MatchOperand_Success; 2301 2302 } else { 2303 int64_t IntVal; 2304 const MCExpr *Expr; 2305 SMLoc S = getLoc(); 2306 2307 if (HasSP3AbsModifier) { 2308 // This is a workaround for handling expressions 2309 // as arguments of SP3 'abs' modifier, for example: 2310 // |1.0| 2311 // |-1| 2312 // |1+x| 2313 // This syntax is not compatible with syntax of standard 2314 // MC expressions (due to the trailing '|'). 2315 SMLoc EndLoc; 2316 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2317 return MatchOperand_ParseFail; 2318 } else { 2319 if (Parser.parseExpression(Expr)) 2320 return MatchOperand_ParseFail; 2321 } 2322 2323 if (Expr->evaluateAsAbsolute(IntVal)) { 2324 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2325 } else { 2326 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2327 } 2328 2329 return MatchOperand_Success; 2330 } 2331 2332 return MatchOperand_NoMatch; 2333 } 2334 2335 OperandMatchResultTy 2336 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2337 if (!isRegister()) 2338 return MatchOperand_NoMatch; 2339 2340 if (auto R = parseRegister()) { 2341 assert(R->isReg()); 2342 Operands.push_back(std::move(R)); 2343 return MatchOperand_Success; 2344 } 2345 return MatchOperand_ParseFail; 2346 } 2347 2348 OperandMatchResultTy 2349 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2350 auto res = parseReg(Operands); 2351 if (res != MatchOperand_NoMatch) { 2352 return res; 2353 } else if (isModifier()) { 2354 return 
MatchOperand_NoMatch; 2355 } else { 2356 return parseImm(Operands, HasSP3AbsMod); 2357 } 2358 } 2359 2360 bool 2361 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2362 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2363 const auto &str = Token.getString(); 2364 return str == "abs" || str == "neg" || str == "sext"; 2365 } 2366 return false; 2367 } 2368 2369 bool 2370 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2371 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2372 } 2373 2374 bool 2375 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2376 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2377 } 2378 2379 bool 2380 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2381 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2382 } 2383 2384 // Check if this is an operand modifier or an opcode modifier 2385 // which may look like an expression but it is not. We should 2386 // avoid parsing these modifiers as expressions. Currently 2387 // recognized sequences are: 2388 // |...| 2389 // abs(...) 2390 // neg(...) 2391 // sext(...) 2392 // -reg 2393 // -|...| 2394 // -abs(...) 2395 // name:... 2396 // Note that simple opcode modifiers like 'gds' may be parsed as 2397 // expressions; this is a special case. See getExpressionAsToken. 2398 // 2399 bool 2400 AMDGPUAsmParser::isModifier() { 2401 2402 AsmToken Tok = getToken(); 2403 AsmToken NextToken[2]; 2404 peekTokens(NextToken); 2405 2406 return isOperandModifier(Tok, NextToken[0]) || 2407 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2408 isOpcodeModifierWithVal(Tok, NextToken[0]); 2409 } 2410 2411 // Check if the current token is an SP3 'neg' modifier. 
2412 // Currently this modifier is allowed in the following context: 2413 // 2414 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2415 // 2. Before an 'abs' modifier: -abs(...) 2416 // 3. Before an SP3 'abs' modifier: -|...| 2417 // 2418 // In all other cases "-" is handled as a part 2419 // of an expression that follows the sign. 2420 // 2421 // Note: When "-" is followed by an integer literal, 2422 // this is interpreted as integer negation rather 2423 // than a floating-point NEG modifier applied to N. 2424 // Beside being contr-intuitive, such use of floating-point 2425 // NEG modifier would have resulted in different meaning 2426 // of integer literals used with VOP1/2/C and VOP3, 2427 // for example: 2428 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2429 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2430 // Negative fp literals with preceding "-" are 2431 // handled likewise for unifomtity 2432 // 2433 bool 2434 AMDGPUAsmParser::parseSP3NegModifier() { 2435 2436 AsmToken NextToken[2]; 2437 peekTokens(NextToken); 2438 2439 if (isToken(AsmToken::Minus) && 2440 (isRegister(NextToken[0], NextToken[1]) || 2441 NextToken[0].is(AsmToken::Pipe) || 2442 isId(NextToken[0], "abs"))) { 2443 lex(); 2444 return true; 2445 } 2446 2447 return false; 2448 } 2449 2450 OperandMatchResultTy 2451 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2452 bool AllowImm) { 2453 bool Neg, SP3Neg; 2454 bool Abs, SP3Abs; 2455 SMLoc Loc; 2456 2457 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 
2458 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2459 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2460 return MatchOperand_ParseFail; 2461 } 2462 2463 SP3Neg = parseSP3NegModifier(); 2464 2465 Loc = getLoc(); 2466 Neg = trySkipId("neg"); 2467 if (Neg && SP3Neg) { 2468 Error(Loc, "expected register or immediate"); 2469 return MatchOperand_ParseFail; 2470 } 2471 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2472 return MatchOperand_ParseFail; 2473 2474 Abs = trySkipId("abs"); 2475 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2476 return MatchOperand_ParseFail; 2477 2478 Loc = getLoc(); 2479 SP3Abs = trySkipToken(AsmToken::Pipe); 2480 if (Abs && SP3Abs) { 2481 Error(Loc, "expected register or immediate"); 2482 return MatchOperand_ParseFail; 2483 } 2484 2485 OperandMatchResultTy Res; 2486 if (AllowImm) { 2487 Res = parseRegOrImm(Operands, SP3Abs); 2488 } else { 2489 Res = parseReg(Operands); 2490 } 2491 if (Res != MatchOperand_Success) { 2492 return (SP3Neg || Neg || SP3Abs || Abs)? 
MatchOperand_ParseFail : Res; 2493 } 2494 2495 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2496 return MatchOperand_ParseFail; 2497 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2498 return MatchOperand_ParseFail; 2499 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2500 return MatchOperand_ParseFail; 2501 2502 AMDGPUOperand::Modifiers Mods; 2503 Mods.Abs = Abs || SP3Abs; 2504 Mods.Neg = Neg || SP3Neg; 2505 2506 if (Mods.hasFPModifiers()) { 2507 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2508 if (Op.isExpr()) { 2509 Error(Op.getStartLoc(), "expected an absolute expression"); 2510 return MatchOperand_ParseFail; 2511 } 2512 Op.setModifiers(Mods); 2513 } 2514 return MatchOperand_Success; 2515 } 2516 2517 OperandMatchResultTy 2518 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2519 bool AllowImm) { 2520 bool Sext = trySkipId("sext"); 2521 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2522 return MatchOperand_ParseFail; 2523 2524 OperandMatchResultTy Res; 2525 if (AllowImm) { 2526 Res = parseRegOrImm(Operands); 2527 } else { 2528 Res = parseReg(Operands); 2529 } 2530 if (Res != MatchOperand_Success) { 2531 return Sext? 
MatchOperand_ParseFail : Res; 2532 } 2533 2534 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2535 return MatchOperand_ParseFail; 2536 2537 AMDGPUOperand::Modifiers Mods; 2538 Mods.Sext = Sext; 2539 2540 if (Mods.hasIntModifiers()) { 2541 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2542 if (Op.isExpr()) { 2543 Error(Op.getStartLoc(), "expected an absolute expression"); 2544 return MatchOperand_ParseFail; 2545 } 2546 Op.setModifiers(Mods); 2547 } 2548 2549 return MatchOperand_Success; 2550 } 2551 2552 OperandMatchResultTy 2553 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2554 return parseRegOrImmWithFPInputMods(Operands, false); 2555 } 2556 2557 OperandMatchResultTy 2558 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2559 return parseRegOrImmWithIntInputMods(Operands, false); 2560 } 2561 2562 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2563 auto Loc = getLoc(); 2564 if (trySkipId("off")) { 2565 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2566 AMDGPUOperand::ImmTyOff, false)); 2567 return MatchOperand_Success; 2568 } 2569 2570 if (!isRegister()) 2571 return MatchOperand_NoMatch; 2572 2573 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2574 if (Reg) { 2575 Operands.push_back(std::move(Reg)); 2576 return MatchOperand_Success; 2577 } 2578 2579 return MatchOperand_ParseFail; 2580 2581 } 2582 2583 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2584 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2585 2586 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2587 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2588 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2589 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2590 return Match_InvalidOperand; 2591 2592 if ((TSFlags & SIInstrFlags::VOP3) && 2593 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) 
&& 2594 getForcedEncodingSize() != 64) 2595 return Match_PreferE32; 2596 2597 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2598 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2599 // v_mac_f32/16 allow only dst_sel == DWORD; 2600 auto OpNum = 2601 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2602 const auto &Op = Inst.getOperand(OpNum); 2603 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2604 return Match_InvalidOperand; 2605 } 2606 } 2607 2608 return Match_Success; 2609 } 2610 2611 // What asm variants we should check 2612 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2613 if (getForcedEncodingSize() == 32) { 2614 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2615 return makeArrayRef(Variants); 2616 } 2617 2618 if (isForcedVOP3()) { 2619 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2620 return makeArrayRef(Variants); 2621 } 2622 2623 if (isForcedSDWA()) { 2624 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2625 AMDGPUAsmVariants::SDWA9}; 2626 return makeArrayRef(Variants); 2627 } 2628 2629 if (isForcedDPP()) { 2630 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2631 return makeArrayRef(Variants); 2632 } 2633 2634 static const unsigned Variants[] = { 2635 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2636 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2637 }; 2638 2639 return makeArrayRef(Variants); 2640 } 2641 2642 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2643 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2644 const unsigned Num = Desc.getNumImplicitUses(); 2645 for (unsigned i = 0; i < Num; ++i) { 2646 unsigned Reg = Desc.ImplicitUses[i]; 2647 switch (Reg) { 2648 case AMDGPU::FLAT_SCR: 2649 case AMDGPU::VCC: 2650 case AMDGPU::VCC_LO: 2651 case AMDGPU::VCC_HI: 2652 case AMDGPU::M0: 2653 case AMDGPU::SGPR_NULL: 2654 return Reg; 2655 default: 2656 
break; 2657 } 2658 } 2659 return AMDGPU::NoRegister; 2660 } 2661 2662 // NB: This code is correct only when used to check constant 2663 // bus limitations because GFX7 support no f16 inline constants. 2664 // Note that there are no cases when a GFX7 opcode violates 2665 // constant bus limitations due to the use of an f16 constant. 2666 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2667 unsigned OpIdx) const { 2668 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2669 2670 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2671 return false; 2672 } 2673 2674 const MCOperand &MO = Inst.getOperand(OpIdx); 2675 2676 int64_t Val = MO.getImm(); 2677 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2678 2679 switch (OpSize) { // expected operand size 2680 case 8: 2681 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2682 case 4: 2683 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2684 case 2: { 2685 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2686 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2687 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2688 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2689 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2690 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2691 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2692 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2693 } else { 2694 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2695 } 2696 } 2697 default: 2698 llvm_unreachable("invalid operand size"); 2699 } 2700 } 2701 2702 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2703 const MCOperand &MO = Inst.getOperand(OpIdx); 2704 if (MO.isImm()) { 2705 return !isInlineConstant(Inst, OpIdx); 2706 } 2707 return !MO.isReg() || 2708 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2709 } 2710 2711 bool AMDGPUAsmParser::validateConstantBusLimitations(const 
MCInst &Inst) { 2712 const unsigned Opcode = Inst.getOpcode(); 2713 const MCInstrDesc &Desc = MII.get(Opcode); 2714 unsigned ConstantBusUseCount = 0; 2715 unsigned NumLiterals = 0; 2716 unsigned LiteralSize; 2717 2718 if (Desc.TSFlags & 2719 (SIInstrFlags::VOPC | 2720 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2721 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2722 SIInstrFlags::SDWA)) { 2723 // Check special imm operands (used by madmk, etc) 2724 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2725 ++ConstantBusUseCount; 2726 } 2727 2728 SmallDenseSet<unsigned> SGPRsUsed; 2729 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2730 if (SGPRUsed != AMDGPU::NoRegister) { 2731 SGPRsUsed.insert(SGPRUsed); 2732 ++ConstantBusUseCount; 2733 } 2734 2735 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2736 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2737 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2738 2739 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2740 2741 for (int OpIdx : OpIndices) { 2742 if (OpIdx == -1) break; 2743 2744 const MCOperand &MO = Inst.getOperand(OpIdx); 2745 if (usesConstantBus(Inst, OpIdx)) { 2746 if (MO.isReg()) { 2747 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2748 // Pairs of registers with a partial intersections like these 2749 // s0, s[0:1] 2750 // flat_scratch_lo, flat_scratch 2751 // flat_scratch_lo, flat_scratch_hi 2752 // are theoretically valid but they are disabled anyway. 2753 // Note that this code mimics SIInstrInfo::verifyInstruction 2754 if (!SGPRsUsed.count(Reg)) { 2755 SGPRsUsed.insert(Reg); 2756 ++ConstantBusUseCount; 2757 } 2758 } else { // Expression or a literal 2759 2760 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2761 continue; // special operand like VINTERP attr_chan 2762 2763 // An instruction may use only one literal. 2764 // This has been validated on the previous step. 
2765 // See validateVOP3Literal. 2766 // This literal may be used as more than one operand. 2767 // If all these operands are of the same size, 2768 // this literal counts as one scalar value. 2769 // Otherwise it counts as 2 scalar values. 2770 // See "GFX10 Shader Programming", section 3.6.2.3. 2771 2772 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2773 if (Size < 4) Size = 4; 2774 2775 if (NumLiterals == 0) { 2776 NumLiterals = 1; 2777 LiteralSize = Size; 2778 } else if (LiteralSize != Size) { 2779 NumLiterals = 2; 2780 } 2781 } 2782 } 2783 } 2784 } 2785 ConstantBusUseCount += NumLiterals; 2786 2787 if (isGFX10()) 2788 return ConstantBusUseCount <= 2; 2789 2790 return ConstantBusUseCount <= 1; 2791 } 2792 2793 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2794 const unsigned Opcode = Inst.getOpcode(); 2795 const MCInstrDesc &Desc = MII.get(Opcode); 2796 2797 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2798 if (DstIdx == -1 || 2799 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2800 return true; 2801 } 2802 2803 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2804 2805 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2806 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2807 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2808 2809 assert(DstIdx != -1); 2810 const MCOperand &Dst = Inst.getOperand(DstIdx); 2811 assert(Dst.isReg()); 2812 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2813 2814 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2815 2816 for (int SrcIdx : SrcIndices) { 2817 if (SrcIdx == -1) break; 2818 const MCOperand &Src = Inst.getOperand(SrcIdx); 2819 if (Src.isReg()) { 2820 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2821 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2822 return false; 2823 } 2824 } 2825 } 2826 2827 return true; 2828 } 2829 2830 
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2831 2832 const unsigned Opc = Inst.getOpcode(); 2833 const MCInstrDesc &Desc = MII.get(Opc); 2834 2835 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2836 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2837 assert(ClampIdx != -1); 2838 return Inst.getOperand(ClampIdx).getImm() == 0; 2839 } 2840 2841 return true; 2842 } 2843 2844 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2845 2846 const unsigned Opc = Inst.getOpcode(); 2847 const MCInstrDesc &Desc = MII.get(Opc); 2848 2849 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2850 return true; 2851 2852 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2853 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2854 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2855 2856 assert(VDataIdx != -1); 2857 assert(DMaskIdx != -1); 2858 assert(TFEIdx != -1); 2859 2860 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2861 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2862 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2863 if (DMask == 0) 2864 DMask = 1; 2865 2866 unsigned DataSize = 2867 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 2868 if (hasPackedD16()) { 2869 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2870 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2871 DataSize = (DataSize + 1) / 2; 2872 } 2873 2874 return (VDataSize / 4) == DataSize + TFESize; 2875 } 2876 2877 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 2878 const unsigned Opc = Inst.getOpcode(); 2879 const MCInstrDesc &Desc = MII.get(Opc); 2880 2881 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 2882 return true; 2883 2884 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 2885 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 2886 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 2887 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 2888 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 2889 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2890 2891 assert(VAddr0Idx != -1); 2892 assert(SrsrcIdx != -1); 2893 assert(DimIdx != -1); 2894 assert(SrsrcIdx > VAddr0Idx); 2895 2896 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 2897 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 2898 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 2899 unsigned VAddrSize = 2900 IsNSA ? SrsrcIdx - VAddr0Idx 2901 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 2902 2903 unsigned AddrSize = BaseOpcode->NumExtraArgs + 2904 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 2905 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 2906 (BaseOpcode->LodOrClampOrMip ? 
1 : 0);
  // Non-NSA encodings pack the address in a power-of-2-sized register tuple,
  // so round the required address size up to the tuple size actually used.
  if (!IsNSA) {
    if (AddrSize > 8)
      AddrSize = 16;
    else if (AddrSize > 4)
      AddrSize = 8;
  }

  return VAddrSize == AddrSize;
}

// Check that the dmask of a MIMG atomic selects a valid channel pattern:
// atomics may only use dmask 0x1, 0x3 or 0xf. Returns true if valid or
// not applicable.
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;
  // Atomic MIMG instructions are flagged as both load and store; anything
  // else is not an atomic and needs no dmask check here.
  if (!Desc.mayLoad() || !Desc.mayStore())
    return true; // Not atomic

  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;

  // This is an incomplete check because image_atomic_cmpswap
  // may only use 0x3 and 0xf while other atomic operations
  // may use 0x1 and 0x3. However these limitations are
  // verified when we check that dmask matches dst size.
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
}

// Check that a GATHER4 instruction selects exactly one channel via dmask.
// Returns true if valid or not applicable.
bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
    return true;

  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;

  // GATHER4 instructions use dmask in a different fashion compared to
  // other MIMG instructions. The only useful DMASK values are
  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
  // (red,red,red,red) etc.) The ISA document doesn't mention
  // this.
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}

// Reject the d16 modifier on subtargets that do not support it (SI/CI).
// Returns true if valid or not applicable.
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
    if (isCI() || isSI())
      return false;
  }

  return true;
}

// Check that the dim operand of a MIMG instruction, when present, holds a
// valid encoding (0..7). Returns true if valid or not applicable.
bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  if (DimIdx < 0)
    return true;

  long Imm = Inst.getOperand(DimIdx).getImm();
  if (Imm < 0 || Imm >= 8)
    return false;

  return true;
}

// Return true for "rev" opcode variants, i.e. opcodes whose source operands
// are swapped relative to the base operation (subrev, lshlrev, etc.).
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_I32_e32:
  case AMDGPU::V_SUBREV_I32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}

// Validate use of the lds_direct pseudo-register: it may only appear as
// src0 of VOP1/VOP2/VOP3/VOPC/VOP3P encodings, and not with SDWA or "rev"
// opcodes. Returns true if the instruction's use of lds_direct is legal.
bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {

  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  // lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings which do not accept these.
  if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  const int SrcIndices[] = { Src1Idx, Src2Idx };

  // lds_direct cannot be specified as either src1 or src2.
  for (int SrcIdx : SrcIndices) {
    if (SrcIdx == -1) break;
    const MCOperand &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
      return false;
    }
  }

  if (Src0Idx == -1)
    return true;

  const MCOperand &Src = Inst.getOperand(Src0Idx);
  if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
    return true;

  // lds_direct is specified as src0. Check additional limitations.
  // SDWA encodings are rejected; "rev" opcodes are rejected because their
  // logical src0 is encoded in the src1 slot.
  return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
}

// Return the source location of the flat-offset operand if present among
// the parsed operands, otherwise the current lexer location (for error
// reporting in validateFlatOffset).
SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isFlatOffset())
      return Op.getStartLoc();
  }
  return getLoc();
}

// Validate the immediate offset of a FLAT instruction against the
// subtarget's supported offset width and signedness. Emits a diagnostic
// and returns false on failure.
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::FLAT) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  assert(OpNum != -1);

  const auto &Op = Inst.getOperand(OpNum);
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    Error(getFlatOffsetLoc(Operands),
          "flat offset modifier is not supported on this GPU");
    return false;
  }

  // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
  // For FLAT segment the offset must be positive;
  // MSB is ignored and forced to zero.
  unsigned OffsetSize = isGFX9() ? 13 : 12;
  if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
    if (!isIntN(OffsetSize, Op.getImm())) {
      Error(getFlatOffsetLoc(Operands),
            isGFX9() ? "expected a 13-bit signed offset" :
                       "expected a 12-bit signed offset");
      return false;
    }
  } else {
    // Flat segment: unsigned, one bit narrower (MSB must be zero).
    if (!isUIntN(OffsetSize - 1, Op.getImm())) {
      Error(getFlatOffsetLoc(Operands),
            isGFX9() ? "expected a 12-bit unsigned offset" :
                       "expected an 11-bit unsigned offset");
      return false;
    }
  }

  return true;
}

// SOP2/SOPC instructions may reference at most one unique 32-bit literal
// across src0/src1. Returns true if that constraint holds.
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);

  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumLiterals = 0;
  uint32_t LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1) break;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (MO.isImm() &&
        // Exclude special imm operands (like that used by s_set_gpr_idx_on)
        AMDGPU::isSISrcOperand(Desc, OpIdx) &&
        !isInlineConstant(Inst, OpIdx)) {
      uint32_t Value = static_cast<uint32_t>(MO.getImm());
      // Identical literals share one encoding slot, so only count distinct
      // values.
      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
        ++NumLiterals;
      }
    }
  }

  return NumLiterals <= 1;
}

// v_permlane16/v_permlanex16 only use the two low op_sel bits; reject any
// other bits being set.
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
      Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

    if (OpSel & ~3)
      return false;
  }
  return true;
}

// Check if VCC register matches wavefront size
bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
  auto FB = getFeatureBits();
  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
    (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
}

// VOP3 literal is only allowed in GFX10+ and only one can be used
bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

  unsigned NumLiterals = 0;
  uint32_t LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1) break;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
      continue;

    if (!isInlineConstant(Inst, OpIdx)) {
      uint32_t Value = static_cast<uint32_t>(MO.getImm());
      // Duplicate literal values share a single encoding slot.
      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
        ++NumLiterals;
      }
    }
  }

  return !NumLiterals ||
         (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
}

// Run all target-specific semantic checks on a successfully matched
// instruction. Emits a diagnostic at IDLoc and returns false on the first
// failed check.
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                          const SMLoc &IDLoc,
                                          const OperandVector &Operands) {
  if (!validateLdsDirect(Inst)) {
    Error(IDLoc,
      "invalid use of lds_direct");
    return false;
  }
  if (!validateSOPLiteral(Inst)) {
    Error(IDLoc,
      "only one literal operand is allowed");
    return false;
  }
  if (!validateVOP3Literal(Inst)) {
    Error(IDLoc,
      "invalid literal operand");
    return false;
  }
  if (!validateConstantBusLimitations(Inst)) {
    Error(IDLoc,
      "invalid operand (violates constant bus restrictions)");
    return false;
  }
  if (!validateEarlyClobberLimitations(Inst)) {
    Error(IDLoc,
      "destination must be different than all sources");
    return false;
  }
  if (!validateIntClampSupported(Inst)) {
    Error(IDLoc,
      "integer clamping is not supported on this GPU");
    return false;
  }
  if (!validateOpSel(Inst)) {
    Error(IDLoc,
      "invalid op_sel operand");
    return false;
  }
  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
  if (!validateMIMGD16(Inst)) {
    Error(IDLoc,
      "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGDim(Inst)) {
    Error(IDLoc, "dim modifier is required on this GPU");
    return false;
  }
  if (!validateMIMGDataSize(Inst)) {
    Error(IDLoc,
      "image data size does not match dmask and tfe");
    return false;
  }
  if (!validateMIMGAddrSize(Inst)) {
    Error(IDLoc,
      "image address size does not match dim and a16");
    return false;
  }
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(IDLoc,
      "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(IDLoc,
      "invalid image_gather dmask: only one bit must be set");
    return false;
  }
  // validateFlatOffset reports its own diagnostic at the offset operand.
  if (!validateFlatOffset(Inst, Operands)) {
    return false;
  }

  return true;
}

static std::string AMDGPUMnemonicSpellCheck(StringRef S,
                                            const FeatureBitset &FBS,
                                            unsigned VariantID = 0);

// Try every matcher variant, keep the most specific failure status, and on
// success validate and emit the instruction. Returns true on error (after
// emitting a diagnostic), false on successful emission.
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  switch (Result) {
  default: break;
  case Match_Success:
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      // validateInstruction already emitted the diagnostic.
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.EmitInstruction(Inst, getSTI());
    return false;

  case Match_MissingFeature:
    return Error(IDLoc, "instruction not supported on this GPU");

  case Match_MnemonicFail: {
    FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
    std::string Suggestion = AMDGPUMnemonicSpellCheck(
        ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
    return Error(IDLoc, "invalid instruction" + Suggestion,
                 ((AMDGPUOperand &)*Operands[0]).getLocRange());
  }

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  }
  llvm_unreachable("Implement any new match types added!");
}

// Parse an absolute expression into Ret. Returns true on failure (parser
// convention: true == error), leaving Ret unchanged if the next token is
// neither an integer nor an identifier.
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  int64_t Tmp = -1;
  if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
    return true;
  }
  if (getParser().parseAbsoluteExpression(Tmp)) {
    return true;
  }
  Ret = static_cast<uint32_t>(Tmp);
  return false;
}

// Parse a "<major>, <minor>" version pair shared by several HSA directives.
// Returns true on failure after emitting a diagnostic.
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("minor version number required, comma expected");
  Lex();

  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");

  return false;
}

// Handle the .amdgcn_target directive: the quoted target string must match
// the ISA version string derived from the current subtarget options.
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  std::string Target;

  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(Target))
    return true;
  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());

  std::string ExpectedTarget;
  raw_string_ostream ExpectedTargetOS(ExpectedTarget);
  IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);

  if (Target != ExpectedTargetOS.str())
    return getParser().Error(TargetRange.Start, "target must match options",
                             TargetRange);

  getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
  return false;
}

// Emit a generic "value out of range" diagnostic covering Range.
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return getParser().Error(Range.Start, "value out of range", Range);
}

// Compute the granulated VGPR/SGPR block counts used in the kernel
// descriptor from the raw next-free register numbers, accounting for extra
// SGPRs (VCC, flat scratch, XNACK) and subtarget quirks. Returns true on
// out-of-range error.
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
    SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
    unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());

  unsigned NumVGPRs = NextFreeVGPR;
  unsigned NumSGPRs = NextFreeSGPR;

  if (Version.Major >= 10)
    NumSGPRs = 0;
  else {
    unsigned MaxAddressableNumSGPRs =
        IsaInfo::getAddressableNumSGPRs(&getSTI());

    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    NumSGPRs +=
        IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);

    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
  }

  VGPRBlocks =
      IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);

  return false;
}

// Handle the .amdhsa_kernel directive: parse the kernel name, then the
// .amdhsa_* sub-directives up to .end_amdhsa_kernel, and emit the resulting
// kernel descriptor.
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());

  // Track directives already seen so repeats can be rejected.
  StringSet<> Seen;

  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());

  SMRange VGPRRange;
  uint64_t NextFreeVGPR = 0;
  SMRange SGPRRange;
  uint64_t NextFreeSGPR = 0;
  unsigned UserSGPRCount = 0;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  bool ReserveXNACK = hasXNACK();
  Optional<bool> EnableWavefrontSize32;

  while (true) {
    while (getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");

    StringRef ID = getTok().getIdentifier();
    SMRange IDRange = getTok().getLocRange();
    Lex();

    if (ID == ".end_amdhsa_kernel")
      break;

    if (Seen.find(ID) != Seen.end())
      return TokError(".amdhsa_ directives cannot be repeated");
    Seen.insert(ID);

    SMLoc ValStart = getTok().getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getTok().getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    if (IVal < 0)
      return OutOfRangeError(ValRange);

    uint64_t Val = IVal;

// Range-check VALUE against the field's declared bit width, then set the
// bitfield ENTRY in FIELD. Note: expands to a bare if/return, so it must
// only be used as a full statement inside the directive dispatch below.
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);

    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      // The private segment buffer occupies four user SGPRs.
      UserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      UserSGPRCount += 1;
    } else if (ID == ".amdhsa_wavefront_size32") {
      if (IVersion.Major < 10)
        return getParser().Error(IDRange.Start, "directive requires gfx10+",
                                 IDRange);
      // Remembered separately because it affects VGPR block granularity in
      // calculateGPRBlocks.
      EnableWavefrontSize32 = Val;
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                       Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
          ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return getParser().Error(IDRange.Start, "directive requires gfx7+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return getParser().Error(IDRange.Start, "directive requires gfx8+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveXNACK = Val;
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return getParser().Error(IDRange.Start, "directive requires gfx9+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return getParser().Error(IDRange.Start, "directive requires gfx10+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
                       ValRange);
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return getParser().Error(IDRange.Start, "directive requires gfx10+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
                       ValRange);
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return getParser().Error(IDRange.Start, "directive requires gfx10+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return getParser().Error(IDRange.Start,
                               "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr, ReserveXNACK);
  return false;
}

// Handle .hsa_code_object_version: parse "<major>, <minor>" and forward it
// to the target streamer.
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
  uint32_t Major;
  uint32_t Minor;

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
  return false;
}

// Handle .hsa_code_object_isa: either no arguments (use the subtarget's ISA
// version) or "<major>, <minor>, <stepping>, <vendor>, <arch>".
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no arguments, then use the ISA version for the
  // targeted GPU.
  if (getLexer().is(AsmToken::EndOfStatement)) {
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
                                                      ISA.Stepping,
                                                      "AMD", "AMDGPU");
    return false;
  }

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("stepping version number required, comma expected");
  Lex();

  if (ParseAsAbsoluteExpression(Stepping))
    return TokError("invalid stepping version");

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("vendor name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid vendor name");

  VendorName = getLexer().getTok().getStringContents();
  Lex();

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("arch name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid arch name");

  ArchName = getLexer().getTok().getStringContents();
  Lex();

  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
                                                    VendorName, ArchName);
  return false;
}

// Parse a single "name = value" field of an .amd_kernel_code_t block into
// Header and enforce wavefront-size/GFX10 feature consistency for the
// fields that require it. Returns true on error.
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();

  if (ID == "enable_wavefront_size32") {
    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
      if (!isGFX10())
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    } else {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
    }
  }

  if (ID == "wavefront_size") {
    // wavefront_size is log2 of the wave size: 5 => wave32, 6 => wave64.
    if (Header.wavefront_size == 5) {
      if (!isGFX10())
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (Header.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }

  if (ID == "enable_wgp_mode") {
    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
  }

  if (ID == "enable_mem_ordered") {
    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
  }

  if (ID == "enable_fwd_progress") {
    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
  }

  return false;
}

// Handle .amd_kernel_code_t: parse "name = value" fields until
// .end_amd_kernel_code_t and emit the assembled header.
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  amd_kernel_code_t Header;
  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());

  while (true) {
    // Lex EndOfStatement. This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.
    while(getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected value identifier or .end_amd_kernel_code_t");

    StringRef ID = getLexer().getTok().getIdentifier();
    Lex();

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, Header))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(Header);

  return false;
}

// Handle .amdgpu_hsa_kernel: mark the named symbol as an HSA kernel and,
// for code object v2, (re)initialize the per-kernel register scope.
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  if (getLexer().isNot(AsmToken::Identifier))
    return TokError("expected symbol name");

  StringRef KernelName = Parser.getTok().getString();

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);
  Lex();
  if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
    KernelScope.initialize(getContext());
  return false;
}

// Handle .amd_amdgpu_isa: the quoted ISA string must match the one derived
// from the current triple and -mcpu options.
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();

  std::string ISAVersionStringFromSTI;
  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);

  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
                 "arguments specified through the command line");
  }

  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
  Lex();

  return false;
}

// Handle the HSA metadata begin directive (v2 or v3 flavor depending on the
// code object version): collect the enclosed text and hand it to the target
// streamer for validation and emission.
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  const char *AssemblerDirectiveBegin;
  const char *AssemblerDirectiveEnd;
  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
      AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
          ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
                            HSAMD::V3::AssemblerDirectiveEnd)
          : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
                            HSAMD::AssemblerDirectiveEnd);

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
                 "not available on non-amdhsa OSes")).str());
  }

  std::string HSAMetadataString;
  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
                          HSAMetadataString))
    return true;

  if (IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
  } else {
    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
  }

  return false;
}

/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
4041 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4042 const char *AssemblerDirectiveEnd, 4043 std::string &CollectString) { 4044 4045 raw_string_ostream CollectStream(CollectString); 4046 4047 getLexer().setSkipSpace(false); 4048 4049 bool FoundEnd = false; 4050 while (!getLexer().is(AsmToken::Eof)) { 4051 while (getLexer().is(AsmToken::Space)) { 4052 CollectStream << getLexer().getTok().getString(); 4053 Lex(); 4054 } 4055 4056 if (getLexer().is(AsmToken::Identifier)) { 4057 StringRef ID = getLexer().getTok().getIdentifier(); 4058 if (ID == AssemblerDirectiveEnd) { 4059 Lex(); 4060 FoundEnd = true; 4061 break; 4062 } 4063 } 4064 4065 CollectStream << Parser.parseStringToEndOfStatement() 4066 << getContext().getAsmInfo()->getSeparatorString(); 4067 4068 Parser.eatToEndOfStatement(); 4069 } 4070 4071 getLexer().setSkipSpace(true); 4072 4073 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4074 return TokError(Twine("expected directive ") + 4075 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4076 } 4077 4078 CollectStream.flush(); 4079 return false; 4080 } 4081 4082 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4083 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4084 std::string String; 4085 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4086 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4087 return true; 4088 4089 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4090 if (!PALMetadata->setFromString(String)) 4091 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4092 return false; 4093 } 4094 4095 /// Parse the assembler directive for old linear-format PAL metadata. 
4096 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4097 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4098 return Error(getParser().getTok().getLoc(), 4099 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4100 "not available on non-amdpal OSes")).str()); 4101 } 4102 4103 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4104 PALMetadata->setLegacy(); 4105 for (;;) { 4106 uint32_t Key, Value; 4107 if (ParseAsAbsoluteExpression(Key)) { 4108 return TokError(Twine("invalid value in ") + 4109 Twine(PALMD::AssemblerDirective)); 4110 } 4111 if (getLexer().isNot(AsmToken::Comma)) { 4112 return TokError(Twine("expected an even number of values in ") + 4113 Twine(PALMD::AssemblerDirective)); 4114 } 4115 Lex(); 4116 if (ParseAsAbsoluteExpression(Value)) { 4117 return TokError(Twine("invalid value in ") + 4118 Twine(PALMD::AssemblerDirective)); 4119 } 4120 PALMetadata->setRegister(Key, Value); 4121 if (getLexer().isNot(AsmToken::Comma)) 4122 break; 4123 Lex(); 4124 } 4125 return false; 4126 } 4127 4128 /// ParseDirectiveAMDGPULDS 4129 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4130 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4131 if (getParser().checkForValidSection()) 4132 return true; 4133 4134 StringRef Name; 4135 SMLoc NameLoc = getLexer().getLoc(); 4136 if (getParser().parseIdentifier(Name)) 4137 return TokError("expected identifier in directive"); 4138 4139 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4140 if (parseToken(AsmToken::Comma, "expected ','")) 4141 return true; 4142 4143 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4144 4145 int64_t Size; 4146 SMLoc SizeLoc = getLexer().getLoc(); 4147 if (getParser().parseAbsoluteExpression(Size)) 4148 return true; 4149 if (Size < 0) 4150 return Error(SizeLoc, "size must be non-negative"); 4151 if (Size > LocalMemorySize) 4152 return Error(SizeLoc, "size is too large"); 4153 4154 int64_t Align = 4; 4155 if 
(getLexer().is(AsmToken::Comma)) { 4156 Lex(); 4157 SMLoc AlignLoc = getLexer().getLoc(); 4158 if (getParser().parseAbsoluteExpression(Align)) 4159 return true; 4160 if (Align < 0 || !isPowerOf2_64(Align)) 4161 return Error(AlignLoc, "alignment must be a power of two"); 4162 4163 // Alignment larger than the size of LDS is possible in theory, as long 4164 // as the linker manages to place to symbol at address 0, but we do want 4165 // to make sure the alignment fits nicely into a 32-bit integer. 4166 if (Align >= 1u << 31) 4167 return Error(AlignLoc, "alignment is too large"); 4168 } 4169 4170 if (parseToken(AsmToken::EndOfStatement, 4171 "unexpected token in '.amdgpu_lds' directive")) 4172 return true; 4173 4174 Symbol->redefineIfPossible(); 4175 if (!Symbol->isUndefined()) 4176 return Error(NameLoc, "invalid symbol redefinition"); 4177 4178 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align); 4179 return false; 4180 } 4181 4182 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4183 StringRef IDVal = DirectiveID.getString(); 4184 4185 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4186 if (IDVal == ".amdgcn_target") 4187 return ParseDirectiveAMDGCNTarget(); 4188 4189 if (IDVal == ".amdhsa_kernel") 4190 return ParseDirectiveAMDHSAKernel(); 4191 4192 // TODO: Restructure/combine with PAL metadata directive. 
4193 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4194 return ParseDirectiveHSAMetadata(); 4195 } else { 4196 if (IDVal == ".hsa_code_object_version") 4197 return ParseDirectiveHSACodeObjectVersion(); 4198 4199 if (IDVal == ".hsa_code_object_isa") 4200 return ParseDirectiveHSACodeObjectISA(); 4201 4202 if (IDVal == ".amd_kernel_code_t") 4203 return ParseDirectiveAMDKernelCodeT(); 4204 4205 if (IDVal == ".amdgpu_hsa_kernel") 4206 return ParseDirectiveAMDGPUHsaKernel(); 4207 4208 if (IDVal == ".amd_amdgpu_isa") 4209 return ParseDirectiveISAVersion(); 4210 4211 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4212 return ParseDirectiveHSAMetadata(); 4213 } 4214 4215 if (IDVal == ".amdgpu_lds") 4216 return ParseDirectiveAMDGPULDS(); 4217 4218 if (IDVal == PALMD::AssemblerDirectiveBegin) 4219 return ParseDirectivePALMetadataBegin(); 4220 4221 if (IDVal == PALMD::AssemblerDirective) 4222 return ParseDirectivePALMetadata(); 4223 4224 return true; 4225 } 4226 4227 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4228 unsigned RegNo) const { 4229 4230 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4231 R.isValid(); ++R) { 4232 if (*R == RegNo) 4233 return isGFX9() || isGFX10(); 4234 } 4235 4236 // GFX10 has 2 more SGPRs 104 and 105. 
4237 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4238 R.isValid(); ++R) { 4239 if (*R == RegNo) 4240 return hasSGPR104_SGPR105(); 4241 } 4242 4243 switch (RegNo) { 4244 case AMDGPU::SRC_SHARED_BASE: 4245 case AMDGPU::SRC_SHARED_LIMIT: 4246 case AMDGPU::SRC_PRIVATE_BASE: 4247 case AMDGPU::SRC_PRIVATE_LIMIT: 4248 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4249 return !isCI() && !isSI() && !isVI(); 4250 case AMDGPU::TBA: 4251 case AMDGPU::TBA_LO: 4252 case AMDGPU::TBA_HI: 4253 case AMDGPU::TMA: 4254 case AMDGPU::TMA_LO: 4255 case AMDGPU::TMA_HI: 4256 return !isGFX9() && !isGFX10(); 4257 case AMDGPU::XNACK_MASK: 4258 case AMDGPU::XNACK_MASK_LO: 4259 case AMDGPU::XNACK_MASK_HI: 4260 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4261 case AMDGPU::SGPR_NULL: 4262 return isGFX10(); 4263 default: 4264 break; 4265 } 4266 4267 if (isCI()) 4268 return true; 4269 4270 if (isSI() || isGFX10()) { 4271 // No flat_scr on SI. 4272 // On GFX10 flat scratch is not a valid register operand and can only be 4273 // accessed with s_setreg/s_getreg. 4274 switch (RegNo) { 4275 case AMDGPU::FLAT_SCR: 4276 case AMDGPU::FLAT_SCR_LO: 4277 case AMDGPU::FLAT_SCR_HI: 4278 return false; 4279 default: 4280 return true; 4281 } 4282 } 4283 4284 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4285 // SI/CI have. 4286 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4287 R.isValid(); ++R) { 4288 if (*R == RegNo) 4289 return hasSGPR102_SGPR103(); 4290 } 4291 4292 return true; 4293 } 4294 4295 OperandMatchResultTy 4296 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4297 OperandMode Mode) { 4298 // Try to parse with a custom parser 4299 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4300 4301 // If we successfully parsed the operand or if there as an error parsing, 4302 // we are done. 
4303 // 4304 // If we are parsing after we reach EndOfStatement then this means we 4305 // are appending default values to the Operands list. This is only done 4306 // by custom parser, so we shouldn't continue on to the generic parsing. 4307 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4308 getLexer().is(AsmToken::EndOfStatement)) 4309 return ResTy; 4310 4311 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4312 unsigned Prefix = Operands.size(); 4313 SMLoc LBraceLoc = getTok().getLoc(); 4314 Parser.Lex(); // eat the '[' 4315 4316 for (;;) { 4317 ResTy = parseReg(Operands); 4318 if (ResTy != MatchOperand_Success) 4319 return ResTy; 4320 4321 if (getLexer().is(AsmToken::RBrac)) 4322 break; 4323 4324 if (getLexer().isNot(AsmToken::Comma)) 4325 return MatchOperand_ParseFail; 4326 Parser.Lex(); 4327 } 4328 4329 if (Operands.size() - Prefix > 1) { 4330 Operands.insert(Operands.begin() + Prefix, 4331 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4332 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4333 getTok().getLoc())); 4334 } 4335 4336 Parser.Lex(); // eat the ']' 4337 return MatchOperand_Success; 4338 } 4339 4340 return parseRegOrImm(Operands); 4341 } 4342 4343 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4344 // Clear any forced encodings from the previous instruction. 
4345 setForcedEncodingSize(0); 4346 setForcedDPP(false); 4347 setForcedSDWA(false); 4348 4349 if (Name.endswith("_e64")) { 4350 setForcedEncodingSize(64); 4351 return Name.substr(0, Name.size() - 4); 4352 } else if (Name.endswith("_e32")) { 4353 setForcedEncodingSize(32); 4354 return Name.substr(0, Name.size() - 4); 4355 } else if (Name.endswith("_dpp")) { 4356 setForcedDPP(true); 4357 return Name.substr(0, Name.size() - 4); 4358 } else if (Name.endswith("_sdwa")) { 4359 setForcedSDWA(true); 4360 return Name.substr(0, Name.size() - 5); 4361 } 4362 return Name; 4363 } 4364 4365 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4366 StringRef Name, 4367 SMLoc NameLoc, OperandVector &Operands) { 4368 // Add the instruction mnemonic 4369 Name = parseMnemonicSuffix(Name); 4370 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4371 4372 bool IsMIMG = Name.startswith("image_"); 4373 4374 while (!getLexer().is(AsmToken::EndOfStatement)) { 4375 OperandMode Mode = OperandMode_Default; 4376 if (IsMIMG && isGFX10() && Operands.size() == 2) 4377 Mode = OperandMode_NSA; 4378 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4379 4380 // Eat the comma or space if there is one. 4381 if (getLexer().is(AsmToken::Comma)) 4382 Parser.Lex(); 4383 4384 switch (Res) { 4385 case MatchOperand_Success: break; 4386 case MatchOperand_ParseFail: 4387 // FIXME: use real operand location rather than the current location. 4388 Error(getLexer().getLoc(), "failed parsing operand."); 4389 while (!getLexer().is(AsmToken::EndOfStatement)) { 4390 Parser.Lex(); 4391 } 4392 return true; 4393 case MatchOperand_NoMatch: 4394 // FIXME: use real operand location rather than the current location. 
4395 Error(getLexer().getLoc(), "not a valid operand."); 4396 while (!getLexer().is(AsmToken::EndOfStatement)) { 4397 Parser.Lex(); 4398 } 4399 return true; 4400 } 4401 } 4402 4403 return false; 4404 } 4405 4406 //===----------------------------------------------------------------------===// 4407 // Utility functions 4408 //===----------------------------------------------------------------------===// 4409 4410 OperandMatchResultTy 4411 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4412 4413 if (!trySkipId(Prefix, AsmToken::Colon)) 4414 return MatchOperand_NoMatch; 4415 4416 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 4417 } 4418 4419 OperandMatchResultTy 4420 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4421 AMDGPUOperand::ImmTy ImmTy, 4422 bool (*ConvertResult)(int64_t&)) { 4423 SMLoc S = getLoc(); 4424 int64_t Value = 0; 4425 4426 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4427 if (Res != MatchOperand_Success) 4428 return Res; 4429 4430 if (ConvertResult && !ConvertResult(Value)) { 4431 Error(S, "invalid " + StringRef(Prefix) + " value."); 4432 } 4433 4434 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4435 return MatchOperand_Success; 4436 } 4437 4438 OperandMatchResultTy 4439 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4440 OperandVector &Operands, 4441 AMDGPUOperand::ImmTy ImmTy, 4442 bool (*ConvertResult)(int64_t&)) { 4443 SMLoc S = getLoc(); 4444 if (!trySkipId(Prefix, AsmToken::Colon)) 4445 return MatchOperand_NoMatch; 4446 4447 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4448 return MatchOperand_ParseFail; 4449 4450 unsigned Val = 0; 4451 const unsigned MaxSize = 4; 4452 4453 // FIXME: How to verify the number of elements matches the number of src 4454 // operands? 
4455 for (int I = 0; ; ++I) { 4456 int64_t Op; 4457 SMLoc Loc = getLoc(); 4458 if (!parseExpr(Op)) 4459 return MatchOperand_ParseFail; 4460 4461 if (Op != 0 && Op != 1) { 4462 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4463 return MatchOperand_ParseFail; 4464 } 4465 4466 Val |= (Op << I); 4467 4468 if (trySkipToken(AsmToken::RBrac)) 4469 break; 4470 4471 if (I + 1 == MaxSize) { 4472 Error(getLoc(), "expected a closing square bracket"); 4473 return MatchOperand_ParseFail; 4474 } 4475 4476 if (!skipToken(AsmToken::Comma, "expected a comma")) 4477 return MatchOperand_ParseFail; 4478 } 4479 4480 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4481 return MatchOperand_Success; 4482 } 4483 4484 OperandMatchResultTy 4485 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4486 AMDGPUOperand::ImmTy ImmTy) { 4487 int64_t Bit = 0; 4488 SMLoc S = Parser.getTok().getLoc(); 4489 4490 // We are at the end of the statement, and this is a default argument, so 4491 // use a default value. 
4492 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4493 switch(getLexer().getKind()) { 4494 case AsmToken::Identifier: { 4495 StringRef Tok = Parser.getTok().getString(); 4496 if (Tok == Name) { 4497 if (Tok == "r128" && isGFX9()) 4498 Error(S, "r128 modifier is not supported on this GPU"); 4499 if (Tok == "a16" && !isGFX9() && !isGFX10()) 4500 Error(S, "a16 modifier is not supported on this GPU"); 4501 Bit = 1; 4502 Parser.Lex(); 4503 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4504 Bit = 0; 4505 Parser.Lex(); 4506 } else { 4507 return MatchOperand_NoMatch; 4508 } 4509 break; 4510 } 4511 default: 4512 return MatchOperand_NoMatch; 4513 } 4514 } 4515 4516 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4517 return MatchOperand_ParseFail; 4518 4519 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4520 return MatchOperand_Success; 4521 } 4522 4523 static void addOptionalImmOperand( 4524 MCInst& Inst, const OperandVector& Operands, 4525 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4526 AMDGPUOperand::ImmTy ImmT, 4527 int64_t Default = 0) { 4528 auto i = OptionalIdx.find(ImmT); 4529 if (i != OptionalIdx.end()) { 4530 unsigned Idx = i->second; 4531 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4532 } else { 4533 Inst.addOperand(MCOperand::createImm(Default)); 4534 } 4535 } 4536 4537 OperandMatchResultTy 4538 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4539 if (getLexer().isNot(AsmToken::Identifier)) { 4540 return MatchOperand_NoMatch; 4541 } 4542 StringRef Tok = Parser.getTok().getString(); 4543 if (Tok != Prefix) { 4544 return MatchOperand_NoMatch; 4545 } 4546 4547 Parser.Lex(); 4548 if (getLexer().isNot(AsmToken::Colon)) { 4549 return MatchOperand_ParseFail; 4550 } 4551 4552 Parser.Lex(); 4553 if (getLexer().isNot(AsmToken::Identifier)) { 4554 return MatchOperand_ParseFail; 4555 } 4556 4557 Value = Parser.getTok().getString(); 4558 return MatchOperand_Success; 4559 } 4560 4561 // 
dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4562 // values to live in a joint format operand in the MCInst encoding. 4563 OperandMatchResultTy 4564 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4565 SMLoc S = Parser.getTok().getLoc(); 4566 int64_t Dfmt = 0, Nfmt = 0; 4567 // dfmt and nfmt can appear in either order, and each is optional. 4568 bool GotDfmt = false, GotNfmt = false; 4569 while (!GotDfmt || !GotNfmt) { 4570 if (!GotDfmt) { 4571 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4572 if (Res != MatchOperand_NoMatch) { 4573 if (Res != MatchOperand_Success) 4574 return Res; 4575 if (Dfmt >= 16) { 4576 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4577 return MatchOperand_ParseFail; 4578 } 4579 GotDfmt = true; 4580 Parser.Lex(); 4581 continue; 4582 } 4583 } 4584 if (!GotNfmt) { 4585 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4586 if (Res != MatchOperand_NoMatch) { 4587 if (Res != MatchOperand_Success) 4588 return Res; 4589 if (Nfmt >= 8) { 4590 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4591 return MatchOperand_ParseFail; 4592 } 4593 GotNfmt = true; 4594 Parser.Lex(); 4595 continue; 4596 } 4597 } 4598 break; 4599 } 4600 if (!GotDfmt && !GotNfmt) 4601 return MatchOperand_NoMatch; 4602 auto Format = Dfmt | Nfmt << 4; 4603 Operands.push_back( 4604 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4605 return MatchOperand_Success; 4606 } 4607 4608 //===----------------------------------------------------------------------===// 4609 // ds 4610 //===----------------------------------------------------------------------===// 4611 4612 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4613 const OperandVector &Operands) { 4614 OptionalImmIndexMap OptionalIdx; 4615 4616 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4617 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4618 4619 // Add the register arguments 4620 if (Op.isReg()) { 4621 Op.addRegOperands(Inst, 1); 4622 
continue; 4623 } 4624 4625 // Handle optional arguments 4626 OptionalIdx[Op.getImmTy()] = i; 4627 } 4628 4629 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4630 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4631 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4632 4633 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4634 } 4635 4636 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4637 bool IsGdsHardcoded) { 4638 OptionalImmIndexMap OptionalIdx; 4639 4640 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4641 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4642 4643 // Add the register arguments 4644 if (Op.isReg()) { 4645 Op.addRegOperands(Inst, 1); 4646 continue; 4647 } 4648 4649 if (Op.isToken() && Op.getToken() == "gds") { 4650 IsGdsHardcoded = true; 4651 continue; 4652 } 4653 4654 // Handle optional arguments 4655 OptionalIdx[Op.getImmTy()] = i; 4656 } 4657 4658 AMDGPUOperand::ImmTy OffsetType = 4659 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4660 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4661 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4662 AMDGPUOperand::ImmTyOffset; 4663 4664 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4665 4666 if (!IsGdsHardcoded) { 4667 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4668 } 4669 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4670 } 4671 4672 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4673 OptionalImmIndexMap OptionalIdx; 4674 4675 unsigned OperandIdx[4]; 4676 unsigned EnMask = 0; 4677 int SrcIdx = 0; 4678 4679 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4680 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4681 4682 // Add the register arguments 4683 if (Op.isReg()) { 4684 assert(SrcIdx < 4); 4685 OperandIdx[SrcIdx] = Inst.size(); 4686 Op.addRegOperands(Inst, 1); 4687 ++SrcIdx; 4688 continue; 4689 } 4690 4691 if (Op.isOff()) { 4692 assert(SrcIdx < 4); 4693 OperandIdx[SrcIdx] = Inst.size(); 4694 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4695 ++SrcIdx; 4696 continue; 4697 } 4698 4699 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4700 Op.addImmOperands(Inst, 1); 4701 continue; 4702 } 4703 4704 if (Op.isToken() && Op.getToken() == "done") 4705 continue; 4706 4707 // Handle optional arguments 4708 OptionalIdx[Op.getImmTy()] = i; 4709 } 4710 4711 assert(SrcIdx == 4); 4712 4713 bool Compr = false; 4714 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4715 Compr = true; 4716 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4717 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4718 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4719 } 4720 4721 for (auto i = 0; i < SrcIdx; ++i) { 4722 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4723 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4724 } 4725 } 4726 4727 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4728 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4729 4730 Inst.addOperand(MCOperand::createImm(EnMask)); 4731 } 4732 4733 //===----------------------------------------------------------------------===// 4734 // s_waitcnt 4735 //===----------------------------------------------------------------------===// 4736 4737 static bool 4738 encodeCnt( 4739 const AMDGPU::IsaVersion ISA, 4740 int64_t &IntVal, 4741 int64_t CntVal, 4742 bool Saturate, 4743 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4744 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4745 { 4746 bool Failed = false; 4747 4748 IntVal = encode(ISA, IntVal, CntVal); 4749 if (CntVal != decode(ISA, IntVal)) { 4750 if (Saturate) { 4751 IntVal = encode(ISA, IntVal, -1); 4752 } else { 4753 Failed = true; 4754 } 4755 } 4756 return Failed; 4757 } 4758 4759 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4760 4761 SMLoc CntLoc = getLoc(); 4762 StringRef CntName = getTokenStr(); 4763 4764 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 4765 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 4766 return false; 4767 4768 int64_t CntVal; 4769 SMLoc ValLoc = getLoc(); 4770 if (!parseExpr(CntVal)) 4771 return false; 4772 4773 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4774 4775 bool Failed = true; 4776 bool Sat = CntName.endswith("_sat"); 4777 4778 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4779 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4780 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4781 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4782 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4783 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4784 } 
else { 4785 Error(CntLoc, "invalid counter name " + CntName); 4786 return false; 4787 } 4788 4789 if (Failed) { 4790 Error(ValLoc, "too large value for " + CntName); 4791 return false; 4792 } 4793 4794 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 4795 return false; 4796 4797 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 4798 if (isToken(AsmToken::EndOfStatement)) { 4799 Error(getLoc(), "expected a counter name"); 4800 return false; 4801 } 4802 } 4803 4804 return true; 4805 } 4806 4807 OperandMatchResultTy 4808 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4809 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4810 int64_t Waitcnt = getWaitcntBitMask(ISA); 4811 SMLoc S = getLoc(); 4812 4813 // If parse failed, do not return error code 4814 // to avoid excessive error messages. 4815 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 4816 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 4817 } else { 4818 parseExpr(Waitcnt); 4819 } 4820 4821 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4822 return MatchOperand_Success; 4823 } 4824 4825 bool 4826 AMDGPUOperand::isSWaitCnt() const { 4827 return isImm(); 4828 } 4829 4830 //===----------------------------------------------------------------------===// 4831 // hwreg 4832 //===----------------------------------------------------------------------===// 4833 4834 bool 4835 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 4836 int64_t &Offset, 4837 int64_t &Width) { 4838 using namespace llvm::AMDGPU::Hwreg; 4839 4840 // The register may be specified by name or using a numeric code 4841 if (isToken(AsmToken::Identifier) && 4842 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 4843 HwReg.IsSymbolic = true; 4844 lex(); // skip message name 4845 } else if (!parseExpr(HwReg.Id)) { 4846 return false; 4847 } 4848 4849 if (trySkipToken(AsmToken::RParen)) 4850 return true; 4851 4852 // parse optional 
params 4853 return 4854 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 4855 parseExpr(Offset) && 4856 skipToken(AsmToken::Comma, "expected a comma") && 4857 parseExpr(Width) && 4858 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 4859 } 4860 4861 bool 4862 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 4863 const int64_t Offset, 4864 const int64_t Width, 4865 const SMLoc Loc) { 4866 4867 using namespace llvm::AMDGPU::Hwreg; 4868 4869 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 4870 Error(Loc, "specified hardware register is not supported on this GPU"); 4871 return false; 4872 } else if (!isValidHwreg(HwReg.Id)) { 4873 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 4874 return false; 4875 } else if (!isValidHwregOffset(Offset)) { 4876 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 4877 return false; 4878 } else if (!isValidHwregWidth(Width)) { 4879 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 4880 return false; 4881 } 4882 return true; 4883 } 4884 4885 OperandMatchResultTy 4886 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4887 using namespace llvm::AMDGPU::Hwreg; 4888 4889 int64_t ImmVal = 0; 4890 SMLoc Loc = getLoc(); 4891 4892 // If parse failed, do not return error code 4893 // to avoid excessive error messages. 
4894 if (trySkipId("hwreg", AsmToken::LParen)) { 4895 OperandInfoTy HwReg(ID_UNKNOWN_); 4896 int64_t Offset = OFFSET_DEFAULT_; 4897 int64_t Width = WIDTH_DEFAULT_; 4898 if (parseHwregBody(HwReg, Offset, Width) && 4899 validateHwreg(HwReg, Offset, Width, Loc)) { 4900 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 4901 } 4902 } else if (parseExpr(ImmVal)) { 4903 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 4904 Error(Loc, "invalid immediate: only 16-bit values are legal"); 4905 } 4906 4907 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 4908 return MatchOperand_Success; 4909 } 4910 4911 bool AMDGPUOperand::isHwreg() const { 4912 return isImmTy(ImmTyHwreg); 4913 } 4914 4915 //===----------------------------------------------------------------------===// 4916 // sendmsg 4917 //===----------------------------------------------------------------------===// 4918 4919 bool 4920 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 4921 OperandInfoTy &Op, 4922 OperandInfoTy &Stream) { 4923 using namespace llvm::AMDGPU::SendMsg; 4924 4925 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 4926 Msg.IsSymbolic = true; 4927 lex(); // skip message name 4928 } else if (!parseExpr(Msg.Id)) { 4929 return false; 4930 } 4931 4932 if (trySkipToken(AsmToken::Comma)) { 4933 Op.IsDefined = true; 4934 if (isToken(AsmToken::Identifier) && 4935 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 4936 lex(); // skip operation name 4937 } else if (!parseExpr(Op.Id)) { 4938 return false; 4939 } 4940 4941 if (trySkipToken(AsmToken::Comma)) { 4942 Stream.IsDefined = true; 4943 if (!parseExpr(Stream.Id)) 4944 return false; 4945 } 4946 } 4947 4948 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 4949 } 4950 4951 bool 4952 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 4953 const OperandInfoTy &Op, 4954 const OperandInfoTy &Stream, 4955 const SMLoc S) { 4956 using namespace llvm::AMDGPU::SendMsg; 

  // Validation strictness depends on whether message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(S, "invalid message id");
    return false;
  } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    // An operation must be present exactly when the message requires one.
    Error(S, Op.IsDefined ?
           "message does not support operations" :
           "missing message operation");
    return false;
  } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
    Error(S, "invalid operation id");
    return false;
  } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(S, "message operation does not support streams");
    return false;
  } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
    Error(S, "invalid message stream id");
    return false;
  }
  return true;
}

// Parse either a 'sendmsg(...)' specifier or a raw 16-bit immediate and
// push the encoded value as an ImmTySendMsg operand. Always reports
// success; on a parse failure the operand is still pushed (with value 0)
// so the caller does not emit a second, redundant diagnostic.
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  // If parse failed, do not return error code
  // to avoid excessive error messages.
  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream, Loc)) {
      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
    }
  } else if (parseExpr(ImmVal)) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal))
      Error(Loc, "invalid immediate: only 16-bit values are legal");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}

//===----------------------------------------------------------------------===//
// v_interp
//===----------------------------------------------------------------------===//

// Parse an interpolation slot name (p10, p20 or p0) into an
// ImmTyInterpSlot operand.
OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  int Slot = StringSwitch<int>(Str)
      .Case("p10", 0)
      .Case("p20", 1)
      .Case("p0", 2)
      .Default(-1);

  SMLoc S = Parser.getTok().getLoc();
  if (Slot == -1)
    return MatchOperand_ParseFail;

  Parser.Lex();
  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}

// Parse an 'attrN.c' interpolation attribute (e.g. attr32.x). On success
// two operands are pushed: the attribute number (ImmTyInterpAttr) and the
// channel (ImmTyAttrChan).
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  // The last two characters select the channel (".x" .. ".w").
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
      .Case(".x", 0)
      .Case(".y", 1)
      .Case(".z", 2)
      .Case(".w", 3)
      .Default(-1);
  if (AttrChan == -1)
    return MatchOperand_ParseFail;

  // Strip the "attr" prefix and the ".c" channel suffix, leaving only the
  // attribute number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return MatchOperand_ParseFail;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();
  if (Attr > 63) {
    // Report the error but still succeed so no second diagnostic follows.
    Error(S, "out of bounds attr");
    return MatchOperand_Success;
  }

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// exp
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::errorExpTgt() {
  Error(Parser.getTok().getLoc(), "invalid exp target");
}

// Decode an export target name (null, mrt*, mrtz, pos*, prim, param*,
// invalid_target_*) into its hardware target code in Val. Out-of-range
// values emit a diagnostic via errorExpTgt() but still return
// MatchOperand_Success so parsing can continue.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
                                                      uint8_t &Val) {
  if (Str == "null") {
    Val = 9;
    return MatchOperand_Success;
  }

  if (Str.startswith("mrt")) {
    Str = Str.drop_front(3);
    if (Str == "z") { // == mrtz
      Val = 8;
      return MatchOperand_Success;
    }

    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 7)
      errorExpTgt();

    return MatchOperand_Success;
  }

  if (Str.startswith("pos")) {
    Str = Str.drop_front(3);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    // pos4 is only valid on GFX10.
    if (Val > 4 || (Val == 4 && !isGFX10()))
      errorExpTgt();

    Val += 12;
    return MatchOperand_Success;
  }

  if (isGFX10() && Str == "prim") {
    Val = 20;
    return MatchOperand_Success;
  }

  if (Str.startswith("param")) {
    Str = Str.drop_front(5);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val >= 32)
      errorExpTgt();

    Val += 32;
    return MatchOperand_Success;
  }

  if (Str.startswith("invalid_target_")) {
    Str = Str.drop_front(15);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    // Explicitly-spelled invalid targets always produce a diagnostic.
    errorExpTgt();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

// Parse an export target token and push it as an ImmTyExpTgt operand.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  uint8_t Val;
  StringRef Str = Parser.getTok().getString();

  auto Res = parseExpTgtImpl(Str, Val);
  if (Res != MatchOperand_Success)
    return Res;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// parser helpers
//===----------------------------------------------------------------------===//

// True if Token is the identifier Id.
bool
AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
}

// True if the current token is the identifier Id.
bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}

// True if the current token is of the given kind.
bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}

// Consume the identifier Id if it is the current token.
bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (isId(Id)) {
    lex();
    return true;
  }
  return false;
}

// Consume the identifier Id and the token following it if the pair matches
// (e.g. "hwreg" followed by '('). Consumes both tokens or neither.
bool
AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
  if (isId(Id) && peekToken().is(Kind)) {
    lex();
    lex();
    return true;
  }
  return false;
}

// Consume the current token if it is of the given kind.
bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (isToken(Kind)) {
    lex();
    return true;
  }
  return false;
}

// Like trySkipToken, but emits ErrMsg at the current location on mismatch.
bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);
    return false;
  }
  return true;
}

// Parse an absolute expression into Imm; true on success.
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
  return !getParser().parseAbsoluteExpression(Imm);
}

// Parse an expression and push it: as an immediate operand if it folds to
// a constant, otherwise as an MCExpr operand.
bool
AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
  SMLoc S = getLoc();

  const MCExpr *Expr;
  if (Parser.parseExpression(Expr))
    return false;

  int64_t IntVal;
  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
  } else {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  }
  return true;
}

// Read the contents of a quoted string token into Val; emits ErrMsg if the
// current token is not a string.
bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::String)) {
    Val = getToken().getStringContents();
    lex();
    return true;
  } else {
    Error(getLoc(), ErrMsg);
    return false;
  }
}

AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}

AsmToken
AMDGPUAsmParser::peekToken() {
  return getLexer().peekTok();
}

// Peek up to Tokens.size() tokens ahead without consuming them; slots past
// the end of the stream are filled with AsmToken::Error.
void
AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
  auto TokCount = getLexer().peekTokens(Tokens);

  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
    Tokens[Idx] = AsmToken(AsmToken::Error, "");
}

AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}

SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}

StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}

void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}

//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//

// Pack and/or/xor masks into a BITMASK_PERM swizzle encoding.
LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) |
         (XorMask << BITMASK_XOR_SHIFT);
}

// Parse OpNum comma-prefixed integer operands into Op[0..OpNum-1],
// checking each value lies within [MinVal, MaxVal]; emits ErrMsg at the
// offending operand otherwise.
bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!skipToken(AsmToken::Comma, "expected a comma")){
      return false;
    }
    SMLoc ExprLoc = Parser.getTok().getLoc();
    if (!parseExpr(Op[i])) {
      return false;
    }
    if (Op[i] < MinVal || Op[i] > MaxVal) {
      Error(ExprLoc, ErrMsg);
      return false;
    }
  }

  return true;
}

// swizzle(QUAD_PERM, lane0, lane1, lane2, lane3)
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    // Pack the four lane selectors into adjacent LANE_SHIFT-wide fields.
    for (unsigned I = 0; I < LANE_NUM; ++I) {
      Imm |= Lane[I] << (LANE_SHIFT * I);
    }
    return true;
  }
  return false;
}

// swizzle(BROADCAST, group_size, lane_id)
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32,
                            "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperands(1, &LaneIdx,
                           0, GroupSize - 1,
                           "lane id must be in the interval [0,group size - 1]")) {
    // Broadcast is expressed as a bitmask perm: keep the high (group) bits,
    // or-in the source lane within the group.
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}

// swizzle(REVERSE, group_size)
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32, "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  // Reversing a group of size N xors the lane id with N-1.
  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}

// swizzle(SWAP, group_size)
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
                            1, 16, "group size must be in the interval [1,16]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  // Swapping adjacent groups of size N xors the lane id with N.
  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}

// swizzle(BITMASK_PERM, "mask") where mask is a 5-character string over
// {'0','1','p','i'}: '0' forces the bit to 0, '1' forces it to 1, 'p'
// preserves it and 'i' inverts it.
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = Parser.getTok().getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  // The leftmost character controls the most significant bit.
  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

// A raw swizzle offset: any 16-bit value.
bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = Parser.getTok().getLoc();

  if (!parseExpr(Imm)) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

// Parse 'swizzle(MODE, ...)' and dispatch to the mode-specific parser.
// The 'swizzle' identifier itself has already been consumed.
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = Parser.getTok().getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}

// Parse an 'offset:' operand for DS swizzle instructions: either the
// 'swizzle(...)' macro form or a plain 16-bit immediate.
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}

//===----------------------------------------------------------------------===//
// VGPR Index Mode
//===----------------------------------------------------------------------===//

// Parse the interior of 'gpr_idx(...)': either an immediately closing
// parenthesis (meaning OFF) or a comma-separated list of distinct index
// mode names. Returns the accumulated mode bitmask; on error a diagnostic
// is emitted and the mask accumulated so far is returned.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = Parser.getTok().getLoc();

    // Match one of the symbolic mode names.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      // A closing parenthesis is only acceptable before any mode was seen.
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      break;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      break;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      break;
  }

  return Imm;
}

// Parse a 'gpr_idx(...)' specifier or a raw 4-bit immediate into an
// ImmTyGprIdxMode operand.
OperandMatchResultTy
AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  int64_t Imm = 0;
  SMLoc S = Parser.getTok().getLoc();

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "gpr_idx" &&
      getLexer().peekTok().is(AsmToken::LParen)) {

    // Consume 'gpr_idx' and '('.
    Parser.Lex();
    Parser.Lex();

    // If parse failed, trigger an error but do not return error code
    // to avoid excessive error messages.
    Imm = parseGPRIdxMacro();

  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return MatchOperand_NoMatch;
    if (Imm < 0 || !isUInt<4>(Imm)) {
      Error(S, "invalid immediate: only 4-bit values are legal");
    }
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}

//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//

// Parse a SOPP branch target: a label or an absolute 16-bit signed offset.
OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return MatchOperand_NoMatch;

  if (parseExpr(Operands)) {

    AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
    assert(Opr.isImm() || Opr.isExpr());
    SMLoc Loc = Opr.getStartLoc();

    // Currently we do not support arbitrary expressions as branch targets.
    // Only labels and absolute expressions are accepted.
    if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
      Error(Loc, "expected an absolute expression or a label");
    } else if (Opr.isImm() && !Opr.isS16Imm()) {
      Error(Loc, "expected a 16-bit signed jump offset");
    }
  }

  return MatchOperand_Success; // avoid excessive error messages
}

//===----------------------------------------------------------------------===//
// Boolean holding registers
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}

//===----------------------------------------------------------------------===//
// mubuf
//===----------------------------------------------------------------------===//

// Default (absent) values for optional MUBUF cache-policy modifiers.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
}

// Convert parsed MUBUF operands into MCInst operands. Handles the tied
// dst/src of returning atomics, the lds opcode workaround and the optional
// modifiers (offset/glc/slc/tfe/dlc).
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsAtomicReturn,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;

  // A returning atomic implies an atomic opcode.
  assert(IsAtomicReturn ? IsAtomic : true);
  unsigned FirstOperandIdx = 1;

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  if (!IsAtomic) { // glc is hard-coded.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }

  if (isGFX10())
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}

// Convert parsed MTBUF operands into MCInst operands, appending the
// optional modifiers (offset/format/glc/slc/tfe/dlc) in encoding order.
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);

  if (isGFX10())
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}

//===----------------------------------------------------------------------===//
// mimg
//===----------------------------------------------------------------------===//

// Convert parsed MIMG operands into MCInst operands. Atomics tie the first
// def register as an additional source. Optional modifiers are appended in
// encoding order, which differs between GFX10 and older subtargets.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else if (!Op.isToken()) {
      llvm_unreachable("unexpected operand type");
    }
  }

  bool IsGFX10 = isGFX10();

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  if (IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyDim, -1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  if (IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  if (!IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}

void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}

//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isSMRDOffset8() const {
  return isImm() && isUInt<8>(getImm());
}

bool AMDGPUOperand::isSMRDOffset20() const {
  return isImm() && isUInt<20>(getImm());
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

//===----------------------------------------------------------------------===//
// vop3
//===----------------------------------------------------------------------===//

// Map an omod multiplier to its encoding: mul(1) -> 0, mul(2) -> 1,
// mul(4) -> 2. Any other value is rejected.
static bool ConvertOmodMul(int64_t &Mul) {
  if (Mul != 1 && Mul != 2 && Mul != 4)
    return false;

  Mul >>= 1;
  return true;
}

// Map an omod divisor to its encoding: div(1) -> 0, div(2) -> 3.
// Any other value is rejected.
static bool ConvertOmodDiv(int64_t &Div) {
  if (Div == 1) {
    Div = 0;
    return true;
  }

  if (Div == 2) {
    Div = 3;
    return true;
  }

  return false;
}

// Map a bound_ctrl value to its encoding: 0 -> 1, -1 -> 0.
// Any other value is rejected.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0) {
    BoundCtrl = 1;
    return true;
  }

  if (BoundCtrl == -1) {
    BoundCtrl = 0;
    return true;
  }

  return false;
}

// Note: the order in this table matches the order of operands in AsmString.
5920 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 5921 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 5922 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 5923 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 5924 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 5925 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 5926 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 5927 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 5928 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 5929 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 5930 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 5931 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 5932 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 5933 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 5934 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 5935 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5936 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 5937 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 5938 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 5939 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 5940 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 5941 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5942 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5943 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 5944 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5945 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 5946 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 5947 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 5948 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 5949 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 5950 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 5951 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 5952 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 5953 {"src1_sel", 
AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 5954 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 5955 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 5956 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 5957 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 5958 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 5959 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 5960 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 5961 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 5962 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 5963 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 5964 }; 5965 5966 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 5967 unsigned size = Operands.size(); 5968 assert(size > 0); 5969 5970 OperandMatchResultTy res = parseOptionalOpr(Operands); 5971 5972 // This is a hack to enable hardcoded mandatory operands which follow 5973 // optional operands. 5974 // 5975 // Current design assumes that all operands after the first optional operand 5976 // are also optional. However implementation of some instructions violates 5977 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 5978 // 5979 // To alleviate this problem, we have to (implicitly) parse extra operands 5980 // to make sure autogenerated parser of custom operands never hit hardcoded 5981 // mandatory operands. 5982 5983 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 5984 5985 // We have parsed the first optional operand. 5986 // Parse as many operands as necessary to skip all mandatory operands. 
5987 5988 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 5989 if (res != MatchOperand_Success || 5990 getLexer().is(AsmToken::EndOfStatement)) break; 5991 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 5992 res = parseOptionalOpr(Operands); 5993 } 5994 } 5995 5996 return res; 5997 } 5998 5999 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6000 OperandMatchResultTy res; 6001 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6002 // try to parse any optional operand here 6003 if (Op.IsBit) { 6004 res = parseNamedBit(Op.Name, Operands, Op.Type); 6005 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6006 res = parseOModOperand(Operands); 6007 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6008 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6009 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6010 res = parseSDWASel(Operands, Op.Name, Op.Type); 6011 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6012 res = parseSDWADstUnused(Operands); 6013 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6014 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6015 Op.Type == AMDGPUOperand::ImmTyNegLo || 6016 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6017 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6018 Op.ConvertResult); 6019 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6020 res = parseDim(Operands); 6021 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 6022 res = parseDfmtNfmt(Operands); 6023 } else { 6024 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6025 } 6026 if (res != MatchOperand_NoMatch) { 6027 return res; 6028 } 6029 } 6030 return MatchOperand_NoMatch; 6031 } 6032 6033 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6034 StringRef Name = Parser.getTok().getString(); 6035 if (Name == "mul") { 6036 return parseIntWithPrefix("mul", Operands, 6037 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6038 } 6039 6040 if (Name 
== "div") { 6041 return parseIntWithPrefix("div", Operands, 6042 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6043 } 6044 6045 return MatchOperand_NoMatch; 6046 } 6047 6048 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6049 cvtVOP3P(Inst, Operands); 6050 6051 int Opc = Inst.getOpcode(); 6052 6053 int SrcNum; 6054 const int Ops[] = { AMDGPU::OpName::src0, 6055 AMDGPU::OpName::src1, 6056 AMDGPU::OpName::src2 }; 6057 for (SrcNum = 0; 6058 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6059 ++SrcNum); 6060 assert(SrcNum > 0); 6061 6062 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6063 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6064 6065 if ((OpSel & (1 << SrcNum)) != 0) { 6066 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6067 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6068 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6069 } 6070 } 6071 6072 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6073 // 1. This operand is input modifiers 6074 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6075 // 2. This is not last operand 6076 && Desc.NumOperands > (OpNum + 1) 6077 // 3. Next operand is register class 6078 && Desc.OpInfo[OpNum + 1].RegClass != -1 6079 // 4. 
Next register is not tied to any other operand 6080 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6081 } 6082 6083 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6084 { 6085 OptionalImmIndexMap OptionalIdx; 6086 unsigned Opc = Inst.getOpcode(); 6087 6088 unsigned I = 1; 6089 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6090 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6091 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6092 } 6093 6094 for (unsigned E = Operands.size(); I != E; ++I) { 6095 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6096 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6097 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6098 } else if (Op.isInterpSlot() || 6099 Op.isInterpAttr() || 6100 Op.isAttrChan()) { 6101 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6102 } else if (Op.isImmModifier()) { 6103 OptionalIdx[Op.getImmTy()] = I; 6104 } else { 6105 llvm_unreachable("unhandled operand type"); 6106 } 6107 } 6108 6109 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6110 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6111 } 6112 6113 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6114 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6115 } 6116 6117 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6118 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6119 } 6120 } 6121 6122 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6123 OptionalImmIndexMap &OptionalIdx) { 6124 unsigned Opc = Inst.getOpcode(); 6125 6126 unsigned I = 1; 6127 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6128 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6129 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6130 } 6131 6132 if (AMDGPU::getNamedOperandIdx(Opc, 
AMDGPU::OpName::src0_modifiers) != -1) { 6133 // This instruction has src modifiers 6134 for (unsigned E = Operands.size(); I != E; ++I) { 6135 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6136 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6137 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6138 } else if (Op.isImmModifier()) { 6139 OptionalIdx[Op.getImmTy()] = I; 6140 } else if (Op.isRegOrImm()) { 6141 Op.addRegOrImmOperands(Inst, 1); 6142 } else { 6143 llvm_unreachable("unhandled operand type"); 6144 } 6145 } 6146 } else { 6147 // No src modifiers 6148 for (unsigned E = Operands.size(); I != E; ++I) { 6149 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6150 if (Op.isMod()) { 6151 OptionalIdx[Op.getImmTy()] = I; 6152 } else { 6153 Op.addRegOrImmOperands(Inst, 1); 6154 } 6155 } 6156 } 6157 6158 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6159 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6160 } 6161 6162 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6163 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6164 } 6165 6166 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6167 // it has src2 register operand that is tied to dst operand 6168 // we don't allow modifiers for this operand in assembler so src2_modifiers 6169 // should be 0. 
6170 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6171 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6172 Opc == AMDGPU::V_MAC_F32_e64_vi || 6173 Opc == AMDGPU::V_MAC_F16_e64_vi || 6174 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6175 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6176 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6177 auto it = Inst.begin(); 6178 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6179 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6180 ++it; 6181 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6182 } 6183 } 6184 6185 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6186 OptionalImmIndexMap OptionalIdx; 6187 cvtVOP3(Inst, Operands, OptionalIdx); 6188 } 6189 6190 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6191 const OperandVector &Operands) { 6192 OptionalImmIndexMap OptIdx; 6193 const int Opc = Inst.getOpcode(); 6194 const MCInstrDesc &Desc = MII.get(Opc); 6195 6196 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6197 6198 cvtVOP3(Inst, Operands, OptIdx); 6199 6200 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6201 assert(!IsPacked); 6202 Inst.addOperand(Inst.getOperand(0)); 6203 } 6204 6205 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6206 // instruction, and then figure out where to actually put the modifiers 6207 6208 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6209 6210 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6211 if (OpSelHiIdx != -1) { 6212 int DefaultVal = IsPacked ? 
-1 : 0; 6213 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6214 DefaultVal); 6215 } 6216 6217 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6218 if (NegLoIdx != -1) { 6219 assert(IsPacked); 6220 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6221 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6222 } 6223 6224 const int Ops[] = { AMDGPU::OpName::src0, 6225 AMDGPU::OpName::src1, 6226 AMDGPU::OpName::src2 }; 6227 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6228 AMDGPU::OpName::src1_modifiers, 6229 AMDGPU::OpName::src2_modifiers }; 6230 6231 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6232 6233 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6234 unsigned OpSelHi = 0; 6235 unsigned NegLo = 0; 6236 unsigned NegHi = 0; 6237 6238 if (OpSelHiIdx != -1) { 6239 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6240 } 6241 6242 if (NegLoIdx != -1) { 6243 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6244 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6245 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6246 } 6247 6248 for (int J = 0; J < 3; ++J) { 6249 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6250 if (OpIdx == -1) 6251 break; 6252 6253 uint32_t ModVal = 0; 6254 6255 if ((OpSel & (1 << J)) != 0) 6256 ModVal |= SISrcMods::OP_SEL_0; 6257 6258 if ((OpSelHi & (1 << J)) != 0) 6259 ModVal |= SISrcMods::OP_SEL_1; 6260 6261 if ((NegLo & (1 << J)) != 0) 6262 ModVal |= SISrcMods::NEG; 6263 6264 if ((NegHi & (1 << J)) != 0) 6265 ModVal |= SISrcMods::NEG_HI; 6266 6267 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6268 6269 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6270 } 6271 } 6272 6273 //===----------------------------------------------------------------------===// 6274 // dpp 6275 //===----------------------------------------------------------------------===// 
// True if this operand is a parsed dpp8 selector immediate.
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

// True if this operand is a dpp_ctrl immediate whose value falls in one of
// the encodings accepted by the DPP hardware (quad_perm, row shifts/rotates,
// wave shifts/rotates, mirrors, broadcasts, row_share/row_xmask).
bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

// blgp operand: 3-bit unsigned immediate.
bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

// cbsz operand: 3-bit unsigned immediate.
bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

// abid operand: 4-bit unsigned immediate.
bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

// 16-bit immediate, accepting both signed and unsigned ranges.
bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

// 16-bit unsigned immediate.
bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

// Parse a MIMG "dim:<name>" operand (GFX10 only). Accepts either the short
// asm suffix (e.g. "1D") or the full "SQ_RSRC_IMG_*" spelling.
OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10())
    return MatchOperand_NoMatch;

  SMLoc S = Parser.getTok().getLoc();

  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_NoMatch;
  if (getLexer().getTok().getString() != "dim")
    return MatchOperand_NoMatch;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();

  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
  // integer.
  std::string Token;
  if (getLexer().is(AsmToken::Integer)) {
    SMLoc Loc = getLexer().getTok().getEndLoc();
    Token = getLexer().getTok().getString();
    Parser.Lex();
    // The identifier must immediately follow the integer (no whitespace),
    // otherwise this is not a single dim name.
    if (getLexer().getTok().getLoc() != Loc)
      return MatchOperand_ParseFail;
  }
  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_ParseFail;
  Token += getLexer().getTok().getString();

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.substr(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return MatchOperand_ParseFail;

  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

// Parse "dpp8:[s0,s1,...,s7]" (GFX10). Each selector is 0..7; the eight
// 3-bit selectors are packed into a single 24-bit immediate. Falls back to
// parseDPPCtrl when the prefix is not "dpp8".
OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix != "dpp8")
    return parseDPPCtrl(Operands);
  if (!isGFX10())
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LBrac))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getParser().parseAbsoluteExpression(Sels[0]))
    return MatchOperand_ParseFail;
  if (0 > Sels[0] || 7 < Sels[0])
    return MatchOperand_ParseFail;

  for (size_t i = 1; i < 8; ++i) {
    if (getLexer().isNot(AsmToken::Comma))
      return MatchOperand_ParseFail;

    Parser.Lex();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i])
      return MatchOperand_ParseFail;
  }

  if (getLexer().isNot(AsmToken::RBrac))
    return MatchOperand_ParseFail;
  Parser.Lex();

  // Pack the eight 3-bit selectors, lane 0 in the low bits.
  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}

// Parse a classic dpp_ctrl operand: quad_perm:[a,b,c,d], row/wave shifts
// and rotates, mirrors, broadcasts, row_share and row_xmask. Encodes the
// result as a single ImmTyDppCtrl immediate.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast"
        && Prefix != "row_share"
        && Prefix != "row_xmask") {
      return MatchOperand_NoMatch;
    }

    // row_share/row_xmask exist only on GFX10.
    if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
      return MatchOperand_NoMatch;

    // wave_* and row_bcast exist only on VI/GFX9.
    if (!isVI() && !isGFX9() &&
        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
         Prefix == "wave_rol" || Prefix == "wave_ror" ||
         Prefix == "row_bcast"))
      return MatchOperand_NoMatch;

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
        return MatchOperand_ParseFail;

      // Remaining three lane selectors, packed two bits each.
      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
          return MatchOperand_ParseFail;
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      // Translate prefix + value into the hardware dpp_ctrl encoding.
      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHARE_FIRST;
      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_XMASK_FIRST;
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

// Default row_mask is 0xf (all rows enabled).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

// Default endpgm immediate is 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

// Default bank_mask is 0xf (all banks enabled).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

// Default bound_ctrl is 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

// Default fi (fetch-invalid) is 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

// Convert parsed operands of a DPP (or DPP8 when IsDPP8 is set) instruction
// into MCInst operands, duplicating tied operands and appending the DPP
// control immediates with their defaults.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        // fi is emitted after the loop as a single trailing immediate.
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

// Parse an SDWA select operand "<Prefix>:<BYTE_n|WORD_n|DWORD>" and push the
// corresponding SdwaSel immediate of the given Type.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);  // sentinel: unrecognized selector name
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

// Parse "dst_unused:<UNUSED_PAD|UNUSED_SEXT|UNUSED_PRESERVE>".
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);  // sentinel: unrecognized value
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

// Per-encoding SDWA conversion entry points; all delegate to cvtSDWA.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

// Convert parsed operands of an SDWA instruction into MCInst operands,
// optionally skipping an implicit VCC operand (VOP2b/VOPC) and appending the
// optional clamp/omod/sel immediates appropriate for BasicInstType.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

// Defaults for the MAI modifier operands are all 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This fuction should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand. This method checks if we are given immediate operand but expect to
  // get corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

// Parse the optional 16-bit immediate of s_endpgm; defaults to 0 when absent.
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional, if not present default to 0
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }