1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/ErrorHandling.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 #include "llvm/Support/SMLoc.h" 52 #include "llvm/Support/TargetParser.h" 53 #include "llvm/Support/TargetRegistry.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <cassert> 57 #include <cstdint> 58 #include <cstring> 59 #include <iterator> 60 #include <map> 61 #include <memory> 62 #include <string> 63 64 using namespace llvm; 65 using namespace llvm::AMDGPU; 66 using namespace llvm::amdhsa; 67 68 namespace { 69 70 class AMDGPUAsmParser; 71 72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 73 74 //===----------------------------------------------------------------------===// 75 // Operand 76 //===----------------------------------------------------------------------===// 77 78 class AMDGPUOperand : public MCParsedAsmOperand { 79 enum KindTy { 80 Token, 81 Immediate, 82 Register, 83 Expression 84 } Kind; 85 86 SMLoc StartLoc, EndLoc; 87 const AMDGPUAsmParser *AsmParser; 88 89 public: 90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 92 93 using Ptr = std::unique_ptr<AMDGPUOperand>; 94 95 struct Modifiers { 96 bool Abs = false; 97 bool Neg = false; 98 bool Sext = false; 99 100 bool hasFPModifiers() const { return Abs || Neg; } 101 bool hasIntModifiers() const { return Sext; } 102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 103 104 int64_t getFPModifiersOperand() const { 105 int64_t Operand = 0; 106 Operand |= Abs ? 
          SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
223 return isSymbolRefExpr(); 224 } 225 226 bool isSymbolRefExpr() const { 227 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 228 } 229 230 bool isImm() const override { 231 return Kind == Immediate; 232 } 233 234 bool isInlinableImm(MVT type) const; 235 bool isLiteralImm(MVT type) const; 236 237 bool isRegKind() const { 238 return Kind == Register; 239 } 240 241 bool isReg() const override { 242 return isRegKind() && !hasModifiers(); 243 } 244 245 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 246 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 247 } 248 249 bool isRegOrImmWithInt16InputMods() const { 250 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 251 } 252 253 bool isRegOrImmWithInt32InputMods() const { 254 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 255 } 256 257 bool isRegOrImmWithInt64InputMods() const { 258 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 259 } 260 261 bool isRegOrImmWithFP16InputMods() const { 262 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 263 } 264 265 bool isRegOrImmWithFP32InputMods() const { 266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 267 } 268 269 bool isRegOrImmWithFP64InputMods() const { 270 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 271 } 272 273 bool isVReg() const { 274 return isRegClass(AMDGPU::VGPR_32RegClassID) || 275 isRegClass(AMDGPU::VReg_64RegClassID) || 276 isRegClass(AMDGPU::VReg_96RegClassID) || 277 isRegClass(AMDGPU::VReg_128RegClassID) || 278 isRegClass(AMDGPU::VReg_160RegClassID) || 279 isRegClass(AMDGPU::VReg_256RegClassID) || 280 isRegClass(AMDGPU::VReg_512RegClassID) || 281 isRegClass(AMDGPU::VReg_1024RegClassID); 282 } 283 284 bool isVReg32() const { 285 return isRegClass(AMDGPU::VGPR_32RegClassID); 286 } 287 288 bool isVReg32OrOff() const { 289 return isOff() || isVReg32(); 290 } 291 292 bool isSDWAOperand(MVT type) const; 293 bool isSDWAFP16Operand() const; 294 bool isSDWAFP32Operand() const; 295 bool isSDWAInt16Operand() const; 296 bool isSDWAInt32Operand() const; 297 298 bool isImmTy(ImmTy ImmT) const { 299 return isImm() && Imm.Type == ImmT; 300 } 301 302 bool isImmModifier() const { 303 return isImm() && Imm.Type != ImmTyNone; 304 } 305 306 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 307 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 308 bool isDMask() const { return isImmTy(ImmTyDMask); } 309 bool isDim() const { return isImmTy(ImmTyDim); } 310 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 311 bool isDA() const { return isImmTy(ImmTyDA); } 312 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 313 bool isLWE() const { return isImmTy(ImmTyLWE); } 314 bool isOff() const { return isImmTy(ImmTyOff); } 315 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 316 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 317 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 318 bool isOffen() const { return isImmTy(ImmTyOffen); } 319 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 320 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 321 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 322 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 323 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 324 325 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 326 bool isGDS() const { return 
isImmTy(ImmTyGDS); } 327 bool isLDS() const { return isImmTy(ImmTyLDS); } 328 bool isDLC() const { return isImmTy(ImmTyDLC); } 329 bool isGLC() const { return isImmTy(ImmTyGLC); } 330 bool isSLC() const { return isImmTy(ImmTySLC); } 331 bool isTFE() const { return isImmTy(ImmTyTFE); } 332 bool isD16() const { return isImmTy(ImmTyD16); } 333 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 334 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 335 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 336 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 337 bool isFI() const { return isImmTy(ImmTyDppFi); } 338 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 339 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 340 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 341 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 342 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 343 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 344 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 345 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 346 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 347 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 348 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 349 bool isHigh() const { return isImmTy(ImmTyHigh); } 350 351 bool isMod() const { 352 return isClampSI() || isOModSI(); 353 } 354 355 bool isRegOrImm() const { 356 return isReg() || isImm(); 357 } 358 359 bool isRegClass(unsigned RCID) const; 360 361 bool isInlineValue() const; 362 363 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 364 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 365 } 366 367 bool isSCSrcB16() const { 368 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 369 } 370 371 bool isSCSrcV2B16() const { 372 return isSCSrcB16(); 373 } 374 375 bool isSCSrcB32() const { 376 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 377 } 378 379 bool isSCSrcB64() const { 380 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 381 } 382 383 bool isBoolReg() const; 384 385 bool isSCSrcF16() const { 386 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 387 } 388 389 bool isSCSrcV2F16() const { 390 return isSCSrcF16(); 391 } 392 393 bool isSCSrcF32() const { 394 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 395 } 396 397 bool isSCSrcF64() const { 398 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 399 } 400 401 bool isSSrcB32() const { 402 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 403 } 404 405 bool isSSrcB16() const { 406 return isSCSrcB16() || isLiteralImm(MVT::i16); 407 } 408 409 bool isSSrcV2B16() const { 410 llvm_unreachable("cannot happen"); 411 return isSSrcB16(); 412 } 413 414 bool isSSrcB64() const { 415 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 416 // See isVSrc64(). 
417 return isSCSrcB64() || isLiteralImm(MVT::i64); 418 } 419 420 bool isSSrcF32() const { 421 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 422 } 423 424 bool isSSrcF64() const { 425 return isSCSrcB64() || isLiteralImm(MVT::f64); 426 } 427 428 bool isSSrcF16() const { 429 return isSCSrcB16() || isLiteralImm(MVT::f16); 430 } 431 432 bool isSSrcV2F16() const { 433 llvm_unreachable("cannot happen"); 434 return isSSrcF16(); 435 } 436 437 bool isSSrcOrLdsB32() const { 438 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 439 isLiteralImm(MVT::i32) || isExpr(); 440 } 441 442 bool isVCSrcB32() const { 443 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 444 } 445 446 bool isVCSrcB64() const { 447 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 448 } 449 450 bool isVCSrcB16() const { 451 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 452 } 453 454 bool isVCSrcV2B16() const { 455 return isVCSrcB16(); 456 } 457 458 bool isVCSrcF32() const { 459 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 460 } 461 462 bool isVCSrcF64() const { 463 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 464 } 465 466 bool isVCSrcF16() const { 467 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 468 } 469 470 bool isVCSrcV2F16() const { 471 return isVCSrcF16(); 472 } 473 474 bool isVSrcB32() const { 475 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 476 } 477 478 bool isVSrcB64() const { 479 return isVCSrcF64() || isLiteralImm(MVT::i64); 480 } 481 482 bool isVSrcB16() const { 483 return isVCSrcF16() || isLiteralImm(MVT::i16); 484 } 485 486 bool isVSrcV2B16() const { 487 return isVSrcB16() || isLiteralImm(MVT::v2i16); 488 } 489 490 bool isVSrcF32() const { 491 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 492 } 493 494 bool isVSrcF64() const { 495 return isVCSrcF64() || isLiteralImm(MVT::f64); 496 } 497 498 bool isVSrcF16() const { 499 return isVCSrcF16() || isLiteralImm(MVT::f16); 500 } 501 502 bool isVSrcV2F16() const { 503 return isVSrcF16() || isLiteralImm(MVT::v2f16); 504 } 505 506 bool isVISrcB32() const { 507 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 508 } 509 510 bool isVISrcB16() const { 511 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 512 } 513 514 bool isVISrcV2B16() const { 515 return isVISrcB16(); 516 } 517 518 bool isVISrcF32() const { 519 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 520 } 521 522 bool isVISrcF16() const { 523 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 524 } 525 526 bool isVISrcV2F16() const { 527 return isVISrcF16() || isVISrcB32(); 528 } 529 530 bool isAISrcB32() const { 531 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 532 } 533 534 bool isAISrcB16() const { 535 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 536 } 537 538 bool isAISrcV2B16() const { 539 return isAISrcB16(); 540 } 541 542 bool isAISrcF32() const { 543 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 544 } 545 546 bool isAISrcF16() const { 547 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 548 } 549 550 bool isAISrcV2F16() const { 551 return isAISrcF16() || isAISrcB32(); 552 } 553 554 bool isAISrc_128B32() const { 555 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 556 } 557 558 bool isAISrc_128B16() const { 559 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 560 } 561 562 bool 
isAISrc_128V2B16() const { 563 return isAISrc_128B16(); 564 } 565 566 bool isAISrc_128F32() const { 567 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 568 } 569 570 bool isAISrc_128F16() const { 571 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 572 } 573 574 bool isAISrc_128V2F16() const { 575 return isAISrc_128F16() || isAISrc_128B32(); 576 } 577 578 bool isAISrc_512B32() const { 579 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 580 } 581 582 bool isAISrc_512B16() const { 583 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 584 } 585 586 bool isAISrc_512V2B16() const { 587 return isAISrc_512B16(); 588 } 589 590 bool isAISrc_512F32() const { 591 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 592 } 593 594 bool isAISrc_512F16() const { 595 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 596 } 597 598 bool isAISrc_512V2F16() const { 599 return isAISrc_512F16() || isAISrc_512B32(); 600 } 601 602 bool isAISrc_1024B32() const { 603 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 604 } 605 606 bool isAISrc_1024B16() const { 607 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 608 } 609 610 bool isAISrc_1024V2B16() const { 611 return isAISrc_1024B16(); 612 } 613 614 bool isAISrc_1024F32() const { 615 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 616 } 617 618 bool isAISrc_1024F16() const { 619 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 620 } 621 622 bool isAISrc_1024V2F16() const { 623 return isAISrc_1024F16() || isAISrc_1024B32(); 624 } 625 626 bool isKImmFP32() const { 627 return isLiteralImm(MVT::f32); 628 } 629 630 bool isKImmFP16() const { 631 return isLiteralImm(MVT::f16); 632 } 633 634 bool isMem() const override { 635 return false; 636 } 637 638 bool isExpr() const { 639 return Kind == Expression; 640 } 641 642 bool isSoppBrTarget() const { 643 return isExpr() || isImm(); 644 } 645 646 bool isSWaitCnt() const; 647 bool isHwreg() const; 648 bool isSendMsg() const; 649 bool isSwizzle() const; 650 bool isSMRDOffset8() const; 651 bool isSMRDOffset20() const; 652 bool isSMRDLiteralOffset() const; 653 bool isDPP8() const; 654 bool isDPPCtrl() const; 655 bool isBLGP() const; 656 bool isCBSZ() const; 657 bool isABID() const; 658 bool isGPRIdxMode() const; 659 bool isS16Imm() const; 660 bool isU16Imm() const; 661 bool isEndpgm() const; 662 663 StringRef getExpressionAsToken() const { 664 assert(isExpr()); 665 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 666 return S->getSymbol().getName(); 667 } 668 669 StringRef getToken() const { 670 assert(isToken()); 671 672 if (Kind == Expression) 673 return getExpressionAsToken(); 674 675 return StringRef(Tok.Data, Tok.Length); 676 } 677 678 int64_t getImm() const { 679 assert(isImm()); 680 return Imm.Val; 681 } 682 683 ImmTy getImmTy() const { 684 assert(isImm()); 685 return Imm.Type; 686 } 687 688 unsigned getReg() const override { 689 assert(isRegKind()); 690 return Reg.RegNo; 691 } 692 693 SMLoc getStartLoc() const override { 694 return StartLoc; 695 } 696 697 SMLoc getEndLoc() const override { 698 return EndLoc; 699 } 700 701 SMRange getLocRange() const { 702 return SMRange(StartLoc, EndLoc); 703 } 704 705 Modifiers getModifiers() const { 706 assert(isRegKind() || isImmTy(ImmTyNone)); 707 return isRegKind() ? 
Reg.Mods : Imm.Mods; 708 } 709 710 void setModifiers(Modifiers Mods) { 711 assert(isRegKind() || isImmTy(ImmTyNone)); 712 if (isRegKind()) 713 Reg.Mods = Mods; 714 else 715 Imm.Mods = Mods; 716 } 717 718 bool hasModifiers() const { 719 return getModifiers().hasModifiers(); 720 } 721 722 bool hasFPModifiers() const { 723 return getModifiers().hasFPModifiers(); 724 } 725 726 bool hasIntModifiers() const { 727 return getModifiers().hasIntModifiers(); 728 } 729 730 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 731 732 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 733 734 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 735 736 template <unsigned Bitwidth> 737 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 738 739 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 740 addKImmFPOperands<16>(Inst, N); 741 } 742 743 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 744 addKImmFPOperands<32>(Inst, N); 745 } 746 747 void addRegOperands(MCInst &Inst, unsigned N) const; 748 749 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 750 addRegOperands(Inst, N); 751 } 752 753 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 754 if (isRegKind()) 755 addRegOperands(Inst, N); 756 else if (isExpr()) 757 Inst.addOperand(MCOperand::createExpr(Expr)); 758 else 759 addImmOperands(Inst, N); 760 } 761 762 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 763 Modifiers Mods = getModifiers(); 764 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 765 if (isRegKind()) { 766 addRegOperands(Inst, N); 767 } else { 768 addImmOperands(Inst, N, false); 769 } 770 } 771 772 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 773 assert(!hasIntModifiers()); 774 addRegOrImmWithInputModsOperands(Inst, N); 775 } 776 777 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 778 assert(!hasFPModifiers()); 779 addRegOrImmWithInputModsOperands(Inst, N); 780 } 781 782 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 783 Modifiers Mods = getModifiers(); 784 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 785 assert(isRegKind()); 786 addRegOperands(Inst, N); 787 } 788 789 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 790 assert(!hasIntModifiers()); 791 addRegWithInputModsOperands(Inst, N); 792 } 793 794 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 795 assert(!hasFPModifiers()); 796 addRegWithInputModsOperands(Inst, N); 797 } 798 799 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 800 if (isImm()) 801 addImmOperands(Inst, N); 802 else { 803 assert(isExpr()); 804 Inst.addOperand(MCOperand::createExpr(Expr)); 805 } 806 } 807 808 static void printImmTy(raw_ostream& OS, ImmTy Type) { 809 switch (Type) { 810 case ImmTyNone: OS << "None"; break; 811 case ImmTyGDS: OS << "GDS"; break; 812 case ImmTyLDS: OS << "LDS"; break; 813 case ImmTyOffen: OS << "Offen"; break; 814 case ImmTyIdxen: OS << "Idxen"; break; 815 case ImmTyAddr64: OS << "Addr64"; break; 816 case ImmTyOffset: OS << "Offset"; break; 817 case ImmTyInstOffset: OS << "InstOffset"; break; 818 case ImmTyOffset0: OS << "Offset0"; break; 819 case ImmTyOffset1: OS << "Offset1"; break; 820 case ImmTyDLC: OS << "DLC"; break; 821 case ImmTyGLC: OS << "GLC"; break; 822 case ImmTySLC: OS << "SLC"; break; 823 case ImmTyTFE: OS << "TFE"; break; 824 case ImmTyD16: OS << "D16"; break; 825 case 
ImmTyFORMAT: OS << "FORMAT"; break; 826 case ImmTyClampSI: OS << "ClampSI"; break; 827 case ImmTyOModSI: OS << "OModSI"; break; 828 case ImmTyDPP8: OS << "DPP8"; break; 829 case ImmTyDppCtrl: OS << "DppCtrl"; break; 830 case ImmTyDppRowMask: OS << "DppRowMask"; break; 831 case ImmTyDppBankMask: OS << "DppBankMask"; break; 832 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 833 case ImmTyDppFi: OS << "FI"; break; 834 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 835 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 836 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 837 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 838 case ImmTyDMask: OS << "DMask"; break; 839 case ImmTyDim: OS << "Dim"; break; 840 case ImmTyUNorm: OS << "UNorm"; break; 841 case ImmTyDA: OS << "DA"; break; 842 case ImmTyR128A16: OS << "R128A16"; break; 843 case ImmTyLWE: OS << "LWE"; break; 844 case ImmTyOff: OS << "Off"; break; 845 case ImmTyExpTgt: OS << "ExpTgt"; break; 846 case ImmTyExpCompr: OS << "ExpCompr"; break; 847 case ImmTyExpVM: OS << "ExpVM"; break; 848 case ImmTyHwreg: OS << "Hwreg"; break; 849 case ImmTySendMsg: OS << "SendMsg"; break; 850 case ImmTyInterpSlot: OS << "InterpSlot"; break; 851 case ImmTyInterpAttr: OS << "InterpAttr"; break; 852 case ImmTyAttrChan: OS << "AttrChan"; break; 853 case ImmTyOpSel: OS << "OpSel"; break; 854 case ImmTyOpSelHi: OS << "OpSelHi"; break; 855 case ImmTyNegLo: OS << "NegLo"; break; 856 case ImmTyNegHi: OS << "NegHi"; break; 857 case ImmTySwizzle: OS << "Swizzle"; break; 858 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 859 case ImmTyHigh: OS << "High"; break; 860 case ImmTyBLGP: OS << "BLGP"; break; 861 case ImmTyCBSZ: OS << "CBSZ"; break; 862 case ImmTyABID: OS << "ABID"; break; 863 case ImmTyEndpgm: OS << "Endpgm"; break; 864 } 865 } 866 867 void print(raw_ostream &OS) const override { 868 switch (Kind) { 869 case Register: 870 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 871 break; 872 case Immediate: 873 OS << '<' << getImm(); 874 if (getImmTy() != ImmTyNone) { 875 OS << " type: "; printImmTy(OS, getImmTy()); 876 } 877 OS << " mods: " << Imm.Mods << '>'; 878 break; 879 case Token: 880 OS << '\'' << getToken() << '\''; 881 break; 882 case Expression: 883 OS << "<expr " << *Expr << '>'; 884 break; 885 } 886 } 887 888 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 889 int64_t Val, SMLoc Loc, 890 ImmTy Type = ImmTyNone, 891 bool IsFPImm = false) { 892 auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser); 893 Op->Imm.Val = Val; 894 Op->Imm.IsFPImm = IsFPImm; 895 Op->Imm.Type = Type; 896 Op->Imm.Mods = Modifiers(); 897 Op->StartLoc = Loc; 898 Op->EndLoc = Loc; 899 return Op; 900 } 901 902 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 903 StringRef Str, SMLoc Loc, 904 bool HasExplicitEncodingSize = true) { 905 auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser); 906 Res->Tok.Data = Str.data(); 907 Res->Tok.Length = Str.size(); 908 Res->StartLoc = Loc; 909 Res->EndLoc = Loc; 910 return Res; 911 } 912 913 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 914 unsigned RegNo, SMLoc S, 915 SMLoc E) { 916 auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser); 917 Op->Reg.RegNo = RegNo; 918 Op->Reg.Mods = Modifiers(); 919 Op->StartLoc = S; 920 Op->EndLoc = E; 921 return Op; 922 } 923 924 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 925 const class MCExpr *Expr, SMLoc S) { 926 auto Op = 
llvm::make_unique<AMDGPUOperand>(Expression, AsmParser); 927 Op->Expr = Expr; 928 Op->StartLoc = S; 929 Op->EndLoc = S; 930 return Op; 931 } 932 }; 933 934 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 935 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 936 return OS; 937 } 938 939 //===----------------------------------------------------------------------===// 940 // AsmParser 941 //===----------------------------------------------------------------------===// 942 943 // Holds info related to the current kernel, e.g. count of SGPRs used. 944 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 945 // .amdgpu_hsa_kernel or at EOF. 946 class KernelScopeInfo { 947 int SgprIndexUnusedMin = -1; 948 int VgprIndexUnusedMin = -1; 949 MCContext *Ctx = nullptr; 950 951 void usesSgprAt(int i) { 952 if (i >= SgprIndexUnusedMin) { 953 SgprIndexUnusedMin = ++i; 954 if (Ctx) { 955 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 956 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 957 } 958 } 959 } 960 961 void usesVgprAt(int i) { 962 if (i >= VgprIndexUnusedMin) { 963 VgprIndexUnusedMin = ++i; 964 if (Ctx) { 965 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 966 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 967 } 968 } 969 } 970 971 public: 972 KernelScopeInfo() = default; 973 974 void initialize(MCContext &Context) { 975 Ctx = &Context; 976 usesSgprAt(SgprIndexUnusedMin = -1); 977 usesVgprAt(VgprIndexUnusedMin = -1); 978 } 979 980 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 981 switch (RegKind) { 982 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 983 case IS_AGPR: // fall through 984 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 985 default: break; 986 } 987 } 988 }; 989 990 class AMDGPUAsmParser : public MCTargetAsmParser { 991 MCAsmParser &Parser; 992 993 // Number of extra operands parsed after the first optional operand. 994 // This may be necessary to skip hardcoded mandatory operands. 995 static const unsigned MAX_OPR_LOOKAHEAD = 8; 996 997 unsigned ForcedEncodingSize = 0; 998 bool ForcedDPP = false; 999 bool ForcedSDWA = false; 1000 KernelScopeInfo KernelScope; 1001 1002 /// @name Auto-generated Match Functions 1003 /// { 1004 1005 #define GET_ASSEMBLER_HEADER 1006 #include "AMDGPUGenAsmMatcher.inc" 1007 1008 /// } 1009 1010 private: 1011 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1012 bool OutOfRangeError(SMRange Range); 1013 /// Calculate VGPR/SGPR blocks required for given target, reserved 1014 /// registers, and user-specified NextFreeXGPR values. 1015 /// 1016 /// \param Features [in] Target features, used for bug corrections. 1017 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1018 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1019 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1020 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1021 /// descriptor field, if valid. 1022 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1023 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1024 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1025 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1026 /// \param VGPRBlocks [out] Result VGPR block count. 
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1102 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1103 MCContext &Ctx = getContext(); 1104 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1105 MCSymbol *Sym = 1106 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1107 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1108 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1109 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1110 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1111 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1112 } else { 1113 MCSymbol *Sym = 1114 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1115 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1116 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1117 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1118 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1119 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1120 } 1121 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1122 initializeGprCountSymbol(IS_VGPR); 1123 initializeGprCountSymbol(IS_SGPR); 1124 } else 1125 KernelScope.initialize(getContext()); 1126 } 1127 } 1128 1129 bool hasXNACK() const { 1130 return AMDGPU::hasXNACK(getSTI()); 1131 } 1132 1133 bool hasMIMG_R128() const { 1134 return AMDGPU::hasMIMG_R128(getSTI()); 1135 } 1136 1137 bool hasPackedD16() const { 1138 return AMDGPU::hasPackedD16(getSTI()); 1139 } 1140 1141 bool isSI() const { 1142 return AMDGPU::isSI(getSTI()); 1143 } 1144 1145 bool isCI() const { 1146 return AMDGPU::isCI(getSTI()); 1147 } 1148 1149 bool isVI() const { 1150 return AMDGPU::isVI(getSTI()); 1151 } 1152 1153 bool isGFX9() const { 1154 return AMDGPU::isGFX9(getSTI()); 1155 } 1156 1157 bool isGFX10() const { 1158 return AMDGPU::isGFX10(getSTI()); 1159 } 1160 1161 bool hasInv2PiInlineImm() const { 1162 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1163 } 1164 1165 bool hasFlatOffsets() const { 1166 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1167 } 1168 1169 bool hasSGPR102_SGPR103() const { 1170 return !isVI() && !isGFX9(); 1171 } 1172 1173 bool hasSGPR104_SGPR105() const { 1174 return isGFX10(); 1175 } 1176 1177 bool hasIntClamp() const { 1178 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1179 } 1180 1181 AMDGPUTargetStreamer &getTargetStreamer() { 1182 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1183 return static_cast<AMDGPUTargetStreamer &>(TS); 1184 } 1185 1186 const MCRegisterInfo *getMRI() const { 1187 // We need this const_cast because for some reason getContext() is not const 1188 // in MCAsmParser. 
1189 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1190 } 1191 1192 const MCInstrInfo *getMII() const { 1193 return &MII; 1194 } 1195 1196 const FeatureBitset &getFeatureBits() const { 1197 return getSTI().getFeatureBits(); 1198 } 1199 1200 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1201 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1202 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1203 1204 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1205 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1206 bool isForcedDPP() const { return ForcedDPP; } 1207 bool isForcedSDWA() const { return ForcedSDWA; } 1208 ArrayRef<unsigned> getMatchedVariants() const; 1209 1210 std::unique_ptr<AMDGPUOperand> parseRegister(); 1211 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1212 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1213 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1214 unsigned Kind) override; 1215 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1216 OperandVector &Operands, MCStreamer &Out, 1217 uint64_t &ErrorInfo, 1218 bool MatchingInlineAsm) override; 1219 bool ParseDirective(AsmToken DirectiveID) override; 1220 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1221 OperandMode Mode = OperandMode_Default); 1222 StringRef parseMnemonicSuffix(StringRef Name); 1223 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1224 SMLoc NameLoc, OperandVector &Operands) override; 1225 //bool ProcessInstruction(MCInst &Inst); 1226 1227 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1228 1229 OperandMatchResultTy 1230 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1231 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1232 bool (*ConvertResult)(int64_t &) = nullptr); 1233 1234 OperandMatchResultTy 1235 parseOperandArrayWithPrefix(const char *Prefix, 1236 OperandVector &Operands, 1237 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1238 bool (*ConvertResult)(int64_t&) = nullptr); 1239 1240 OperandMatchResultTy 1241 parseNamedBit(const char *Name, OperandVector &Operands, 1242 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1243 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1244 StringRef &Value); 1245 1246 bool isModifier(); 1247 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1248 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1249 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1250 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1251 bool parseSP3NegModifier(); 1252 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1253 OperandMatchResultTy parseReg(OperandVector &Operands); 1254 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1255 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1256 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1257 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1258 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1259 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1260 OperandMatchResultTy 
parseDfmtNfmt(OperandVector &Operands); 1261 1262 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1263 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1264 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1265 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1266 1267 bool parseCnt(int64_t &IntVal); 1268 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1269 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1270 1271 private: 1272 struct OperandInfoTy { 1273 int64_t Id; 1274 bool IsSymbolic = false; 1275 bool IsDefined = false; 1276 1277 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1278 }; 1279 1280 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1281 bool validateSendMsg(const OperandInfoTy &Msg, 1282 const OperandInfoTy &Op, 1283 const OperandInfoTy &Stream, 1284 const SMLoc Loc); 1285 1286 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1287 bool validateHwreg(const OperandInfoTy &HwReg, 1288 const int64_t Offset, 1289 const int64_t Width, 1290 const SMLoc Loc); 1291 1292 void errorExpTgt(); 1293 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1294 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1295 1296 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1297 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1298 bool validateSOPLiteral(const MCInst &Inst) const; 1299 bool validateConstantBusLimitations(const MCInst &Inst); 1300 bool validateEarlyClobberLimitations(const MCInst &Inst); 1301 bool validateIntClampSupported(const MCInst &Inst); 1302 bool validateMIMGAtomicDMask(const MCInst &Inst); 1303 bool validateMIMGGatherDMask(const MCInst &Inst); 1304 bool validateMIMGDataSize(const MCInst &Inst); 1305 bool validateMIMGAddrSize(const MCInst &Inst); 1306 bool validateMIMGD16(const MCInst &Inst); 1307 bool validateMIMGDim(const MCInst &Inst); 1308 bool validateLdsDirect(const MCInst &Inst); 1309 bool validateOpSel(const MCInst &Inst); 1310 bool validateVccOperand(unsigned Reg) const; 1311 bool validateVOP3Literal(const MCInst &Inst) const; 1312 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1313 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1314 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1315 1316 bool isId(const StringRef Id) const; 1317 bool isId(const AsmToken &Token, const StringRef Id) const; 1318 bool isToken(const AsmToken::TokenKind Kind) const; 1319 bool trySkipId(const StringRef Id); 1320 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1321 bool trySkipToken(const AsmToken::TokenKind Kind); 1322 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1323 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1324 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1325 AsmToken::TokenKind getTokenKind() const; 1326 bool parseExpr(int64_t &Imm); 1327 bool parseExpr(OperandVector &Operands); 1328 StringRef getTokenStr() const; 1329 AsmToken peekToken(); 1330 AsmToken getToken() const; 1331 SMLoc getLoc() const; 1332 void lex(); 1333 1334 public: 1335 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1336 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1337 1338 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1339 
OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1340 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1341 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1342 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1343 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1344 1345 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1346 const unsigned MinVal, 1347 const unsigned MaxVal, 1348 const StringRef ErrMsg); 1349 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1350 bool parseSwizzleOffset(int64_t &Imm); 1351 bool parseSwizzleMacro(int64_t &Imm); 1352 bool parseSwizzleQuadPerm(int64_t &Imm); 1353 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1354 bool parseSwizzleBroadcast(int64_t &Imm); 1355 bool parseSwizzleSwap(int64_t &Imm); 1356 bool parseSwizzleReverse(int64_t &Imm); 1357 1358 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1359 int64_t parseGPRIdxMacro(); 1360 1361 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1362 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1363 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1364 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1365 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1366 1367 AMDGPUOperand::Ptr defaultDLC() const; 1368 AMDGPUOperand::Ptr defaultGLC() const; 1369 AMDGPUOperand::Ptr defaultSLC() const; 1370 1371 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1372 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1373 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1374 AMDGPUOperand::Ptr defaultFlatOffset() const; 1375 1376 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1377 1378 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1379 OptionalImmIndexMap &OptionalIdx); 1380 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1381 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1382 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1383 1384 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1385 1386 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1387 bool IsAtomic = false); 1388 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1389 1390 OperandMatchResultTy parseDim(OperandVector &Operands); 1391 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1392 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1393 AMDGPUOperand::Ptr defaultRowMask() const; 1394 AMDGPUOperand::Ptr defaultBankMask() const; 1395 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1396 AMDGPUOperand::Ptr defaultFI() const; 1397 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1398 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1399 1400 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1401 AMDGPUOperand::ImmTy Type); 1402 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1403 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1404 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1405 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1406 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1407 void cvtSDWA(MCInst 
&Inst, const OperandVector &Operands, 1408 uint64_t BasicInstType, bool skipVcc = false); 1409 1410 AMDGPUOperand::Ptr defaultBLGP() const; 1411 AMDGPUOperand::Ptr defaultCBSZ() const; 1412 AMDGPUOperand::Ptr defaultABID() const; 1413 1414 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1415 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1416 }; 1417 1418 struct OptionalOperand { 1419 const char *Name; 1420 AMDGPUOperand::ImmTy Type; 1421 bool IsBit; 1422 bool (*ConvertResult)(int64_t&); 1423 }; 1424 1425 } // end anonymous namespace 1426 1427 // May be called with integer type with equivalent bitwidth. 1428 static const fltSemantics *getFltSemantics(unsigned Size) { 1429 switch (Size) { 1430 case 4: 1431 return &APFloat::IEEEsingle(); 1432 case 8: 1433 return &APFloat::IEEEdouble(); 1434 case 2: 1435 return &APFloat::IEEEhalf(); 1436 default: 1437 llvm_unreachable("unsupported fp type"); 1438 } 1439 } 1440 1441 static const fltSemantics *getFltSemantics(MVT VT) { 1442 return getFltSemantics(VT.getSizeInBits() / 8); 1443 } 1444 1445 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1446 switch (OperandType) { 1447 case AMDGPU::OPERAND_REG_IMM_INT32: 1448 case AMDGPU::OPERAND_REG_IMM_FP32: 1449 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1450 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1451 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1452 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1453 return &APFloat::IEEEsingle(); 1454 case AMDGPU::OPERAND_REG_IMM_INT64: 1455 case AMDGPU::OPERAND_REG_IMM_FP64: 1456 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1457 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1458 return &APFloat::IEEEdouble(); 1459 case AMDGPU::OPERAND_REG_IMM_INT16: 1460 case AMDGPU::OPERAND_REG_IMM_FP16: 1461 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1462 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1463 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1464 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1465 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1466 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1467 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1468 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1469 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1470 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1471 return &APFloat::IEEEhalf(); 1472 default: 1473 llvm_unreachable("unsupported fp type"); 1474 } 1475 } 1476 1477 //===----------------------------------------------------------------------===// 1478 // Operand 1479 //===----------------------------------------------------------------------===// 1480 1481 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1482 bool Lost; 1483 1484 // Convert literal to single precision 1485 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1486 APFloat::rmNearestTiesToEven, 1487 &Lost); 1488 // We allow precision lost but not overflow or underflow 1489 if (Status != APFloat::opOK && 1490 Lost && 1491 ((Status & APFloat::opOverflow) != 0 || 1492 (Status & APFloat::opUnderflow) != 0)) { 1493 return false; 1494 } 1495 1496 return true; 1497 } 1498 1499 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1500 return isUIntN(Size, Val) || isIntN(Size, Val); 1501 } 1502 1503 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1504 1505 // This is a hack to enable named inline values like 1506 // shared_base with both 32-bit and 64-bit operands. 1507 // Note that these values are defined as 1508 // 32-bit operands only. 
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 64 bits of the literal to zeroes, but we accept such literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 :
                     (type == MVT::v2i16) ?
MVT::i16 : type; 1608 1609 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1610 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1611 } 1612 1613 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1614 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1615 } 1616 1617 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1618 if (AsmParser->isVI()) 1619 return isVReg32(); 1620 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1621 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1622 else 1623 return false; 1624 } 1625 1626 bool AMDGPUOperand::isSDWAFP16Operand() const { 1627 return isSDWAOperand(MVT::f16); 1628 } 1629 1630 bool AMDGPUOperand::isSDWAFP32Operand() const { 1631 return isSDWAOperand(MVT::f32); 1632 } 1633 1634 bool AMDGPUOperand::isSDWAInt16Operand() const { 1635 return isSDWAOperand(MVT::i16); 1636 } 1637 1638 bool AMDGPUOperand::isSDWAInt32Operand() const { 1639 return isSDWAOperand(MVT::i32); 1640 } 1641 1642 bool AMDGPUOperand::isBoolReg() const { 1643 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1644 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1645 } 1646 1647 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1648 { 1649 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1650 assert(Size == 2 || Size == 4 || Size == 8); 1651 1652 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1653 1654 if (Imm.Mods.Abs) { 1655 Val &= ~FpSignMask; 1656 } 1657 if (Imm.Mods.Neg) { 1658 Val ^= FpSignMask; 1659 } 1660 1661 return Val; 1662 } 1663 1664 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1665 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1666 Inst.getNumOperands())) { 1667 addLiteralImmOperand(Inst, Imm.Val, 1668 ApplyModifiers & 1669 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1670 } else { 1671 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1672 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1673 } 1674 } 1675 1676 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1677 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1678 auto OpNum = Inst.getNumOperands(); 1679 // Check that this operand accepts literals 1680 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1681 1682 if (ApplyModifiers) { 1683 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1684 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1685 Val = applyInputFPModifiers(Val, Size); 1686 } 1687 1688 APInt Literal(64, Val); 1689 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1690 1691 if (Imm.IsFPImm) { // We got fp literal token 1692 switch (OpTy) { 1693 case AMDGPU::OPERAND_REG_IMM_INT64: 1694 case AMDGPU::OPERAND_REG_IMM_FP64: 1695 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1696 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1697 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1698 AsmParser->hasInv2PiInlineImm())) { 1699 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1700 return; 1701 } 1702 1703 // Non-inlineable 1704 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1705 // For fp operands we check if low 32 bits are zeros 1706 if (Literal.getLoBits(32) != 0) { 1707 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1708 "Can't encode literal as exact 64-bit floating-point operand. " 1709 "Low 32-bits will be set to zero"); 1710 } 1711 1712 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1713 return; 1714 } 1715 1716 // We don't allow fp literals in 64-bit integer instructions. It is 1717 // unclear how we should encode them. This case should be checked earlier 1718 // in predicate methods (isLiteralImm()) 1719 llvm_unreachable("fp literal in 64-bit integer instruction."); 1720 1721 case AMDGPU::OPERAND_REG_IMM_INT32: 1722 case AMDGPU::OPERAND_REG_IMM_FP32: 1723 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1724 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1725 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1726 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1727 case AMDGPU::OPERAND_REG_IMM_INT16: 1728 case AMDGPU::OPERAND_REG_IMM_FP16: 1729 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1730 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1731 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1732 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1733 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1734 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1735 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1736 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1737 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1738 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1739 bool lost; 1740 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1741 // Convert literal to single precision 1742 FPLiteral.convert(*getOpFltSemantics(OpTy), 1743 APFloat::rmNearestTiesToEven, &lost); 1744 // We allow precision lost but not overflow or underflow. This should be 1745 // checked earlier in isLiteralImm() 1746 1747 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1748 Inst.addOperand(MCOperand::createImm(ImmVal)); 1749 return; 1750 } 1751 default: 1752 llvm_unreachable("invalid operand size"); 1753 } 1754 1755 return; 1756 } 1757 1758 // We got int literal token. 1759 // Only sign extend inline immediates. 
1760 switch (OpTy) { 1761 case AMDGPU::OPERAND_REG_IMM_INT32: 1762 case AMDGPU::OPERAND_REG_IMM_FP32: 1763 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1764 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1765 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1766 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1767 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1768 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1769 if (isSafeTruncation(Val, 32) && 1770 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1771 AsmParser->hasInv2PiInlineImm())) { 1772 Inst.addOperand(MCOperand::createImm(Val)); 1773 return; 1774 } 1775 1776 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1777 return; 1778 1779 case AMDGPU::OPERAND_REG_IMM_INT64: 1780 case AMDGPU::OPERAND_REG_IMM_FP64: 1781 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1782 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1783 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1784 Inst.addOperand(MCOperand::createImm(Val)); 1785 return; 1786 } 1787 1788 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1789 return; 1790 1791 case AMDGPU::OPERAND_REG_IMM_INT16: 1792 case AMDGPU::OPERAND_REG_IMM_FP16: 1793 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1794 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1795 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1796 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1797 if (isSafeTruncation(Val, 16) && 1798 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1799 AsmParser->hasInv2PiInlineImm())) { 1800 Inst.addOperand(MCOperand::createImm(Val)); 1801 return; 1802 } 1803 1804 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1805 return; 1806 1807 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1808 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1809 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1810 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1811 assert(isSafeTruncation(Val, 16)); 1812 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1813 AsmParser->hasInv2PiInlineImm())); 1814 1815 Inst.addOperand(MCOperand::createImm(Val)); 1816 return; 1817 } 1818 default: 1819 llvm_unreachable("invalid operand size"); 1820 } 1821 } 1822 1823 template <unsigned Bitwidth> 1824 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1825 APInt Literal(64, Imm.Val); 1826 1827 if (!Imm.IsFPImm) { 1828 // We got int literal token. 
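    // A KIMM operand keeps only the low Bitwidth bits of the integer
    // literal; no int-to-float conversion is performed in this case.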
1829 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1830 return; 1831 } 1832 1833 bool Lost; 1834 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1835 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1836 APFloat::rmNearestTiesToEven, &Lost); 1837 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1838 } 1839 1840 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1841 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1842 } 1843 1844 static bool isInlineValue(unsigned Reg) { 1845 switch (Reg) { 1846 case AMDGPU::SRC_SHARED_BASE: 1847 case AMDGPU::SRC_SHARED_LIMIT: 1848 case AMDGPU::SRC_PRIVATE_BASE: 1849 case AMDGPU::SRC_PRIVATE_LIMIT: 1850 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1851 return true; 1852 case AMDGPU::SRC_VCCZ: 1853 case AMDGPU::SRC_EXECZ: 1854 case AMDGPU::SRC_SCC: 1855 return true; 1856 default: 1857 return false; 1858 } 1859 } 1860 1861 bool AMDGPUOperand::isInlineValue() const { 1862 return isRegKind() && ::isInlineValue(getReg()); 1863 } 1864 1865 //===----------------------------------------------------------------------===// 1866 // AsmParser 1867 //===----------------------------------------------------------------------===// 1868 1869 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1870 if (Is == IS_VGPR) { 1871 switch (RegWidth) { 1872 default: return -1; 1873 case 1: return AMDGPU::VGPR_32RegClassID; 1874 case 2: return AMDGPU::VReg_64RegClassID; 1875 case 3: return AMDGPU::VReg_96RegClassID; 1876 case 4: return AMDGPU::VReg_128RegClassID; 1877 case 5: return AMDGPU::VReg_160RegClassID; 1878 case 8: return AMDGPU::VReg_256RegClassID; 1879 case 16: return AMDGPU::VReg_512RegClassID; 1880 case 32: return AMDGPU::VReg_1024RegClassID; 1881 } 1882 } else if (Is == IS_TTMP) { 1883 switch (RegWidth) { 1884 default: return -1; 1885 case 1: return AMDGPU::TTMP_32RegClassID; 1886 case 2: return AMDGPU::TTMP_64RegClassID; 1887 case 4: return AMDGPU::TTMP_128RegClassID; 1888 case 8: return AMDGPU::TTMP_256RegClassID; 1889 case 16: return AMDGPU::TTMP_512RegClassID; 1890 } 1891 } else if (Is == IS_SGPR) { 1892 switch (RegWidth) { 1893 default: return -1; 1894 case 1: return AMDGPU::SGPR_32RegClassID; 1895 case 2: return AMDGPU::SGPR_64RegClassID; 1896 case 4: return AMDGPU::SGPR_128RegClassID; 1897 case 8: return AMDGPU::SGPR_256RegClassID; 1898 case 16: return AMDGPU::SGPR_512RegClassID; 1899 } 1900 } else if (Is == IS_AGPR) { 1901 switch (RegWidth) { 1902 default: return -1; 1903 case 1: return AMDGPU::AGPR_32RegClassID; 1904 case 2: return AMDGPU::AReg_64RegClassID; 1905 case 4: return AMDGPU::AReg_128RegClassID; 1906 case 16: return AMDGPU::AReg_512RegClassID; 1907 case 32: return AMDGPU::AReg_1024RegClassID; 1908 } 1909 } 1910 return -1; 1911 } 1912 1913 static unsigned getSpecialRegForName(StringRef RegName) { 1914 return StringSwitch<unsigned>(RegName) 1915 .Case("exec", AMDGPU::EXEC) 1916 .Case("vcc", AMDGPU::VCC) 1917 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1918 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1919 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1920 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1921 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1922 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1923 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1924 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1925 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1926 .Case("src_private_limit", 
AMDGPU::SRC_PRIVATE_LIMIT) 1927 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1928 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1929 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1930 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1931 .Case("m0", AMDGPU::M0) 1932 .Case("vccz", AMDGPU::SRC_VCCZ) 1933 .Case("src_vccz", AMDGPU::SRC_VCCZ) 1934 .Case("execz", AMDGPU::SRC_EXECZ) 1935 .Case("src_execz", AMDGPU::SRC_EXECZ) 1936 .Case("scc", AMDGPU::SRC_SCC) 1937 .Case("src_scc", AMDGPU::SRC_SCC) 1938 .Case("tba", AMDGPU::TBA) 1939 .Case("tma", AMDGPU::TMA) 1940 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1941 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1942 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1943 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1944 .Case("vcc_lo", AMDGPU::VCC_LO) 1945 .Case("vcc_hi", AMDGPU::VCC_HI) 1946 .Case("exec_lo", AMDGPU::EXEC_LO) 1947 .Case("exec_hi", AMDGPU::EXEC_HI) 1948 .Case("tma_lo", AMDGPU::TMA_LO) 1949 .Case("tma_hi", AMDGPU::TMA_HI) 1950 .Case("tba_lo", AMDGPU::TBA_LO) 1951 .Case("tba_hi", AMDGPU::TBA_HI) 1952 .Case("null", AMDGPU::SGPR_NULL) 1953 .Default(0); 1954 } 1955 1956 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1957 SMLoc &EndLoc) { 1958 auto R = parseRegister(); 1959 if (!R) return true; 1960 assert(R->isReg()); 1961 RegNo = R->getReg(); 1962 StartLoc = R->getStartLoc(); 1963 EndLoc = R->getEndLoc(); 1964 return false; 1965 } 1966 1967 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1968 RegisterKind RegKind, unsigned Reg1, 1969 unsigned RegNum) { 1970 switch (RegKind) { 1971 case IS_SPECIAL: 1972 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1973 Reg = AMDGPU::EXEC; 1974 RegWidth = 2; 1975 return true; 1976 } 1977 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1978 Reg = AMDGPU::FLAT_SCR; 1979 RegWidth = 2; 1980 return true; 1981 } 1982 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1983 Reg = AMDGPU::XNACK_MASK; 1984 RegWidth = 2; 1985 return true; 1986 } 1987 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1988 Reg = AMDGPU::VCC; 1989 RegWidth = 2; 1990 return true; 1991 } 1992 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1993 Reg = AMDGPU::TBA; 1994 RegWidth = 2; 1995 return true; 1996 } 1997 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 1998 Reg = AMDGPU::TMA; 1999 RegWidth = 2; 2000 return true; 2001 } 2002 return false; 2003 case IS_VGPR: 2004 case IS_SGPR: 2005 case IS_AGPR: 2006 case IS_TTMP: 2007 if (Reg1 != Reg + RegWidth) { 2008 return false; 2009 } 2010 RegWidth++; 2011 return true; 2012 default: 2013 llvm_unreachable("unexpected register kind"); 2014 } 2015 } 2016 2017 static const StringRef Registers[] = { 2018 { "v" }, 2019 { "s" }, 2020 { "ttmp" }, 2021 { "acc" }, 2022 { "a" }, 2023 }; 2024 2025 bool 2026 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2027 const AsmToken &NextToken) const { 2028 2029 // A list of consecutive registers: [s0,s1,s2,s3] 2030 if (Token.is(AsmToken::LBrac)) 2031 return true; 2032 2033 if (!Token.is(AsmToken::Identifier)) 2034 return false; 2035 2036 // A single register like s0 or a range of registers like s[0:1] 2037 2038 StringRef RegName = Token.getString(); 2039 2040 for (StringRef Reg : Registers) { 2041 if (RegName.startswith(Reg)) { 2042 if (Reg.size() < RegName.size()) { 2043 unsigned RegNum; 2044 // A single register with an index: rXX 2045 if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum)) 2046 return true; 2047 } 
else { 2048 // A range of registers: r[XX:YY]. 2049 if (NextToken.is(AsmToken::LBrac)) 2050 return true; 2051 } 2052 } 2053 } 2054 2055 return getSpecialRegForName(RegName); 2056 } 2057 2058 bool 2059 AMDGPUAsmParser::isRegister() 2060 { 2061 return isRegister(getToken(), peekToken()); 2062 } 2063 2064 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2065 unsigned &RegNum, unsigned &RegWidth, 2066 unsigned *DwordRegIndex) { 2067 if (DwordRegIndex) { *DwordRegIndex = 0; } 2068 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2069 if (getLexer().is(AsmToken::Identifier)) { 2070 StringRef RegName = Parser.getTok().getString(); 2071 if ((Reg = getSpecialRegForName(RegName))) { 2072 Parser.Lex(); 2073 RegKind = IS_SPECIAL; 2074 } else { 2075 unsigned RegNumIndex = 0; 2076 if (RegName[0] == 'v') { 2077 RegNumIndex = 1; 2078 RegKind = IS_VGPR; 2079 } else if (RegName[0] == 's') { 2080 RegNumIndex = 1; 2081 RegKind = IS_SGPR; 2082 } else if (RegName[0] == 'a') { 2083 RegNumIndex = RegName.startswith("acc") ? 3 : 1; 2084 RegKind = IS_AGPR; 2085 } else if (RegName.startswith("ttmp")) { 2086 RegNumIndex = strlen("ttmp"); 2087 RegKind = IS_TTMP; 2088 } else { 2089 return false; 2090 } 2091 if (RegName.size() > RegNumIndex) { 2092 // Single 32-bit register: vXX. 2093 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 2094 return false; 2095 Parser.Lex(); 2096 RegWidth = 1; 2097 } else { 2098 // Range of registers: v[XX:YY]. ":YY" is optional. 2099 Parser.Lex(); 2100 int64_t RegLo, RegHi; 2101 if (getLexer().isNot(AsmToken::LBrac)) 2102 return false; 2103 Parser.Lex(); 2104 2105 if (getParser().parseAbsoluteExpression(RegLo)) 2106 return false; 2107 2108 const bool isRBrace = getLexer().is(AsmToken::RBrac); 2109 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 2110 return false; 2111 Parser.Lex(); 2112 2113 if (isRBrace) { 2114 RegHi = RegLo; 2115 } else { 2116 if (getParser().parseAbsoluteExpression(RegHi)) 2117 return false; 2118 2119 if (getLexer().isNot(AsmToken::RBrac)) 2120 return false; 2121 Parser.Lex(); 2122 } 2123 RegNum = (unsigned) RegLo; 2124 RegWidth = (RegHi - RegLo) + 1; 2125 } 2126 } 2127 } else if (getLexer().is(AsmToken::LBrac)) { 2128 // List of consecutive registers: [s0,s1,s2,s3] 2129 Parser.Lex(); 2130 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 2131 return false; 2132 if (RegWidth != 1) 2133 return false; 2134 RegisterKind RegKind1; 2135 unsigned Reg1, RegNum1, RegWidth1; 2136 do { 2137 if (getLexer().is(AsmToken::Comma)) { 2138 Parser.Lex(); 2139 } else if (getLexer().is(AsmToken::RBrac)) { 2140 Parser.Lex(); 2141 break; 2142 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 2143 if (RegWidth1 != 1) { 2144 return false; 2145 } 2146 if (RegKind1 != RegKind) { 2147 return false; 2148 } 2149 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 2150 return false; 2151 } 2152 } else { 2153 return false; 2154 } 2155 } while (true); 2156 } else { 2157 return false; 2158 } 2159 switch (RegKind) { 2160 case IS_SPECIAL: 2161 RegNum = 0; 2162 RegWidth = 1; 2163 break; 2164 case IS_VGPR: 2165 case IS_SGPR: 2166 case IS_AGPR: 2167 case IS_TTMP: 2168 { 2169 unsigned Size = 1; 2170 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2171 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
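      // For example, s[2:3] must start at an even SGPR and s[4:7] at a
      // multiple of 4; wider ranges are only checked for 4-dword alignment.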
2172 Size = std::min(RegWidth, 4u); 2173 } 2174 if (RegNum % Size != 0) 2175 return false; 2176 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 2177 RegNum = RegNum / Size; 2178 int RCID = getRegClass(RegKind, RegWidth); 2179 if (RCID == -1) 2180 return false; 2181 const MCRegisterClass RC = TRI->getRegClass(RCID); 2182 if (RegNum >= RC.getNumRegs()) 2183 return false; 2184 Reg = RC.getRegister(RegNum); 2185 break; 2186 } 2187 2188 default: 2189 llvm_unreachable("unexpected register kind"); 2190 } 2191 2192 if (!subtargetHasRegister(*TRI, Reg)) 2193 return false; 2194 return true; 2195 } 2196 2197 Optional<StringRef> 2198 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2199 switch (RegKind) { 2200 case IS_VGPR: 2201 return StringRef(".amdgcn.next_free_vgpr"); 2202 case IS_SGPR: 2203 return StringRef(".amdgcn.next_free_sgpr"); 2204 default: 2205 return None; 2206 } 2207 } 2208 2209 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2210 auto SymbolName = getGprCountSymbolName(RegKind); 2211 assert(SymbolName && "initializing invalid register kind"); 2212 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2213 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2214 } 2215 2216 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2217 unsigned DwordRegIndex, 2218 unsigned RegWidth) { 2219 // Symbols are only defined for GCN targets 2220 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2221 return true; 2222 2223 auto SymbolName = getGprCountSymbolName(RegKind); 2224 if (!SymbolName) 2225 return true; 2226 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2227 2228 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2229 int64_t OldCount; 2230 2231 if (!Sym->isVariable()) 2232 return !Error(getParser().getTok().getLoc(), 2233 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2234 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2235 return !Error( 2236 getParser().getTok().getLoc(), 2237 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2238 2239 if (OldCount <= NewMax) 2240 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2241 2242 return true; 2243 } 2244 2245 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 2246 const auto &Tok = Parser.getTok(); 2247 SMLoc StartLoc = Tok.getLoc(); 2248 SMLoc EndLoc = Tok.getEndLoc(); 2249 RegisterKind RegKind; 2250 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 2251 2252 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 2253 //FIXME: improve error messages (bug 41303). 
2254 Error(StartLoc, "not a valid operand."); 2255 return nullptr; 2256 } 2257 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2258 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 2259 return nullptr; 2260 } else 2261 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 2262 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2263 } 2264 2265 OperandMatchResultTy 2266 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2267 // TODO: add syntactic sugar for 1/(2*PI) 2268 2269 assert(!isRegister()); 2270 assert(!isModifier()); 2271 2272 const auto& Tok = getToken(); 2273 const auto& NextTok = peekToken(); 2274 bool IsReal = Tok.is(AsmToken::Real); 2275 SMLoc S = getLoc(); 2276 bool Negate = false; 2277 2278 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2279 lex(); 2280 IsReal = true; 2281 Negate = true; 2282 } 2283 2284 if (IsReal) { 2285 // Floating-point expressions are not supported. 2286 // Can only allow floating-point literals with an 2287 // optional sign. 2288 2289 StringRef Num = getTokenStr(); 2290 lex(); 2291 2292 APFloat RealVal(APFloat::IEEEdouble()); 2293 auto roundMode = APFloat::rmNearestTiesToEven; 2294 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { 2295 return MatchOperand_ParseFail; 2296 } 2297 if (Negate) 2298 RealVal.changeSign(); 2299 2300 Operands.push_back( 2301 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2302 AMDGPUOperand::ImmTyNone, true)); 2303 2304 return MatchOperand_Success; 2305 2306 } else { 2307 int64_t IntVal; 2308 const MCExpr *Expr; 2309 SMLoc S = getLoc(); 2310 2311 if (HasSP3AbsModifier) { 2312 // This is a workaround for handling expressions 2313 // as arguments of SP3 'abs' modifier, for example: 2314 // |1.0| 2315 // |-1| 2316 // |1+x| 2317 // This syntax is not compatible with syntax of standard 2318 // MC expressions (due to the trailing '|'). 
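      // Parsing only a primary expression here keeps the trailing '|' from
      // being taken as a binary operator; the caller consumes it as the
      // closing SP3 'abs' bar.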
2319 SMLoc EndLoc; 2320 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2321 return MatchOperand_ParseFail; 2322 } else { 2323 if (Parser.parseExpression(Expr)) 2324 return MatchOperand_ParseFail; 2325 } 2326 2327 if (Expr->evaluateAsAbsolute(IntVal)) { 2328 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2329 } else { 2330 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2331 } 2332 2333 return MatchOperand_Success; 2334 } 2335 2336 return MatchOperand_NoMatch; 2337 } 2338 2339 OperandMatchResultTy 2340 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2341 if (!isRegister()) 2342 return MatchOperand_NoMatch; 2343 2344 if (auto R = parseRegister()) { 2345 assert(R->isReg()); 2346 Operands.push_back(std::move(R)); 2347 return MatchOperand_Success; 2348 } 2349 return MatchOperand_ParseFail; 2350 } 2351 2352 OperandMatchResultTy 2353 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2354 auto res = parseReg(Operands); 2355 if (res != MatchOperand_NoMatch) { 2356 return res; 2357 } else if (isModifier()) { 2358 return MatchOperand_NoMatch; 2359 } else { 2360 return parseImm(Operands, HasSP3AbsMod); 2361 } 2362 } 2363 2364 bool 2365 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2366 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2367 const auto &str = Token.getString(); 2368 return str == "abs" || str == "neg" || str == "sext"; 2369 } 2370 return false; 2371 } 2372 2373 bool 2374 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2375 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2376 } 2377 2378 bool 2379 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2380 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2381 } 2382 2383 bool 2384 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2385 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2386 } 2387 2388 // Check if this is an operand modifier or an opcode modifier 2389 // which may look like an expression but it is not. We should 2390 // avoid parsing these modifiers as expressions. Currently 2391 // recognized sequences are: 2392 // |...| 2393 // abs(...) 2394 // neg(...) 2395 // sext(...) 2396 // -reg 2397 // -|...| 2398 // -abs(...) 2399 // name:... 2400 // Note that simple opcode modifiers like 'gds' may be parsed as 2401 // expressions; this is a special case. See getExpressionAsToken. 2402 // 2403 bool 2404 AMDGPUAsmParser::isModifier() { 2405 2406 AsmToken Tok = getToken(); 2407 AsmToken NextToken[2]; 2408 peekTokens(NextToken); 2409 2410 return isOperandModifier(Tok, NextToken[0]) || 2411 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2412 isOpcodeModifierWithVal(Tok, NextToken[0]); 2413 } 2414 2415 // Check if the current token is an SP3 'neg' modifier. 2416 // Currently this modifier is allowed in the following context: 2417 // 2418 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2419 // 2. Before an 'abs' modifier: -abs(...) 2420 // 3. Before an SP3 'abs' modifier: -|...| 2421 // 2422 // In all other cases "-" is handled as a part 2423 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//   v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//   v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2497 } 2498 2499 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2500 return MatchOperand_ParseFail; 2501 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2502 return MatchOperand_ParseFail; 2503 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2504 return MatchOperand_ParseFail; 2505 2506 AMDGPUOperand::Modifiers Mods; 2507 Mods.Abs = Abs || SP3Abs; 2508 Mods.Neg = Neg || SP3Neg; 2509 2510 if (Mods.hasFPModifiers()) { 2511 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2512 if (Op.isExpr()) { 2513 Error(Op.getStartLoc(), "expected an absolute expression"); 2514 return MatchOperand_ParseFail; 2515 } 2516 Op.setModifiers(Mods); 2517 } 2518 return MatchOperand_Success; 2519 } 2520 2521 OperandMatchResultTy 2522 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2523 bool AllowImm) { 2524 bool Sext = trySkipId("sext"); 2525 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2526 return MatchOperand_ParseFail; 2527 2528 OperandMatchResultTy Res; 2529 if (AllowImm) { 2530 Res = parseRegOrImm(Operands); 2531 } else { 2532 Res = parseReg(Operands); 2533 } 2534 if (Res != MatchOperand_Success) { 2535 return Sext? MatchOperand_ParseFail : Res; 2536 } 2537 2538 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2539 return MatchOperand_ParseFail; 2540 2541 AMDGPUOperand::Modifiers Mods; 2542 Mods.Sext = Sext; 2543 2544 if (Mods.hasIntModifiers()) { 2545 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2546 if (Op.isExpr()) { 2547 Error(Op.getStartLoc(), "expected an absolute expression"); 2548 return MatchOperand_ParseFail; 2549 } 2550 Op.setModifiers(Mods); 2551 } 2552 2553 return MatchOperand_Success; 2554 } 2555 2556 OperandMatchResultTy 2557 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2558 return parseRegOrImmWithFPInputMods(Operands, false); 2559 } 2560 2561 OperandMatchResultTy 2562 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2563 return parseRegOrImmWithIntInputMods(Operands, false); 2564 } 2565 2566 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2567 auto Loc = getLoc(); 2568 if (trySkipId("off")) { 2569 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2570 AMDGPUOperand::ImmTyOff, false)); 2571 return MatchOperand_Success; 2572 } 2573 2574 if (!isRegister()) 2575 return MatchOperand_NoMatch; 2576 2577 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2578 if (Reg) { 2579 Operands.push_back(std::move(Reg)); 2580 return MatchOperand_Success; 2581 } 2582 2583 return MatchOperand_ParseFail; 2584 2585 } 2586 2587 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2588 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2589 2590 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2591 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2592 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2593 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2594 return Match_InvalidOperand; 2595 2596 if ((TSFlags & SIInstrFlags::VOP3) && 2597 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2598 getForcedEncodingSize() != 64) 2599 return Match_PreferE32; 2600 2601 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2602 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2603 // v_mac_f32/16 allow only dst_sel == DWORD; 2604 auto OpNum = 2605 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2606 const auto &Op = Inst.getOperand(OpNum); 2607 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2608 return Match_InvalidOperand; 2609 } 2610 } 2611 2612 return Match_Success; 2613 } 2614 2615 // What asm variants we should check 2616 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2617 if (getForcedEncodingSize() == 32) { 2618 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2619 return makeArrayRef(Variants); 2620 } 2621 2622 if (isForcedVOP3()) { 2623 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2624 return makeArrayRef(Variants); 2625 } 2626 2627 if (isForcedSDWA()) { 2628 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2629 AMDGPUAsmVariants::SDWA9}; 2630 return makeArrayRef(Variants); 2631 } 2632 2633 if (isForcedDPP()) { 2634 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2635 return makeArrayRef(Variants); 2636 } 2637 2638 static const unsigned Variants[] = { 2639 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2640 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2641 }; 2642 2643 return makeArrayRef(Variants); 2644 } 2645 2646 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2647 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2648 const unsigned Num = Desc.getNumImplicitUses(); 2649 for (unsigned i = 0; i < Num; ++i) { 2650 unsigned Reg = Desc.ImplicitUses[i]; 2651 switch (Reg) { 2652 case AMDGPU::FLAT_SCR: 2653 case AMDGPU::VCC: 2654 case AMDGPU::VCC_LO: 2655 case AMDGPU::VCC_HI: 2656 case AMDGPU::M0: 2657 case AMDGPU::SGPR_NULL: 2658 return Reg; 2659 default: 2660 break; 2661 } 2662 } 2663 return AMDGPU::NoRegister; 2664 } 2665 2666 // NB: This code is correct only when used to check constant 2667 // bus limitations because GFX7 support no f16 inline constants. 2668 // Note that there are no cases when a GFX7 opcode violates 2669 // constant bus limitations due to the use of an f16 constant. 
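// Check whether the immediate already placed at operand OpIdx can be
// encoded as an inline constant of that operand's expected size.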
2670 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2671 unsigned OpIdx) const { 2672 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2673 2674 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2675 return false; 2676 } 2677 2678 const MCOperand &MO = Inst.getOperand(OpIdx); 2679 2680 int64_t Val = MO.getImm(); 2681 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2682 2683 switch (OpSize) { // expected operand size 2684 case 8: 2685 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2686 case 4: 2687 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2688 case 2: { 2689 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2690 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2691 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2692 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2693 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2694 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2695 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2696 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2697 } else { 2698 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2699 } 2700 } 2701 default: 2702 llvm_unreachable("invalid operand size"); 2703 } 2704 } 2705 2706 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2707 const MCOperand &MO = Inst.getOperand(OpIdx); 2708 if (MO.isImm()) { 2709 return !isInlineConstant(Inst, OpIdx); 2710 } 2711 return !MO.isReg() || 2712 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2713 } 2714 2715 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2716 const unsigned Opcode = Inst.getOpcode(); 2717 const MCInstrDesc &Desc = MII.get(Opcode); 2718 unsigned ConstantBusUseCount = 0; 2719 unsigned NumLiterals = 0; 2720 unsigned LiteralSize; 2721 2722 if (Desc.TSFlags & 2723 (SIInstrFlags::VOPC | 2724 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2725 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2726 SIInstrFlags::SDWA)) { 2727 // Check special imm operands (used by madmk, etc) 2728 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2729 ++ConstantBusUseCount; 2730 } 2731 2732 SmallDenseSet<unsigned> SGPRsUsed; 2733 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2734 if (SGPRUsed != AMDGPU::NoRegister) { 2735 SGPRsUsed.insert(SGPRUsed); 2736 ++ConstantBusUseCount; 2737 } 2738 2739 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2740 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2741 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2742 2743 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2744 2745 for (int OpIdx : OpIndices) { 2746 if (OpIdx == -1) break; 2747 2748 const MCOperand &MO = Inst.getOperand(OpIdx); 2749 if (usesConstantBus(Inst, OpIdx)) { 2750 if (MO.isReg()) { 2751 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2752 // Pairs of registers with a partial intersections like these 2753 // s0, s[0:1] 2754 // flat_scratch_lo, flat_scratch 2755 // flat_scratch_lo, flat_scratch_hi 2756 // are theoretically valid but they are disabled anyway. 
2757 // Note that this code mimics SIInstrInfo::verifyInstruction 2758 if (!SGPRsUsed.count(Reg)) { 2759 SGPRsUsed.insert(Reg); 2760 ++ConstantBusUseCount; 2761 } 2762 } else { // Expression or a literal 2763 2764 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2765 continue; // special operand like VINTERP attr_chan 2766 2767 // An instruction may use only one literal. 2768 // This has been validated on the previous step. 2769 // See validateVOP3Literal. 2770 // This literal may be used as more than one operand. 2771 // If all these operands are of the same size, 2772 // this literal counts as one scalar value. 2773 // Otherwise it counts as 2 scalar values. 2774 // See "GFX10 Shader Programming", section 3.6.2.3. 2775 2776 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2777 if (Size < 4) Size = 4; 2778 2779 if (NumLiterals == 0) { 2780 NumLiterals = 1; 2781 LiteralSize = Size; 2782 } else if (LiteralSize != Size) { 2783 NumLiterals = 2; 2784 } 2785 } 2786 } 2787 } 2788 } 2789 ConstantBusUseCount += NumLiterals; 2790 2791 if (isGFX10()) 2792 return ConstantBusUseCount <= 2; 2793 2794 return ConstantBusUseCount <= 1; 2795 } 2796 2797 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2798 const unsigned Opcode = Inst.getOpcode(); 2799 const MCInstrDesc &Desc = MII.get(Opcode); 2800 2801 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2802 if (DstIdx == -1 || 2803 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2804 return true; 2805 } 2806 2807 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2808 2809 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2810 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2811 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2812 2813 assert(DstIdx != -1); 2814 const MCOperand &Dst = Inst.getOperand(DstIdx); 2815 assert(Dst.isReg()); 2816 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2817 2818 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2819 2820 for (int SrcIdx : SrcIndices) { 2821 if (SrcIdx == -1) break; 2822 const MCOperand &Src = Inst.getOperand(SrcIdx); 2823 if (Src.isReg()) { 2824 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2825 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2826 return false; 2827 } 2828 } 2829 } 2830 2831 return true; 2832 } 2833 2834 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2835 2836 const unsigned Opc = Inst.getOpcode(); 2837 const MCInstrDesc &Desc = MII.get(Opc); 2838 2839 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2840 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2841 assert(ClampIdx != -1); 2842 return Inst.getOperand(ClampIdx).getImm() == 0; 2843 } 2844 2845 return true; 2846 } 2847 2848 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2849 2850 const unsigned Opc = Inst.getOpcode(); 2851 const MCInstrDesc &Desc = MII.get(Opc); 2852 2853 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2854 return true; 2855 2856 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2857 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2858 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2859 2860 assert(VDataIdx != -1); 2861 assert(DMaskIdx != -1); 2862 assert(TFEIdx != -1); 2863 2864 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2865 unsigned TFESize = 
Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2866 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2867 if (DMask == 0) 2868 DMask = 1; 2869 2870 unsigned DataSize = 2871 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 2872 if (hasPackedD16()) { 2873 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2874 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2875 DataSize = (DataSize + 1) / 2; 2876 } 2877 2878 return (VDataSize / 4) == DataSize + TFESize; 2879 } 2880 2881 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 2882 const unsigned Opc = Inst.getOpcode(); 2883 const MCInstrDesc &Desc = MII.get(Opc); 2884 2885 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 2886 return true; 2887 2888 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 2889 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 2890 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 2891 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 2892 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 2893 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2894 2895 assert(VAddr0Idx != -1); 2896 assert(SrsrcIdx != -1); 2897 assert(DimIdx != -1); 2898 assert(SrsrcIdx > VAddr0Idx); 2899 2900 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 2901 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 2902 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 2903 unsigned VAddrSize = 2904 IsNSA ? SrsrcIdx - VAddr0Idx 2905 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 2906 2907 unsigned AddrSize = BaseOpcode->NumExtraArgs + 2908 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 2909 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 2910 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 2911 if (!IsNSA) { 2912 if (AddrSize > 8) 2913 AddrSize = 16; 2914 else if (AddrSize > 4) 2915 AddrSize = 8; 2916 } 2917 2918 return VAddrSize == AddrSize; 2919 } 2920 2921 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2922 2923 const unsigned Opc = Inst.getOpcode(); 2924 const MCInstrDesc &Desc = MII.get(Opc); 2925 2926 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2927 return true; 2928 if (!Desc.mayLoad() || !Desc.mayStore()) 2929 return true; // Not atomic 2930 2931 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2932 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2933 2934 // This is an incomplete check because image_atomic_cmpswap 2935 // may only use 0x3 and 0xf while other atomic operations 2936 // may use 0x1 and 0x3. However these limitations are 2937 // verified when we check that dmask matches dst size. 2938 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2939 } 2940 2941 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2942 2943 const unsigned Opc = Inst.getOpcode(); 2944 const MCInstrDesc &Desc = MII.get(Opc); 2945 2946 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2947 return true; 2948 2949 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2950 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2951 2952 // GATHER4 instructions use dmask in a different fashion compared to 2953 // other MIMG instructions. The only useful DMASK values are 2954 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2955 // (red,red,red,red) etc.) The ISA document doesn't mention 2956 // this. 
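  // Hence require exactly one of the four channel bits to be set.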
2957 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2958 } 2959 2960 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2961 2962 const unsigned Opc = Inst.getOpcode(); 2963 const MCInstrDesc &Desc = MII.get(Opc); 2964 2965 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2966 return true; 2967 2968 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2969 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2970 if (isCI() || isSI()) 2971 return false; 2972 } 2973 2974 return true; 2975 } 2976 2977 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 2978 const unsigned Opc = Inst.getOpcode(); 2979 const MCInstrDesc &Desc = MII.get(Opc); 2980 2981 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2982 return true; 2983 2984 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2985 if (DimIdx < 0) 2986 return true; 2987 2988 long Imm = Inst.getOperand(DimIdx).getImm(); 2989 if (Imm < 0 || Imm >= 8) 2990 return false; 2991 2992 return true; 2993 } 2994 2995 static bool IsRevOpcode(const unsigned Opcode) 2996 { 2997 switch (Opcode) { 2998 case AMDGPU::V_SUBREV_F32_e32: 2999 case AMDGPU::V_SUBREV_F32_e64: 3000 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3001 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3002 case AMDGPU::V_SUBREV_F32_e32_vi: 3003 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3004 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3005 case AMDGPU::V_SUBREV_F32_e64_vi: 3006 3007 case AMDGPU::V_SUBREV_I32_e32: 3008 case AMDGPU::V_SUBREV_I32_e64: 3009 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3010 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3011 3012 case AMDGPU::V_SUBBREV_U32_e32: 3013 case AMDGPU::V_SUBBREV_U32_e64: 3014 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3015 case AMDGPU::V_SUBBREV_U32_e32_vi: 3016 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3017 case AMDGPU::V_SUBBREV_U32_e64_vi: 3018 3019 case AMDGPU::V_SUBREV_U32_e32: 3020 case AMDGPU::V_SUBREV_U32_e64: 3021 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3022 case AMDGPU::V_SUBREV_U32_e32_vi: 3023 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3024 case AMDGPU::V_SUBREV_U32_e64_vi: 3025 3026 case AMDGPU::V_SUBREV_F16_e32: 3027 case AMDGPU::V_SUBREV_F16_e64: 3028 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3029 case AMDGPU::V_SUBREV_F16_e32_vi: 3030 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3031 case AMDGPU::V_SUBREV_F16_e64_vi: 3032 3033 case AMDGPU::V_SUBREV_U16_e32: 3034 case AMDGPU::V_SUBREV_U16_e64: 3035 case AMDGPU::V_SUBREV_U16_e32_vi: 3036 case AMDGPU::V_SUBREV_U16_e64_vi: 3037 3038 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3039 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3040 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3041 3042 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3043 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3044 3045 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3046 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3047 3048 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3049 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3050 3051 case AMDGPU::V_LSHRREV_B32_e32: 3052 case AMDGPU::V_LSHRREV_B32_e64: 3053 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3054 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3055 case AMDGPU::V_LSHRREV_B32_e32_vi: 3056 case AMDGPU::V_LSHRREV_B32_e64_vi: 3057 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3058 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3059 3060 case AMDGPU::V_ASHRREV_I32_e32: 3061 case AMDGPU::V_ASHRREV_I32_e64: 3062 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3063 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3064 case AMDGPU::V_ASHRREV_I32_e32_vi: 3065 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3066 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 
3067 case AMDGPU::V_ASHRREV_I32_e64_vi: 3068 3069 case AMDGPU::V_LSHLREV_B32_e32: 3070 case AMDGPU::V_LSHLREV_B32_e64: 3071 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3072 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3073 case AMDGPU::V_LSHLREV_B32_e32_vi: 3074 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3075 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3076 case AMDGPU::V_LSHLREV_B32_e64_vi: 3077 3078 case AMDGPU::V_LSHLREV_B16_e32: 3079 case AMDGPU::V_LSHLREV_B16_e64: 3080 case AMDGPU::V_LSHLREV_B16_e32_vi: 3081 case AMDGPU::V_LSHLREV_B16_e64_vi: 3082 case AMDGPU::V_LSHLREV_B16_gfx10: 3083 3084 case AMDGPU::V_LSHRREV_B16_e32: 3085 case AMDGPU::V_LSHRREV_B16_e64: 3086 case AMDGPU::V_LSHRREV_B16_e32_vi: 3087 case AMDGPU::V_LSHRREV_B16_e64_vi: 3088 case AMDGPU::V_LSHRREV_B16_gfx10: 3089 3090 case AMDGPU::V_ASHRREV_I16_e32: 3091 case AMDGPU::V_ASHRREV_I16_e64: 3092 case AMDGPU::V_ASHRREV_I16_e32_vi: 3093 case AMDGPU::V_ASHRREV_I16_e64_vi: 3094 case AMDGPU::V_ASHRREV_I16_gfx10: 3095 3096 case AMDGPU::V_LSHLREV_B64: 3097 case AMDGPU::V_LSHLREV_B64_gfx10: 3098 case AMDGPU::V_LSHLREV_B64_vi: 3099 3100 case AMDGPU::V_LSHRREV_B64: 3101 case AMDGPU::V_LSHRREV_B64_gfx10: 3102 case AMDGPU::V_LSHRREV_B64_vi: 3103 3104 case AMDGPU::V_ASHRREV_I64: 3105 case AMDGPU::V_ASHRREV_I64_gfx10: 3106 case AMDGPU::V_ASHRREV_I64_vi: 3107 3108 case AMDGPU::V_PK_LSHLREV_B16: 3109 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3110 case AMDGPU::V_PK_LSHLREV_B16_vi: 3111 3112 case AMDGPU::V_PK_LSHRREV_B16: 3113 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3114 case AMDGPU::V_PK_LSHRREV_B16_vi: 3115 case AMDGPU::V_PK_ASHRREV_I16: 3116 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3117 case AMDGPU::V_PK_ASHRREV_I16_vi: 3118 return true; 3119 default: 3120 return false; 3121 } 3122 } 3123 3124 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3125 3126 using namespace SIInstrFlags; 3127 const unsigned Opcode = Inst.getOpcode(); 3128 const MCInstrDesc &Desc = MII.get(Opcode); 3129 3130 // lds_direct register is defined so that it can be used 3131 // with 9-bit operands only. Ignore encodings which do not accept these. 3132 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3133 return true; 3134 3135 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3136 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3137 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3138 3139 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3140 3141 // lds_direct cannot be specified as either src1 or src2. 3142 for (int SrcIdx : SrcIndices) { 3143 if (SrcIdx == -1) break; 3144 const MCOperand &Src = Inst.getOperand(SrcIdx); 3145 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3146 return false; 3147 } 3148 } 3149 3150 if (Src0Idx == -1) 3151 return true; 3152 3153 const MCOperand &Src = Inst.getOperand(Src0Idx); 3154 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3155 return true; 3156 3157 // lds_direct is specified as src0. Check additional limitations. 
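  // lds_direct in the src0 position is additionally rejected for SDWA
  // encodings and for the "rev" opcode variants listed in IsRevOpcode().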
3158 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3159 } 3160 3161 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3162 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3163 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3164 if (Op.isFlatOffset()) 3165 return Op.getStartLoc(); 3166 } 3167 return getLoc(); 3168 } 3169 3170 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3171 const OperandVector &Operands) { 3172 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3173 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3174 return true; 3175 3176 auto Opcode = Inst.getOpcode(); 3177 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3178 assert(OpNum != -1); 3179 3180 const auto &Op = Inst.getOperand(OpNum); 3181 if (!hasFlatOffsets() && Op.getImm() != 0) { 3182 Error(getFlatOffsetLoc(Operands), 3183 "flat offset modifier is not supported on this GPU"); 3184 return false; 3185 } 3186 3187 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3188 // For FLAT segment the offset must be positive; 3189 // MSB is ignored and forced to zero. 3190 unsigned OffsetSize = isGFX9() ? 13 : 12; 3191 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3192 if (!isIntN(OffsetSize, Op.getImm())) { 3193 Error(getFlatOffsetLoc(Operands), 3194 isGFX9() ? "expected a 13-bit signed offset" : 3195 "expected a 12-bit signed offset"); 3196 return false; 3197 } 3198 } else { 3199 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3200 Error(getFlatOffsetLoc(Operands), 3201 isGFX9() ? "expected a 12-bit unsigned offset" : 3202 "expected an 11-bit unsigned offset"); 3203 return false; 3204 } 3205 } 3206 3207 return true; 3208 } 3209 3210 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3211 unsigned Opcode = Inst.getOpcode(); 3212 const MCInstrDesc &Desc = MII.get(Opcode); 3213 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3214 return true; 3215 3216 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3217 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3218 3219 const int OpIndices[] = { Src0Idx, Src1Idx }; 3220 3221 unsigned NumLiterals = 0; 3222 uint32_t LiteralValue; 3223 3224 for (int OpIdx : OpIndices) { 3225 if (OpIdx == -1) break; 3226 3227 const MCOperand &MO = Inst.getOperand(OpIdx); 3228 if (MO.isImm() && 3229 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3230 AMDGPU::isSISrcOperand(Desc, OpIdx) && 3231 !isInlineConstant(Inst, OpIdx)) { 3232 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3233 if (NumLiterals == 0 || LiteralValue != Value) { 3234 LiteralValue = Value; 3235 ++NumLiterals; 3236 } 3237 } 3238 } 3239 3240 return NumLiterals <= 1; 3241 } 3242 3243 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3244 const unsigned Opc = Inst.getOpcode(); 3245 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3246 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3247 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3248 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3249 3250 if (OpSel & ~3) 3251 return false; 3252 } 3253 return true; 3254 } 3255 3256 // Check if VCC register matches wavefront size 3257 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3258 auto FB = getFeatureBits(); 3259 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3260 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3261 } 3262 3263 // VOP3 literal 
is only allowed in GFX10+ and only one can be used 3264 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3265 unsigned Opcode = Inst.getOpcode(); 3266 const MCInstrDesc &Desc = MII.get(Opcode); 3267 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3268 return true; 3269 3270 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3271 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3272 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3273 3274 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3275 3276 unsigned NumLiterals = 0; 3277 uint32_t LiteralValue; 3278 3279 for (int OpIdx : OpIndices) { 3280 if (OpIdx == -1) break; 3281 3282 const MCOperand &MO = Inst.getOperand(OpIdx); 3283 if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx)) 3284 continue; 3285 3286 if (!isInlineConstant(Inst, OpIdx)) { 3287 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3288 if (NumLiterals == 0 || LiteralValue != Value) { 3289 LiteralValue = Value; 3290 ++NumLiterals; 3291 } 3292 } 3293 } 3294 3295 return !NumLiterals || 3296 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3297 } 3298 3299 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3300 const SMLoc &IDLoc, 3301 const OperandVector &Operands) { 3302 if (!validateLdsDirect(Inst)) { 3303 Error(IDLoc, 3304 "invalid use of lds_direct"); 3305 return false; 3306 } 3307 if (!validateSOPLiteral(Inst)) { 3308 Error(IDLoc, 3309 "only one literal operand is allowed"); 3310 return false; 3311 } 3312 if (!validateVOP3Literal(Inst)) { 3313 Error(IDLoc, 3314 "invalid literal operand"); 3315 return false; 3316 } 3317 if (!validateConstantBusLimitations(Inst)) { 3318 Error(IDLoc, 3319 "invalid operand (violates constant bus restrictions)"); 3320 return false; 3321 } 3322 if (!validateEarlyClobberLimitations(Inst)) { 3323 Error(IDLoc, 3324 "destination must be different than all sources"); 3325 return false; 3326 } 3327 if (!validateIntClampSupported(Inst)) { 3328 Error(IDLoc, 3329 "integer clamping is not supported on this GPU"); 3330 return false; 3331 } 3332 if (!validateOpSel(Inst)) { 3333 Error(IDLoc, 3334 "invalid op_sel operand"); 3335 return false; 3336 } 3337 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
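  // validateMIMGD16() therefore only rejects the d16 bit on MIMG opcodes
  // for targets (SI/CI) that do not support it.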
3338 if (!validateMIMGD16(Inst)) { 3339 Error(IDLoc, 3340 "d16 modifier is not supported on this GPU"); 3341 return false; 3342 } 3343 if (!validateMIMGDim(Inst)) { 3344 Error(IDLoc, "dim modifier is required on this GPU"); 3345 return false; 3346 } 3347 if (!validateMIMGDataSize(Inst)) { 3348 Error(IDLoc, 3349 "image data size does not match dmask and tfe"); 3350 return false; 3351 } 3352 if (!validateMIMGAddrSize(Inst)) { 3353 Error(IDLoc, 3354 "image address size does not match dim and a16"); 3355 return false; 3356 } 3357 if (!validateMIMGAtomicDMask(Inst)) { 3358 Error(IDLoc, 3359 "invalid atomic image dmask"); 3360 return false; 3361 } 3362 if (!validateMIMGGatherDMask(Inst)) { 3363 Error(IDLoc, 3364 "invalid image_gather dmask: only one bit must be set"); 3365 return false; 3366 } 3367 if (!validateFlatOffset(Inst, Operands)) { 3368 return false; 3369 } 3370 3371 return true; 3372 } 3373 3374 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3375 const FeatureBitset &FBS, 3376 unsigned VariantID = 0); 3377 3378 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3379 OperandVector &Operands, 3380 MCStreamer &Out, 3381 uint64_t &ErrorInfo, 3382 bool MatchingInlineAsm) { 3383 MCInst Inst; 3384 unsigned Result = Match_Success; 3385 for (auto Variant : getMatchedVariants()) { 3386 uint64_t EI; 3387 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3388 Variant); 3389 // We order match statuses from least to most specific. We use most specific 3390 // status as resulting 3391 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3392 if ((R == Match_Success) || 3393 (R == Match_PreferE32) || 3394 (R == Match_MissingFeature && Result != Match_PreferE32) || 3395 (R == Match_InvalidOperand && Result != Match_MissingFeature 3396 && Result != Match_PreferE32) || 3397 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3398 && Result != Match_MissingFeature 3399 && Result != Match_PreferE32)) { 3400 Result = R; 3401 ErrorInfo = EI; 3402 } 3403 if (R == Match_Success) 3404 break; 3405 } 3406 3407 switch (Result) { 3408 default: break; 3409 case Match_Success: 3410 if (!validateInstruction(Inst, IDLoc, Operands)) { 3411 return true; 3412 } 3413 Inst.setLoc(IDLoc); 3414 Out.EmitInstruction(Inst, getSTI()); 3415 return false; 3416 3417 case Match_MissingFeature: 3418 return Error(IDLoc, "instruction not supported on this GPU"); 3419 3420 case Match_MnemonicFail: { 3421 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3422 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3423 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3424 return Error(IDLoc, "invalid instruction" + Suggestion, 3425 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3426 } 3427 3428 case Match_InvalidOperand: { 3429 SMLoc ErrorLoc = IDLoc; 3430 if (ErrorInfo != ~0ULL) { 3431 if (ErrorInfo >= Operands.size()) { 3432 return Error(IDLoc, "too few operands for instruction"); 3433 } 3434 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3435 if (ErrorLoc == SMLoc()) 3436 ErrorLoc = IDLoc; 3437 } 3438 return Error(ErrorLoc, "invalid operand for instruction"); 3439 } 3440 3441 case Match_PreferE32: 3442 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3443 "should be encoded as e32"); 3444 } 3445 llvm_unreachable("Implement any new match types added!"); 3446 } 3447 3448 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3449 int64_t Tmp = -1; 3450 if 
(getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3451 return true; 3452 } 3453 if (getParser().parseAbsoluteExpression(Tmp)) { 3454 return true; 3455 } 3456 Ret = static_cast<uint32_t>(Tmp); 3457 return false; 3458 } 3459 3460 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3461 uint32_t &Minor) { 3462 if (ParseAsAbsoluteExpression(Major)) 3463 return TokError("invalid major version"); 3464 3465 if (getLexer().isNot(AsmToken::Comma)) 3466 return TokError("minor version number required, comma expected"); 3467 Lex(); 3468 3469 if (ParseAsAbsoluteExpression(Minor)) 3470 return TokError("invalid minor version"); 3471 3472 return false; 3473 } 3474 3475 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3476 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3477 return TokError("directive only supported for amdgcn architecture"); 3478 3479 std::string Target; 3480 3481 SMLoc TargetStart = getTok().getLoc(); 3482 if (getParser().parseEscapedString(Target)) 3483 return true; 3484 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3485 3486 std::string ExpectedTarget; 3487 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3488 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3489 3490 if (Target != ExpectedTargetOS.str()) 3491 return getParser().Error(TargetRange.Start, "target must match options", 3492 TargetRange); 3493 3494 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3495 return false; 3496 } 3497 3498 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3499 return getParser().Error(Range.Start, "value out of range", Range); 3500 } 3501 3502 bool AMDGPUAsmParser::calculateGPRBlocks( 3503 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3504 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3505 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3506 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3507 // TODO(scott.linder): These calculations are duplicated from 3508 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
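  // The kernel descriptor encodes register usage in blocks rather than raw
  // counts; the IsaInfo helpers below convert the next-free register numbers
  // into those granulated block counts.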
3509 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3510 3511 unsigned NumVGPRs = NextFreeVGPR; 3512 unsigned NumSGPRs = NextFreeSGPR; 3513 3514 if (Version.Major >= 10) 3515 NumSGPRs = 0; 3516 else { 3517 unsigned MaxAddressableNumSGPRs = 3518 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3519 3520 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3521 NumSGPRs > MaxAddressableNumSGPRs) 3522 return OutOfRangeError(SGPRRange); 3523 3524 NumSGPRs += 3525 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3526 3527 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3528 NumSGPRs > MaxAddressableNumSGPRs) 3529 return OutOfRangeError(SGPRRange); 3530 3531 if (Features.test(FeatureSGPRInitBug)) 3532 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3533 } 3534 3535 VGPRBlocks = 3536 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3537 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3538 3539 return false; 3540 } 3541 3542 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3543 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3544 return TokError("directive only supported for amdgcn architecture"); 3545 3546 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3547 return TokError("directive only supported for amdhsa OS"); 3548 3549 StringRef KernelName; 3550 if (getParser().parseIdentifier(KernelName)) 3551 return true; 3552 3553 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3554 3555 StringSet<> Seen; 3556 3557 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3558 3559 SMRange VGPRRange; 3560 uint64_t NextFreeVGPR = 0; 3561 SMRange SGPRRange; 3562 uint64_t NextFreeSGPR = 0; 3563 unsigned UserSGPRCount = 0; 3564 bool ReserveVCC = true; 3565 bool ReserveFlatScr = true; 3566 bool ReserveXNACK = hasXNACK(); 3567 Optional<bool> EnableWavefrontSize32; 3568 3569 while (true) { 3570 while (getLexer().is(AsmToken::EndOfStatement)) 3571 Lex(); 3572 3573 if (getLexer().isNot(AsmToken::Identifier)) 3574 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3575 3576 StringRef ID = getTok().getIdentifier(); 3577 SMRange IDRange = getTok().getLocRange(); 3578 Lex(); 3579 3580 if (ID == ".end_amdhsa_kernel") 3581 break; 3582 3583 if (Seen.find(ID) != Seen.end()) 3584 return TokError(".amdhsa_ directives cannot be repeated"); 3585 Seen.insert(ID); 3586 3587 SMLoc ValStart = getTok().getLoc(); 3588 int64_t IVal; 3589 if (getParser().parseAbsoluteExpression(IVal)) 3590 return true; 3591 SMLoc ValEnd = getTok().getLoc(); 3592 SMRange ValRange = SMRange(ValStart, ValEnd); 3593 3594 if (IVal < 0) 3595 return OutOfRangeError(ValRange); 3596 3597 uint64_t Val = IVal; 3598 3599 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3600 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3601 return OutOfRangeError(RANGE); \ 3602 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3603 3604 if (ID == ".amdhsa_group_segment_fixed_size") { 3605 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3606 return OutOfRangeError(ValRange); 3607 KD.group_segment_fixed_size = Val; 3608 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3609 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3610 return OutOfRangeError(ValRange); 3611 KD.private_segment_fixed_size = Val; 3612 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3613 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3614 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3615 Val, ValRange); 
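        // Each enabled user SGPR feature reserves a fixed number of registers:
        // the private segment buffer is a four-register resource descriptor,
        // the 64-bit pointers below take two registers each, and the private
        // segment size takes one. These increments feed the USER_SGPR_COUNT
        // range check at the end of the directive.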
3616 UserSGPRCount += 4; 3617 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3618 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3619 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3620 ValRange); 3621 UserSGPRCount += 2; 3622 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3623 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3624 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3625 ValRange); 3626 UserSGPRCount += 2; 3627 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3628 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3629 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3630 Val, ValRange); 3631 UserSGPRCount += 2; 3632 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3633 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3634 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3635 ValRange); 3636 UserSGPRCount += 2; 3637 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3638 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3639 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3640 ValRange); 3641 UserSGPRCount += 2; 3642 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3643 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3644 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3645 Val, ValRange); 3646 UserSGPRCount += 1; 3647 } else if (ID == ".amdhsa_wavefront_size32") { 3648 if (IVersion.Major < 10) 3649 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3650 IDRange); 3651 EnableWavefrontSize32 = Val; 3652 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3653 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3654 Val, ValRange); 3655 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3656 PARSE_BITS_ENTRY( 3657 KD.compute_pgm_rsrc2, 3658 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3659 ValRange); 3660 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3661 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3662 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3663 ValRange); 3664 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3665 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3666 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3667 ValRange); 3668 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3669 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3670 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3671 ValRange); 3672 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3673 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3674 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3675 ValRange); 3676 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3677 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3678 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3679 ValRange); 3680 } else if (ID == ".amdhsa_next_free_vgpr") { 3681 VGPRRange = ValRange; 3682 NextFreeVGPR = Val; 3683 } else if (ID == ".amdhsa_next_free_sgpr") { 3684 SGPRRange = ValRange; 3685 NextFreeSGPR = Val; 3686 } else if (ID == ".amdhsa_reserve_vcc") { 3687 if (!isUInt<1>(Val)) 3688 return OutOfRangeError(ValRange); 3689 ReserveVCC = Val; 3690 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3691 if (IVersion.Major < 7) 3692 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3693 IDRange); 3694 if (!isUInt<1>(Val)) 3695 return OutOfRangeError(ValRange); 3696 ReserveFlatScr = Val; 3697 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3698 if (IVersion.Major < 8) 3699 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3700 IDRange); 3701 if (!isUInt<1>(Val)) 3702 return 
OutOfRangeError(ValRange); 3703 ReserveXNACK = Val; 3704 } else if (ID == ".amdhsa_float_round_mode_32") { 3705 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3706 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3707 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3708 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3709 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3710 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3711 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3712 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3713 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3714 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3715 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3716 ValRange); 3717 } else if (ID == ".amdhsa_dx10_clamp") { 3718 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3719 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3720 } else if (ID == ".amdhsa_ieee_mode") { 3721 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3722 Val, ValRange); 3723 } else if (ID == ".amdhsa_fp16_overflow") { 3724 if (IVersion.Major < 9) 3725 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3726 IDRange); 3727 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3728 ValRange); 3729 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3730 if (IVersion.Major < 10) 3731 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3732 IDRange); 3733 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3734 ValRange); 3735 } else if (ID == ".amdhsa_memory_ordered") { 3736 if (IVersion.Major < 10) 3737 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3738 IDRange); 3739 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3740 ValRange); 3741 } else if (ID == ".amdhsa_forward_progress") { 3742 if (IVersion.Major < 10) 3743 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3744 IDRange); 3745 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3746 ValRange); 3747 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3748 PARSE_BITS_ENTRY( 3749 KD.compute_pgm_rsrc2, 3750 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3751 ValRange); 3752 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3753 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3754 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3755 Val, ValRange); 3756 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3757 PARSE_BITS_ENTRY( 3758 KD.compute_pgm_rsrc2, 3759 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3760 ValRange); 3761 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3762 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3763 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3764 Val, ValRange); 3765 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 3766 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3767 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3768 Val, ValRange); 3769 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3770 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3771 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3772 Val, ValRange); 3773 } else if (ID == ".amdhsa_exception_int_div_zero") { 3774 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3775 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3776 Val, ValRange); 3777 } else { 3778 return getParser().Error(IDRange.Start, 3779 "unknown .amdhsa_kernel directive", IDRange); 3780 } 3781 3782 #undef 
PARSE_BITS_ENTRY 3783 } 3784 3785 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3786 return TokError(".amdhsa_next_free_vgpr directive is required"); 3787 3788 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3789 return TokError(".amdhsa_next_free_sgpr directive is required"); 3790 3791 unsigned VGPRBlocks; 3792 unsigned SGPRBlocks; 3793 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3794 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 3795 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 3796 SGPRBlocks)) 3797 return true; 3798 3799 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3800 VGPRBlocks)) 3801 return OutOfRangeError(VGPRRange); 3802 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3803 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3804 3805 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3806 SGPRBlocks)) 3807 return OutOfRangeError(SGPRRange); 3808 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3809 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3810 SGPRBlocks); 3811 3812 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3813 return TokError("too many user SGPRs enabled"); 3814 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3815 UserSGPRCount); 3816 3817 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3818 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3819 ReserveFlatScr, ReserveXNACK); 3820 return false; 3821 } 3822 3823 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3824 uint32_t Major; 3825 uint32_t Minor; 3826 3827 if (ParseDirectiveMajorMinor(Major, Minor)) 3828 return true; 3829 3830 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3831 return false; 3832 } 3833 3834 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3835 uint32_t Major; 3836 uint32_t Minor; 3837 uint32_t Stepping; 3838 StringRef VendorName; 3839 StringRef ArchName; 3840 3841 // If this directive has no arguments, then use the ISA version for the 3842 // targeted GPU. 3843 if (getLexer().is(AsmToken::EndOfStatement)) { 3844 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3845 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3846 ISA.Stepping, 3847 "AMD", "AMDGPU"); 3848 return false; 3849 } 3850 3851 if (ParseDirectiveMajorMinor(Major, Minor)) 3852 return true; 3853 3854 if (getLexer().isNot(AsmToken::Comma)) 3855 return TokError("stepping version number required, comma expected"); 3856 Lex(); 3857 3858 if (ParseAsAbsoluteExpression(Stepping)) 3859 return TokError("invalid stepping version"); 3860 3861 if (getLexer().isNot(AsmToken::Comma)) 3862 return TokError("vendor name required, comma expected"); 3863 Lex(); 3864 3865 if (getLexer().isNot(AsmToken::String)) 3866 return TokError("invalid vendor name"); 3867 3868 VendorName = getLexer().getTok().getStringContents(); 3869 Lex(); 3870 3871 if (getLexer().isNot(AsmToken::Comma)) 3872 return TokError("arch name required, comma expected"); 3873 Lex(); 3874 3875 if (getLexer().isNot(AsmToken::String)) 3876 return TokError("invalid arch name"); 3877 3878 ArchName = getLexer().getTok().getStringContents(); 3879 Lex(); 3880 3881 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3882 VendorName, ArchName); 3883 return false; 3884 } 3885 3886 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3887 amd_kernel_code_t &Header) { 3888 // max_scratch_backing_memory_byte_size is deprecated. 
Ignore it while parsing 3889 // assembly for backwards compatibility. 3890 if (ID == "max_scratch_backing_memory_byte_size") { 3891 Parser.eatToEndOfStatement(); 3892 return false; 3893 } 3894 3895 SmallString<40> ErrStr; 3896 raw_svector_ostream Err(ErrStr); 3897 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 3898 return TokError(Err.str()); 3899 } 3900 Lex(); 3901 3902 if (ID == "enable_wavefront_size32") { 3903 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 3904 if (!isGFX10()) 3905 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 3906 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 3907 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 3908 } else { 3909 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 3910 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 3911 } 3912 } 3913 3914 if (ID == "wavefront_size") { 3915 if (Header.wavefront_size == 5) { 3916 if (!isGFX10()) 3917 return TokError("wavefront_size=5 is only allowed on GFX10+"); 3918 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 3919 return TokError("wavefront_size=5 requires +WavefrontSize32"); 3920 } else if (Header.wavefront_size == 6) { 3921 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 3922 return TokError("wavefront_size=6 requires +WavefrontSize64"); 3923 } 3924 } 3925 3926 if (ID == "enable_wgp_mode") { 3927 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 3928 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 3929 } 3930 3931 if (ID == "enable_mem_ordered") { 3932 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 3933 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 3934 } 3935 3936 if (ID == "enable_fwd_progress") { 3937 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 3938 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 3939 } 3940 3941 return false; 3942 } 3943 3944 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3945 amd_kernel_code_t Header; 3946 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3947 3948 while (true) { 3949 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3950 // will set the current token to EndOfStatement. 
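    // Illustrative input handled by this loop (the field names and values are
    // examples only; they are parsed by ParseAMDKernelCodeTValue above):
    //   .amd_kernel_code_t
    //     enable_sgpr_kernarg_segment_ptr = 1
    //     wavefront_size = 6
    //   .end_amd_kernel_code_t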
3951 while(getLexer().is(AsmToken::EndOfStatement)) 3952 Lex(); 3953 3954 if (getLexer().isNot(AsmToken::Identifier)) 3955 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3956 3957 StringRef ID = getLexer().getTok().getIdentifier(); 3958 Lex(); 3959 3960 if (ID == ".end_amd_kernel_code_t") 3961 break; 3962 3963 if (ParseAMDKernelCodeTValue(ID, Header)) 3964 return true; 3965 } 3966 3967 getTargetStreamer().EmitAMDKernelCodeT(Header); 3968 3969 return false; 3970 } 3971 3972 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 3973 if (getLexer().isNot(AsmToken::Identifier)) 3974 return TokError("expected symbol name"); 3975 3976 StringRef KernelName = Parser.getTok().getString(); 3977 3978 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 3979 ELF::STT_AMDGPU_HSA_KERNEL); 3980 Lex(); 3981 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 3982 KernelScope.initialize(getContext()); 3983 return false; 3984 } 3985 3986 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 3987 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 3988 return Error(getParser().getTok().getLoc(), 3989 ".amd_amdgpu_isa directive is not available on non-amdgcn " 3990 "architectures"); 3991 } 3992 3993 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 3994 3995 std::string ISAVersionStringFromSTI; 3996 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3997 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3998 3999 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4000 return Error(getParser().getTok().getLoc(), 4001 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4002 "arguments specified through the command line"); 4003 } 4004 4005 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4006 Lex(); 4007 4008 return false; 4009 } 4010 4011 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4012 const char *AssemblerDirectiveBegin; 4013 const char *AssemblerDirectiveEnd; 4014 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4015 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4016 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4017 HSAMD::V3::AssemblerDirectiveEnd) 4018 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4019 HSAMD::AssemblerDirectiveEnd); 4020 4021 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4022 return Error(getParser().getTok().getLoc(), 4023 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4024 "not available on non-amdhsa OSes")).str()); 4025 } 4026 4027 std::string HSAMetadataString; 4028 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4029 HSAMetadataString)) 4030 return true; 4031 4032 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4033 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4034 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4035 } else { 4036 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4037 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4038 } 4039 4040 return false; 4041 } 4042 4043 /// Common code to parse out a block of text (typically YAML) between start and 4044 /// end directives. 
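/// For illustration, the HSA metadata path feeds this with blocks of the
/// following shape (the directive names shown are only examples; the real
/// ones come from the AssemblerDirectiveBegin/End constants, and the YAML
/// body is whatever the producer emitted):
///   .amd_amdgpu_hsa_metadata
///     Version: [ 1, 0 ]
///   .end_amd_amdgpu_hsa_metadata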
4045 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4046 const char *AssemblerDirectiveEnd, 4047 std::string &CollectString) { 4048 4049 raw_string_ostream CollectStream(CollectString); 4050 4051 getLexer().setSkipSpace(false); 4052 4053 bool FoundEnd = false; 4054 while (!getLexer().is(AsmToken::Eof)) { 4055 while (getLexer().is(AsmToken::Space)) { 4056 CollectStream << getLexer().getTok().getString(); 4057 Lex(); 4058 } 4059 4060 if (getLexer().is(AsmToken::Identifier)) { 4061 StringRef ID = getLexer().getTok().getIdentifier(); 4062 if (ID == AssemblerDirectiveEnd) { 4063 Lex(); 4064 FoundEnd = true; 4065 break; 4066 } 4067 } 4068 4069 CollectStream << Parser.parseStringToEndOfStatement() 4070 << getContext().getAsmInfo()->getSeparatorString(); 4071 4072 Parser.eatToEndOfStatement(); 4073 } 4074 4075 getLexer().setSkipSpace(true); 4076 4077 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4078 return TokError(Twine("expected directive ") + 4079 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4080 } 4081 4082 CollectStream.flush(); 4083 return false; 4084 } 4085 4086 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4087 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4088 std::string String; 4089 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4090 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4091 return true; 4092 4093 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4094 if (!PALMetadata->setFromString(String)) 4095 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4096 return false; 4097 } 4098 4099 /// Parse the assembler directive for old linear-format PAL metadata. 4100 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4101 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4102 return Error(getParser().getTok().getLoc(), 4103 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4104 "not available on non-amdpal OSes")).str()); 4105 } 4106 4107 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4108 PALMetadata->setLegacy(); 4109 for (;;) { 4110 uint32_t Key, Value; 4111 if (ParseAsAbsoluteExpression(Key)) { 4112 return TokError(Twine("invalid value in ") + 4113 Twine(PALMD::AssemblerDirective)); 4114 } 4115 if (getLexer().isNot(AsmToken::Comma)) { 4116 return TokError(Twine("expected an even number of values in ") + 4117 Twine(PALMD::AssemblerDirective)); 4118 } 4119 Lex(); 4120 if (ParseAsAbsoluteExpression(Value)) { 4121 return TokError(Twine("invalid value in ") + 4122 Twine(PALMD::AssemblerDirective)); 4123 } 4124 PALMetadata->setRegister(Key, Value); 4125 if (getLexer().isNot(AsmToken::Comma)) 4126 break; 4127 Lex(); 4128 } 4129 return false; 4130 } 4131 4132 /// ParseDirectiveAMDGPULDS 4133 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4134 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4135 if (getParser().checkForValidSection()) 4136 return true; 4137 4138 StringRef Name; 4139 SMLoc NameLoc = getLexer().getLoc(); 4140 if (getParser().parseIdentifier(Name)) 4141 return TokError("expected identifier in directive"); 4142 4143 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4144 if (parseToken(AsmToken::Comma, "expected ','")) 4145 return true; 4146 4147 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4148 4149 int64_t Size; 4150 SMLoc SizeLoc = getLexer().getLoc(); 4151 if (getParser().parseAbsoluteExpression(Size)) 4152 return true; 4153 if (Size < 0) 4154 return 
Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  int64_t Align = 4;
  if (getLexer().is(AsmToken::Comma)) {
    Lex();
    SMLoc AlignLoc = getLexer().getLoc();
    if (getParser().parseAbsoluteExpression(Align))
      return true;
    if (Align < 0 || !isPowerOf2_64(Align))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Align >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  if (parseToken(AsmToken::EndOfStatement,
                 "unexpected token in '.amdgpu_lds' directive"))
    return true;

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
  return false;
}

bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (IDVal == ".amdgcn_target")
      return ParseDirectiveAMDGCNTarget();

    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}

bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) const {

  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return isGFX9() || isGFX10();
  }

  // GFX10 has 2 more SGPRs 104 and 105.
for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR104_SGPR105();
  }

  switch (RegNo) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return !isCI() && !isSI() && !isVI();
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    return !isGFX9() && !isGFX10();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return !isCI() && !isSI() && !isGFX10() && hasXNACK();
  case AMDGPU::SGPR_NULL:
    return isGFX10();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10()) {
    // No flat_scr on SI.
    // On GFX10 flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more
  // that SI/CI have.
  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR102_SGPR103();
  }

  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Try to parse with a custom parser.
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parsers, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      getLexer().is(AsmToken::EndOfStatement))
    return ResTy;

  if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();
    SMLoc LBraceLoc = getTok().getLoc();
    Parser.Lex(); // eat the '['

    for (;;) {
      ResTy = parseReg(Operands);
      if (ResTy != MatchOperand_Success)
        return ResTy;

      if (getLexer().is(AsmToken::RBrac))
        break;

      if (getLexer().isNot(AsmToken::Comma))
        return MatchOperand_ParseFail;
      Parser.Lex();
    }

    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
                                                    getTok().getLoc()));
    }

    Parser.Lex(); // eat the ']'
    return MatchOperand_Success;
  }

  return parseRegOrImm(Operands);
}

StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
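  // For example (illustrative): "v_add_f32_e64" forces the 64-bit encoding
  // and the mnemonic returned to the matcher is "v_add_f32"; the "_e32",
  // "_dpp" and "_sdwa" suffixes are handled analogously below.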
4349 setForcedEncodingSize(0); 4350 setForcedDPP(false); 4351 setForcedSDWA(false); 4352 4353 if (Name.endswith("_e64")) { 4354 setForcedEncodingSize(64); 4355 return Name.substr(0, Name.size() - 4); 4356 } else if (Name.endswith("_e32")) { 4357 setForcedEncodingSize(32); 4358 return Name.substr(0, Name.size() - 4); 4359 } else if (Name.endswith("_dpp")) { 4360 setForcedDPP(true); 4361 return Name.substr(0, Name.size() - 4); 4362 } else if (Name.endswith("_sdwa")) { 4363 setForcedSDWA(true); 4364 return Name.substr(0, Name.size() - 5); 4365 } 4366 return Name; 4367 } 4368 4369 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4370 StringRef Name, 4371 SMLoc NameLoc, OperandVector &Operands) { 4372 // Add the instruction mnemonic 4373 Name = parseMnemonicSuffix(Name); 4374 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4375 4376 bool IsMIMG = Name.startswith("image_"); 4377 4378 while (!getLexer().is(AsmToken::EndOfStatement)) { 4379 OperandMode Mode = OperandMode_Default; 4380 if (IsMIMG && isGFX10() && Operands.size() == 2) 4381 Mode = OperandMode_NSA; 4382 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4383 4384 // Eat the comma or space if there is one. 4385 if (getLexer().is(AsmToken::Comma)) 4386 Parser.Lex(); 4387 4388 switch (Res) { 4389 case MatchOperand_Success: break; 4390 case MatchOperand_ParseFail: 4391 // FIXME: use real operand location rather than the current location. 4392 Error(getLexer().getLoc(), "failed parsing operand."); 4393 while (!getLexer().is(AsmToken::EndOfStatement)) { 4394 Parser.Lex(); 4395 } 4396 return true; 4397 case MatchOperand_NoMatch: 4398 // FIXME: use real operand location rather than the current location. 4399 Error(getLexer().getLoc(), "not a valid operand."); 4400 while (!getLexer().is(AsmToken::EndOfStatement)) { 4401 Parser.Lex(); 4402 } 4403 return true; 4404 } 4405 } 4406 4407 return false; 4408 } 4409 4410 //===----------------------------------------------------------------------===// 4411 // Utility functions 4412 //===----------------------------------------------------------------------===// 4413 4414 OperandMatchResultTy 4415 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4416 4417 if (!trySkipId(Prefix, AsmToken::Colon)) 4418 return MatchOperand_NoMatch; 4419 4420 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4421 } 4422 4423 OperandMatchResultTy 4424 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4425 AMDGPUOperand::ImmTy ImmTy, 4426 bool (*ConvertResult)(int64_t&)) { 4427 SMLoc S = getLoc(); 4428 int64_t Value = 0; 4429 4430 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4431 if (Res != MatchOperand_Success) 4432 return Res; 4433 4434 if (ConvertResult && !ConvertResult(Value)) { 4435 Error(S, "invalid " + StringRef(Prefix) + " value."); 4436 } 4437 4438 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4439 return MatchOperand_Success; 4440 } 4441 4442 OperandMatchResultTy 4443 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4444 OperandVector &Operands, 4445 AMDGPUOperand::ImmTy ImmTy, 4446 bool (*ConvertResult)(int64_t&)) { 4447 SMLoc S = getLoc(); 4448 if (!trySkipId(Prefix, AsmToken::Colon)) 4449 return MatchOperand_NoMatch; 4450 4451 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4452 return MatchOperand_ParseFail; 4453 4454 unsigned Val = 0; 4455 const unsigned MaxSize = 4; 4456 4457 // FIXME: How to verify the number of elements matches the number of src 4458 // operands? 4459 for (int I = 0; ; ++I) { 4460 int64_t Op; 4461 SMLoc Loc = getLoc(); 4462 if (!parseExpr(Op)) 4463 return MatchOperand_ParseFail; 4464 4465 if (Op != 0 && Op != 1) { 4466 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4467 return MatchOperand_ParseFail; 4468 } 4469 4470 Val |= (Op << I); 4471 4472 if (trySkipToken(AsmToken::RBrac)) 4473 break; 4474 4475 if (I + 1 == MaxSize) { 4476 Error(getLoc(), "expected a closing square bracket"); 4477 return MatchOperand_ParseFail; 4478 } 4479 4480 if (!skipToken(AsmToken::Comma, "expected a comma")) 4481 return MatchOperand_ParseFail; 4482 } 4483 4484 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4485 return MatchOperand_Success; 4486 } 4487 4488 OperandMatchResultTy 4489 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4490 AMDGPUOperand::ImmTy ImmTy) { 4491 int64_t Bit = 0; 4492 SMLoc S = Parser.getTok().getLoc(); 4493 4494 // We are at the end of the statement, and this is a default argument, so 4495 // use a default value. 
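  // Named bits come in matched pairs, e.g. (illustrative) "gds" sets the bit
  // and "nogds" clears it; if the operand is omitted entirely, the default
  // value of 0 is used.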
4496 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4497 switch(getLexer().getKind()) { 4498 case AsmToken::Identifier: { 4499 StringRef Tok = Parser.getTok().getString(); 4500 if (Tok == Name) { 4501 if (Tok == "r128" && isGFX9()) 4502 Error(S, "r128 modifier is not supported on this GPU"); 4503 if (Tok == "a16" && !isGFX9() && !isGFX10()) 4504 Error(S, "a16 modifier is not supported on this GPU"); 4505 Bit = 1; 4506 Parser.Lex(); 4507 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4508 Bit = 0; 4509 Parser.Lex(); 4510 } else { 4511 return MatchOperand_NoMatch; 4512 } 4513 break; 4514 } 4515 default: 4516 return MatchOperand_NoMatch; 4517 } 4518 } 4519 4520 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4521 return MatchOperand_ParseFail; 4522 4523 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4524 return MatchOperand_Success; 4525 } 4526 4527 static void addOptionalImmOperand( 4528 MCInst& Inst, const OperandVector& Operands, 4529 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4530 AMDGPUOperand::ImmTy ImmT, 4531 int64_t Default = 0) { 4532 auto i = OptionalIdx.find(ImmT); 4533 if (i != OptionalIdx.end()) { 4534 unsigned Idx = i->second; 4535 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4536 } else { 4537 Inst.addOperand(MCOperand::createImm(Default)); 4538 } 4539 } 4540 4541 OperandMatchResultTy 4542 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4543 if (getLexer().isNot(AsmToken::Identifier)) { 4544 return MatchOperand_NoMatch; 4545 } 4546 StringRef Tok = Parser.getTok().getString(); 4547 if (Tok != Prefix) { 4548 return MatchOperand_NoMatch; 4549 } 4550 4551 Parser.Lex(); 4552 if (getLexer().isNot(AsmToken::Colon)) { 4553 return MatchOperand_ParseFail; 4554 } 4555 4556 Parser.Lex(); 4557 if (getLexer().isNot(AsmToken::Identifier)) { 4558 return MatchOperand_ParseFail; 4559 } 4560 4561 Value = Parser.getTok().getString(); 4562 return MatchOperand_Success; 4563 } 4564 4565 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4566 // values to live in a joint format operand in the MCInst encoding. 4567 OperandMatchResultTy 4568 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4569 SMLoc S = Parser.getTok().getLoc(); 4570 int64_t Dfmt = 0, Nfmt = 0; 4571 // dfmt and nfmt can appear in either order, and each is optional. 
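  // E.g. (illustrative syntax) both "dfmt:1, nfmt:2" and "nfmt:2, dfmt:1" are
  // accepted, and either field may be omitted; the two values are packed
  // below as Dfmt | (Nfmt << 4).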
4572 bool GotDfmt = false, GotNfmt = false; 4573 while (!GotDfmt || !GotNfmt) { 4574 if (!GotDfmt) { 4575 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4576 if (Res != MatchOperand_NoMatch) { 4577 if (Res != MatchOperand_Success) 4578 return Res; 4579 if (Dfmt >= 16) { 4580 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4581 return MatchOperand_ParseFail; 4582 } 4583 GotDfmt = true; 4584 Parser.Lex(); 4585 continue; 4586 } 4587 } 4588 if (!GotNfmt) { 4589 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4590 if (Res != MatchOperand_NoMatch) { 4591 if (Res != MatchOperand_Success) 4592 return Res; 4593 if (Nfmt >= 8) { 4594 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4595 return MatchOperand_ParseFail; 4596 } 4597 GotNfmt = true; 4598 Parser.Lex(); 4599 continue; 4600 } 4601 } 4602 break; 4603 } 4604 if (!GotDfmt && !GotNfmt) 4605 return MatchOperand_NoMatch; 4606 auto Format = Dfmt | Nfmt << 4; 4607 Operands.push_back( 4608 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4609 return MatchOperand_Success; 4610 } 4611 4612 //===----------------------------------------------------------------------===// 4613 // ds 4614 //===----------------------------------------------------------------------===// 4615 4616 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4617 const OperandVector &Operands) { 4618 OptionalImmIndexMap OptionalIdx; 4619 4620 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4621 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4622 4623 // Add the register arguments 4624 if (Op.isReg()) { 4625 Op.addRegOperands(Inst, 1); 4626 continue; 4627 } 4628 4629 // Handle optional arguments 4630 OptionalIdx[Op.getImmTy()] = i; 4631 } 4632 4633 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4634 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4635 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4636 4637 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4638 } 4639 4640 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4641 bool IsGdsHardcoded) { 4642 OptionalImmIndexMap OptionalIdx; 4643 4644 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4645 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4646 4647 // Add the register arguments 4648 if (Op.isReg()) { 4649 Op.addRegOperands(Inst, 1); 4650 continue; 4651 } 4652 4653 if (Op.isToken() && Op.getToken() == "gds") { 4654 IsGdsHardcoded = true; 4655 continue; 4656 } 4657 4658 // Handle optional arguments 4659 OptionalIdx[Op.getImmTy()] = i; 4660 } 4661 4662 AMDGPUOperand::ImmTy OffsetType = 4663 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4664 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4665 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4666 AMDGPUOperand::ImmTyOffset; 4667 4668 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4669 4670 if (!IsGdsHardcoded) { 4671 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4672 } 4673 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4674 } 4675 4676 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4677 OptionalImmIndexMap OptionalIdx; 4678 4679 unsigned OperandIdx[4]; 4680 unsigned EnMask = 0; 4681 int SrcIdx = 0; 4682 4683 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4684 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4685 4686 // Add the register arguments 4687 if (Op.isReg()) { 4688 assert(SrcIdx < 4); 4689 OperandIdx[SrcIdx] = Inst.size(); 4690 Op.addRegOperands(Inst, 1); 4691 ++SrcIdx; 4692 continue; 4693 } 4694 4695 if (Op.isOff()) { 4696 assert(SrcIdx < 4); 4697 OperandIdx[SrcIdx] = Inst.size(); 4698 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4699 ++SrcIdx; 4700 continue; 4701 } 4702 4703 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4704 Op.addImmOperands(Inst, 1); 4705 continue; 4706 } 4707 4708 if (Op.isToken() && Op.getToken() == "done") 4709 continue; 4710 4711 // Handle optional arguments 4712 OptionalIdx[Op.getImmTy()] = i; 4713 } 4714 4715 assert(SrcIdx == 4); 4716 4717 bool Compr = false; 4718 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4719 Compr = true; 4720 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4721 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4722 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4723 } 4724 4725 for (auto i = 0; i < SrcIdx; ++i) { 4726 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4727 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4728 } 4729 } 4730 4731 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4732 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4733 4734 Inst.addOperand(MCOperand::createImm(EnMask)); 4735 } 4736 4737 //===----------------------------------------------------------------------===// 4738 // s_waitcnt 4739 //===----------------------------------------------------------------------===// 4740 4741 static bool 4742 encodeCnt( 4743 const AMDGPU::IsaVersion ISA, 4744 int64_t &IntVal, 4745 int64_t CntVal, 4746 bool Saturate, 4747 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4748 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4749 { 4750 bool Failed = false; 4751 4752 IntVal = encode(ISA, IntVal, CntVal); 4753 if (CntVal != decode(ISA, IntVal)) { 4754 if (Saturate) { 4755 IntVal = encode(ISA, IntVal, -1); 4756 } else { 4757 Failed = true; 4758 } 4759 } 4760 return Failed; 4761 } 4762 4763 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4764 4765 SMLoc CntLoc = getLoc(); 4766 StringRef CntName = getTokenStr(); 4767 4768 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 4769 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 4770 return false; 4771 4772 int64_t CntVal; 4773 SMLoc ValLoc = getLoc(); 4774 if (!parseExpr(CntVal)) 4775 return false; 4776 4777 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4778 4779 bool Failed = true; 4780 bool Sat = CntName.endswith("_sat"); 4781 4782 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4783 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4784 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4785 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4786 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4787 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4788 } else { 4789 Error(CntLoc, "invalid counter name " + CntName); 4790 return false; 4791 } 4792 4793 if (Failed) { 4794 Error(ValLoc, "too large value for " + CntName); 4795 return false; 4796 } 4797 4798 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 4799 return false; 4800 4801 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 4802 if (isToken(AsmToken::EndOfStatement)) { 4803 Error(getLoc(), "expected a counter name"); 4804 return false; 4805 } 4806 } 4807 4808 return true; 4809 } 4810 4811 OperandMatchResultTy 4812 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4813 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4814 int64_t Waitcnt = getWaitcntBitMask(ISA); 4815 SMLoc S = getLoc(); 4816 4817 // If parse failed, do not return error code 4818 // to avoid excessive error messages. 
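  // Accepted forms (illustrative): a list of named counters such as
  //   s_waitcnt vmcnt(0) lgkmcnt(0)
  // optionally separated by '&' or ',', or a raw immediate such as
  //   s_waitcnt 0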
4819 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 4820 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 4821 } else { 4822 parseExpr(Waitcnt); 4823 } 4824 4825 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4826 return MatchOperand_Success; 4827 } 4828 4829 bool 4830 AMDGPUOperand::isSWaitCnt() const { 4831 return isImm(); 4832 } 4833 4834 //===----------------------------------------------------------------------===// 4835 // hwreg 4836 //===----------------------------------------------------------------------===// 4837 4838 bool 4839 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 4840 int64_t &Offset, 4841 int64_t &Width) { 4842 using namespace llvm::AMDGPU::Hwreg; 4843 4844 // The register may be specified by name or using a numeric code 4845 if (isToken(AsmToken::Identifier) && 4846 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 4847 HwReg.IsSymbolic = true; 4848 lex(); // skip message name 4849 } else if (!parseExpr(HwReg.Id)) { 4850 return false; 4851 } 4852 4853 if (trySkipToken(AsmToken::RParen)) 4854 return true; 4855 4856 // parse optional params 4857 return 4858 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 4859 parseExpr(Offset) && 4860 skipToken(AsmToken::Comma, "expected a comma") && 4861 parseExpr(Width) && 4862 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 4863 } 4864 4865 bool 4866 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 4867 const int64_t Offset, 4868 const int64_t Width, 4869 const SMLoc Loc) { 4870 4871 using namespace llvm::AMDGPU::Hwreg; 4872 4873 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 4874 Error(Loc, "specified hardware register is not supported on this GPU"); 4875 return false; 4876 } else if (!isValidHwreg(HwReg.Id)) { 4877 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 4878 return false; 4879 } else if (!isValidHwregOffset(Offset)) { 4880 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 4881 return false; 4882 } else if (!isValidHwregWidth(Width)) { 4883 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 4884 return false; 4885 } 4886 return true; 4887 } 4888 4889 OperandMatchResultTy 4890 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4891 using namespace llvm::AMDGPU::Hwreg; 4892 4893 int64_t ImmVal = 0; 4894 SMLoc Loc = getLoc(); 4895 4896 // If parse failed, do not return error code 4897 // to avoid excessive error messages. 
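  // Accepted forms (illustrative; symbolic names come from AMDGPU::Hwreg):
  //   hwreg(HW_REG_MODE, 0, 32)   register, bit offset, field width
  //   hwreg(6)                    numeric id, offset and width defaulted
  // or a raw 16-bit immediate.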
if (trySkipId("hwreg", AsmToken::LParen)) {
    OperandInfoTy HwReg(ID_UNKNOWN_);
    int64_t Offset = OFFSET_DEFAULT_;
    int64_t Width = WIDTH_DEFAULT_;
    if (parseHwregBody(HwReg, Offset, Width) &&
        validateHwreg(HwReg, Offset, Width, Loc)) {
      ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
    }
  } else if (parseExpr(ImmVal)) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal))
      Error(Loc, "invalid immediate: only 16-bit values are legal");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}

//===----------------------------------------------------------------------===//
// sendmsg
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id)) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id)) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream,
                                 const SMLoc S) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case
  // we only check that the value can be encoded.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(S, "invalid message id");
    return false;
  } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    Error(S, Op.IsDefined ?
          "message does not support operations" :
          "missing message operation");
    return false;
  } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
    Error(S, "invalid operation id");
    return false;
  } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(S, "message operation does not support streams");
    return false;
  } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
    Error(S, "invalid message stream id");
    return false;
  }
  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  // If parse failed, do not return error code
  // to avoid excessive error messages.
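  // Accepted forms (illustrative; symbolic names come from AMDGPU::SendMsg):
  //   sendmsg(MSG_GS, GS_OP_EMIT, 0)   message, operation, stream id
  //   sendmsg(MSG_INTERRUPT)           operation and stream id omitted
  //   sendmsg(2, 2, 0)                 numeric form
  // or a raw 16-bit immediate.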
4997 if (trySkipId("sendmsg", AsmToken::LParen)) { 4998 OperandInfoTy Msg(ID_UNKNOWN_); 4999 OperandInfoTy Op(OP_NONE_); 5000 OperandInfoTy Stream(STREAM_ID_NONE_); 5001 if (parseSendMsgBody(Msg, Op, Stream) && 5002 validateSendMsg(Msg, Op, Stream, Loc)) { 5003 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5004 } 5005 } else if (parseExpr(ImmVal)) { 5006 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5007 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5008 } 5009 5010 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5011 return MatchOperand_Success; 5012 } 5013 5014 bool AMDGPUOperand::isSendMsg() const { 5015 return isImmTy(ImmTySendMsg); 5016 } 5017 5018 //===----------------------------------------------------------------------===// 5019 // v_interp 5020 //===----------------------------------------------------------------------===// 5021 5022 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5023 if (getLexer().getKind() != AsmToken::Identifier) 5024 return MatchOperand_NoMatch; 5025 5026 StringRef Str = Parser.getTok().getString(); 5027 int Slot = StringSwitch<int>(Str) 5028 .Case("p10", 0) 5029 .Case("p20", 1) 5030 .Case("p0", 2) 5031 .Default(-1); 5032 5033 SMLoc S = Parser.getTok().getLoc(); 5034 if (Slot == -1) 5035 return MatchOperand_ParseFail; 5036 5037 Parser.Lex(); 5038 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5039 AMDGPUOperand::ImmTyInterpSlot)); 5040 return MatchOperand_Success; 5041 } 5042 5043 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5044 if (getLexer().getKind() != AsmToken::Identifier) 5045 return MatchOperand_NoMatch; 5046 5047 StringRef Str = Parser.getTok().getString(); 5048 if (!Str.startswith("attr")) 5049 return MatchOperand_NoMatch; 5050 5051 StringRef Chan = Str.take_back(2); 5052 int AttrChan = StringSwitch<int>(Chan) 5053 .Case(".x", 0) 5054 .Case(".y", 1) 5055 .Case(".z", 2) 5056 .Case(".w", 3) 5057 .Default(-1); 5058 if (AttrChan == -1) 5059 return MatchOperand_ParseFail; 5060 5061 Str = Str.drop_back(2).drop_front(4); 5062 5063 uint8_t Attr; 5064 if (Str.getAsInteger(10, Attr)) 5065 return MatchOperand_ParseFail; 5066 5067 SMLoc S = Parser.getTok().getLoc(); 5068 Parser.Lex(); 5069 if (Attr > 63) { 5070 Error(S, "out of bounds attr"); 5071 return MatchOperand_Success; 5072 } 5073 5074 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5075 5076 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5077 AMDGPUOperand::ImmTyInterpAttr)); 5078 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5079 AMDGPUOperand::ImmTyAttrChan)); 5080 return MatchOperand_Success; 5081 } 5082 5083 //===----------------------------------------------------------------------===// 5084 // exp 5085 //===----------------------------------------------------------------------===// 5086 5087 void AMDGPUAsmParser::errorExpTgt() { 5088 Error(Parser.getTok().getLoc(), "invalid exp target"); 5089 } 5090 5091 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5092 uint8_t &Val) { 5093 if (Str == "null") { 5094 Val = 9; 5095 return MatchOperand_Success; 5096 } 5097 5098 if (Str.startswith("mrt")) { 5099 Str = Str.drop_front(3); 5100 if (Str == "z") { // == mrtz 5101 Val = 8; 5102 return MatchOperand_Success; 5103 } 5104 5105 if (Str.getAsInteger(10, Val)) 5106 return MatchOperand_ParseFail; 5107 5108 if (Val > 7) 5109 errorExpTgt(); 5110 5111 return MatchOperand_Success; 5112 } 5113 5114 if (Str.startswith("pos")) 
{ 5115 Str = Str.drop_front(3); 5116 if (Str.getAsInteger(10, Val)) 5117 return MatchOperand_ParseFail; 5118 5119 if (Val > 4 || (Val == 4 && !isGFX10())) 5120 errorExpTgt(); 5121 5122 Val += 12; 5123 return MatchOperand_Success; 5124 } 5125 5126 if (isGFX10() && Str == "prim") { 5127 Val = 20; 5128 return MatchOperand_Success; 5129 } 5130 5131 if (Str.startswith("param")) { 5132 Str = Str.drop_front(5); 5133 if (Str.getAsInteger(10, Val)) 5134 return MatchOperand_ParseFail; 5135 5136 if (Val >= 32) 5137 errorExpTgt(); 5138 5139 Val += 32; 5140 return MatchOperand_Success; 5141 } 5142 5143 if (Str.startswith("invalid_target_")) { 5144 Str = Str.drop_front(15); 5145 if (Str.getAsInteger(10, Val)) 5146 return MatchOperand_ParseFail; 5147 5148 errorExpTgt(); 5149 return MatchOperand_Success; 5150 } 5151 5152 return MatchOperand_NoMatch; 5153 } 5154 5155 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5156 uint8_t Val; 5157 StringRef Str = Parser.getTok().getString(); 5158 5159 auto Res = parseExpTgtImpl(Str, Val); 5160 if (Res != MatchOperand_Success) 5161 return Res; 5162 5163 SMLoc S = Parser.getTok().getLoc(); 5164 Parser.Lex(); 5165 5166 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5167 AMDGPUOperand::ImmTyExpTgt)); 5168 return MatchOperand_Success; 5169 } 5170 5171 //===----------------------------------------------------------------------===// 5172 // parser helpers 5173 //===----------------------------------------------------------------------===// 5174 5175 bool 5176 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5177 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5178 } 5179 5180 bool 5181 AMDGPUAsmParser::isId(const StringRef Id) const { 5182 return isId(getToken(), Id); 5183 } 5184 5185 bool 5186 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5187 return getTokenKind() == Kind; 5188 } 5189 5190 bool 5191 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5192 if (isId(Id)) { 5193 lex(); 5194 return true; 5195 } 5196 return false; 5197 } 5198 5199 bool 5200 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5201 if (isId(Id) && peekToken().is(Kind)) { 5202 lex(); 5203 lex(); 5204 return true; 5205 } 5206 return false; 5207 } 5208 5209 bool 5210 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5211 if (isToken(Kind)) { 5212 lex(); 5213 return true; 5214 } 5215 return false; 5216 } 5217 5218 bool 5219 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5220 const StringRef ErrMsg) { 5221 if (!trySkipToken(Kind)) { 5222 Error(getLoc(), ErrMsg); 5223 return false; 5224 } 5225 return true; 5226 } 5227 5228 bool 5229 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5230 return !getParser().parseAbsoluteExpression(Imm); 5231 } 5232 5233 bool 5234 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5235 SMLoc S = getLoc(); 5236 5237 const MCExpr *Expr; 5238 if (Parser.parseExpression(Expr)) 5239 return false; 5240 5241 int64_t IntVal; 5242 if (Expr->evaluateAsAbsolute(IntVal)) { 5243 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5244 } else { 5245 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5246 } 5247 return true; 5248 } 5249 5250 bool 5251 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5252 if (isToken(AsmToken::String)) { 5253 Val = getToken().getStringContents(); 5254 lex(); 5255 return true; 5256 } else { 5257 Error(getLoc(), ErrMsg); 5258 return false; 5259 } 5260 } 5261 5262 
AsmToken 5263 AMDGPUAsmParser::getToken() const { 5264 return Parser.getTok(); 5265 } 5266 5267 AsmToken 5268 AMDGPUAsmParser::peekToken() { 5269 return getLexer().peekTok(); 5270 } 5271 5272 void 5273 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5274 auto TokCount = getLexer().peekTokens(Tokens); 5275 5276 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5277 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5278 } 5279 5280 AsmToken::TokenKind 5281 AMDGPUAsmParser::getTokenKind() const { 5282 return getLexer().getKind(); 5283 } 5284 5285 SMLoc 5286 AMDGPUAsmParser::getLoc() const { 5287 return getToken().getLoc(); 5288 } 5289 5290 StringRef 5291 AMDGPUAsmParser::getTokenStr() const { 5292 return getToken().getString(); 5293 } 5294 5295 void 5296 AMDGPUAsmParser::lex() { 5297 Parser.Lex(); 5298 } 5299 5300 //===----------------------------------------------------------------------===// 5301 // swizzle 5302 //===----------------------------------------------------------------------===// 5303 5304 LLVM_READNONE 5305 static unsigned 5306 encodeBitmaskPerm(const unsigned AndMask, 5307 const unsigned OrMask, 5308 const unsigned XorMask) { 5309 using namespace llvm::AMDGPU::Swizzle; 5310 5311 return BITMASK_PERM_ENC | 5312 (AndMask << BITMASK_AND_SHIFT) | 5313 (OrMask << BITMASK_OR_SHIFT) | 5314 (XorMask << BITMASK_XOR_SHIFT); 5315 } 5316 5317 bool 5318 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5319 const unsigned MinVal, 5320 const unsigned MaxVal, 5321 const StringRef ErrMsg) { 5322 for (unsigned i = 0; i < OpNum; ++i) { 5323 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5324 return false; 5325 } 5326 SMLoc ExprLoc = Parser.getTok().getLoc(); 5327 if (!parseExpr(Op[i])) { 5328 return false; 5329 } 5330 if (Op[i] < MinVal || Op[i] > MaxVal) { 5331 Error(ExprLoc, ErrMsg); 5332 return false; 5333 } 5334 } 5335 5336 return true; 5337 } 5338 5339 bool 5340 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5341 using namespace llvm::AMDGPU::Swizzle; 5342 5343 int64_t Lane[LANE_NUM]; 5344 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5345 "expected a 2-bit lane id")) { 5346 Imm = QUAD_PERM_ENC; 5347 for (unsigned I = 0; I < LANE_NUM; ++I) { 5348 Imm |= Lane[I] << (LANE_SHIFT * I); 5349 } 5350 return true; 5351 } 5352 return false; 5353 } 5354 5355 bool 5356 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5357 using namespace llvm::AMDGPU::Swizzle; 5358 5359 SMLoc S = Parser.getTok().getLoc(); 5360 int64_t GroupSize; 5361 int64_t LaneIdx; 5362 5363 if (!parseSwizzleOperands(1, &GroupSize, 5364 2, 32, 5365 "group size must be in the interval [2,32]")) { 5366 return false; 5367 } 5368 if (!isPowerOf2_64(GroupSize)) { 5369 Error(S, "group size must be a power of two"); 5370 return false; 5371 } 5372 if (parseSwizzleOperands(1, &LaneIdx, 5373 0, GroupSize - 1, 5374 "lane id must be in the interval [0,group size - 1]")) { 5375 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5376 return true; 5377 } 5378 return false; 5379 } 5380 5381 bool 5382 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5383 using namespace llvm::AMDGPU::Swizzle; 5384 5385 SMLoc S = Parser.getTok().getLoc(); 5386 int64_t GroupSize; 5387 5388 if (!parseSwizzleOperands(1, &GroupSize, 5389 2, 32, "group size must be in the interval [2,32]")) { 5390 return false; 5391 } 5392 if (!isPowerOf2_64(GroupSize)) { 5393 Error(S, "group size must be a power of two"); 5394 return false; 5395 } 5396 5397 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, 
GroupSize - 1); 5398 return true; 5399 } 5400 5401 bool 5402 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5403 using namespace llvm::AMDGPU::Swizzle; 5404 5405 SMLoc S = Parser.getTok().getLoc(); 5406 int64_t GroupSize; 5407 5408 if (!parseSwizzleOperands(1, &GroupSize, 5409 1, 16, "group size must be in the interval [1,16]")) { 5410 return false; 5411 } 5412 if (!isPowerOf2_64(GroupSize)) { 5413 Error(S, "group size must be a power of two"); 5414 return false; 5415 } 5416 5417 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5418 return true; 5419 } 5420 5421 bool 5422 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5423 using namespace llvm::AMDGPU::Swizzle; 5424 5425 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5426 return false; 5427 } 5428 5429 StringRef Ctl; 5430 SMLoc StrLoc = Parser.getTok().getLoc(); 5431 if (!parseString(Ctl)) { 5432 return false; 5433 } 5434 if (Ctl.size() != BITMASK_WIDTH) { 5435 Error(StrLoc, "expected a 5-character mask"); 5436 return false; 5437 } 5438 5439 unsigned AndMask = 0; 5440 unsigned OrMask = 0; 5441 unsigned XorMask = 0; 5442 5443 for (size_t i = 0; i < Ctl.size(); ++i) { 5444 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5445 switch(Ctl[i]) { 5446 default: 5447 Error(StrLoc, "invalid mask"); 5448 return false; 5449 case '0': 5450 break; 5451 case '1': 5452 OrMask |= Mask; 5453 break; 5454 case 'p': 5455 AndMask |= Mask; 5456 break; 5457 case 'i': 5458 AndMask |= Mask; 5459 XorMask |= Mask; 5460 break; 5461 } 5462 } 5463 5464 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5465 return true; 5466 } 5467 5468 bool 5469 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5470 5471 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5472 5473 if (!parseExpr(Imm)) { 5474 return false; 5475 } 5476 if (!isUInt<16>(Imm)) { 5477 Error(OffsetLoc, "expected a 16-bit offset"); 5478 return false; 5479 } 5480 return true; 5481 } 5482 5483 bool 5484 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5485 using namespace llvm::AMDGPU::Swizzle; 5486 5487 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5488 5489 SMLoc ModeLoc = Parser.getTok().getLoc(); 5490 bool Ok = false; 5491 5492 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5493 Ok = parseSwizzleQuadPerm(Imm); 5494 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5495 Ok = parseSwizzleBitmaskPerm(Imm); 5496 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5497 Ok = parseSwizzleBroadcast(Imm); 5498 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5499 Ok = parseSwizzleSwap(Imm); 5500 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5501 Ok = parseSwizzleReverse(Imm); 5502 } else { 5503 Error(ModeLoc, "expected a swizzle mode"); 5504 } 5505 5506 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5507 } 5508 5509 return false; 5510 } 5511 5512 OperandMatchResultTy 5513 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5514 SMLoc S = Parser.getTok().getLoc(); 5515 int64_t Imm = 0; 5516 5517 if (trySkipId("offset")) { 5518 5519 bool Ok = false; 5520 if (skipToken(AsmToken::Colon, "expected a colon")) { 5521 if (trySkipId("swizzle")) { 5522 Ok = parseSwizzleMacro(Imm); 5523 } else { 5524 Ok = parseSwizzleOffset(Imm); 5525 } 5526 } 5527 5528 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5529 5530 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5531 } else { 5532 // Swizzle "offset" operand is optional. 5533 // If it is omitted, try parsing other optional operands. 
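    // A present operand would have looked like, e.g.,
    //   offset:swizzle(BROADCAST,8,0)   or   offset:0xffff
    // (mode names come from Swizzle::IdSymbolic; examples illustrative only);
    // both forms are handled in the branch above.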
5534 return parseOptionalOpr(Operands); 5535 } 5536 } 5537 5538 bool 5539 AMDGPUOperand::isSwizzle() const { 5540 return isImmTy(ImmTySwizzle); 5541 } 5542 5543 //===----------------------------------------------------------------------===// 5544 // VGPR Index Mode 5545 //===----------------------------------------------------------------------===// 5546 5547 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5548 5549 using namespace llvm::AMDGPU::VGPRIndexMode; 5550 5551 if (trySkipToken(AsmToken::RParen)) { 5552 return OFF; 5553 } 5554 5555 int64_t Imm = 0; 5556 5557 while (true) { 5558 unsigned Mode = 0; 5559 SMLoc S = Parser.getTok().getLoc(); 5560 5561 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5562 if (trySkipId(IdSymbolic[ModeId])) { 5563 Mode = 1 << ModeId; 5564 break; 5565 } 5566 } 5567 5568 if (Mode == 0) { 5569 Error(S, (Imm == 0)? 5570 "expected a VGPR index mode or a closing parenthesis" : 5571 "expected a VGPR index mode"); 5572 break; 5573 } 5574 5575 if (Imm & Mode) { 5576 Error(S, "duplicate VGPR index mode"); 5577 break; 5578 } 5579 Imm |= Mode; 5580 5581 if (trySkipToken(AsmToken::RParen)) 5582 break; 5583 if (!skipToken(AsmToken::Comma, 5584 "expected a comma or a closing parenthesis")) 5585 break; 5586 } 5587 5588 return Imm; 5589 } 5590 5591 OperandMatchResultTy 5592 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5593 5594 int64_t Imm = 0; 5595 SMLoc S = Parser.getTok().getLoc(); 5596 5597 if (getLexer().getKind() == AsmToken::Identifier && 5598 Parser.getTok().getString() == "gpr_idx" && 5599 getLexer().peekTok().is(AsmToken::LParen)) { 5600 5601 Parser.Lex(); 5602 Parser.Lex(); 5603 5604 // If parse failed, trigger an error but do not return error code 5605 // to avoid excessive error messages. 5606 Imm = parseGPRIdxMacro(); 5607 5608 } else { 5609 if (getParser().parseAbsoluteExpression(Imm)) 5610 return MatchOperand_NoMatch; 5611 if (Imm < 0 || !isUInt<4>(Imm)) { 5612 Error(S, "invalid immediate: only 4-bit values are legal"); 5613 } 5614 } 5615 5616 Operands.push_back( 5617 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5618 return MatchOperand_Success; 5619 } 5620 5621 bool AMDGPUOperand::isGPRIdxMode() const { 5622 return isImmTy(ImmTyGprIdxMode); 5623 } 5624 5625 //===----------------------------------------------------------------------===// 5626 // sopp branch targets 5627 //===----------------------------------------------------------------------===// 5628 5629 OperandMatchResultTy 5630 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5631 5632 // Make sure we are not parsing something 5633 // that looks like a label or an expression but is not. 5634 // This will improve error messages. 5635 if (isRegister() || isModifier()) 5636 return MatchOperand_NoMatch; 5637 5638 if (parseExpr(Operands)) { 5639 5640 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 5641 assert(Opr.isImm() || Opr.isExpr()); 5642 SMLoc Loc = Opr.getStartLoc(); 5643 5644 // Currently we do not support arbitrary expressions as branch targets. 5645 // Only labels and absolute expressions are accepted. 
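    // For example, "s_branch loop_end" (a label) and "s_branch 4" (an absolute
    // 16-bit offset) are fine, while a composite expression such as
    // "loop_end + 4" is rejected below (label name illustrative).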
5646 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 5647 Error(Loc, "expected an absolute expression or a label"); 5648 } else if (Opr.isImm() && !Opr.isS16Imm()) { 5649 Error(Loc, "expected a 16-bit signed jump offset"); 5650 } 5651 } 5652 5653 return MatchOperand_Success; // avoid excessive error messages 5654 } 5655 5656 //===----------------------------------------------------------------------===// 5657 // Boolean holding registers 5658 //===----------------------------------------------------------------------===// 5659 5660 OperandMatchResultTy 5661 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5662 return parseReg(Operands); 5663 } 5664 5665 //===----------------------------------------------------------------------===// 5666 // mubuf 5667 //===----------------------------------------------------------------------===// 5668 5669 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5670 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5671 } 5672 5673 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5674 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5675 } 5676 5677 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5678 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5679 } 5680 5681 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5682 const OperandVector &Operands, 5683 bool IsAtomic, 5684 bool IsAtomicReturn, 5685 bool IsLds) { 5686 bool IsLdsOpcode = IsLds; 5687 bool HasLdsModifier = false; 5688 OptionalImmIndexMap OptionalIdx; 5689 assert(IsAtomicReturn ? IsAtomic : true); 5690 unsigned FirstOperandIdx = 1; 5691 5692 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5693 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5694 5695 // Add the register arguments 5696 if (Op.isReg()) { 5697 Op.addRegOperands(Inst, 1); 5698 // Insert a tied src for atomic return dst. 5699 // This cannot be postponed as subsequent calls to 5700 // addImmOperands rely on correct number of MC operands. 5701 if (IsAtomicReturn && i == FirstOperandIdx) 5702 Op.addRegOperands(Inst, 1); 5703 continue; 5704 } 5705 5706 // Handle the case where soffset is an immediate 5707 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5708 Op.addImmOperands(Inst, 1); 5709 continue; 5710 } 5711 5712 HasLdsModifier |= Op.isLDS(); 5713 5714 // Handle tokens like 'offen' which are sometimes hard-coded into the 5715 // asm string. There are no MCInst operands for these. 5716 if (Op.isToken()) { 5717 continue; 5718 } 5719 assert(Op.isImm()); 5720 5721 // Handle optional arguments 5722 OptionalIdx[Op.getImmTy()] = i; 5723 } 5724 5725 // This is a workaround for an llvm quirk which may result in an 5726 // incorrect instruction selection. Lds and non-lds versions of 5727 // MUBUF instructions are identical except that lds versions 5728 // have mandatory 'lds' modifier. However this modifier follows 5729 // optional modifiers and llvm asm matcher regards this 'lds' 5730 // modifier as an optional one. As a result, an lds version 5731 // of opcode may be selected even if it has no 'lds' modifier. 5732 if (IsLdsOpcode && !HasLdsModifier) { 5733 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5734 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5735 Inst.setOpcode(NoLdsOpcode); 5736 IsLdsOpcode = false; 5737 } 5738 } 5739 5740 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5741 if (!IsAtomic) { // glc is hard-coded. 
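    // For atomics the glc bit is implied by the opcode (returning vs.
    // non-returning variant), so only non-atomic forms get an explicit glc
    // operand here.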
5742 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5743 } 5744 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5745 5746 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5747 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5748 } 5749 5750 if (isGFX10()) 5751 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5752 } 5753 5754 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5755 OptionalImmIndexMap OptionalIdx; 5756 5757 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5758 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5759 5760 // Add the register arguments 5761 if (Op.isReg()) { 5762 Op.addRegOperands(Inst, 1); 5763 continue; 5764 } 5765 5766 // Handle the case where soffset is an immediate 5767 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5768 Op.addImmOperands(Inst, 1); 5769 continue; 5770 } 5771 5772 // Handle tokens like 'offen' which are sometimes hard-coded into the 5773 // asm string. There are no MCInst operands for these. 5774 if (Op.isToken()) { 5775 continue; 5776 } 5777 assert(Op.isImm()); 5778 5779 // Handle optional arguments 5780 OptionalIdx[Op.getImmTy()] = i; 5781 } 5782 5783 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5784 AMDGPUOperand::ImmTyOffset); 5785 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5786 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5787 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5788 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5789 5790 if (isGFX10()) 5791 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5792 } 5793 5794 //===----------------------------------------------------------------------===// 5795 // mimg 5796 //===----------------------------------------------------------------------===// 5797 5798 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5799 bool IsAtomic) { 5800 unsigned I = 1; 5801 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5802 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5803 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5804 } 5805 5806 if (IsAtomic) { 5807 // Add src, same as dst 5808 assert(Desc.getNumDefs() == 1); 5809 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5810 } 5811 5812 OptionalImmIndexMap OptionalIdx; 5813 5814 for (unsigned E = Operands.size(); I != E; ++I) { 5815 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5816 5817 // Add the register arguments 5818 if (Op.isReg()) { 5819 Op.addRegOperands(Inst, 1); 5820 } else if (Op.isImmModifier()) { 5821 OptionalIdx[Op.getImmTy()] = I; 5822 } else if (!Op.isToken()) { 5823 llvm_unreachable("unexpected operand type"); 5824 } 5825 } 5826 5827 bool IsGFX10 = isGFX10(); 5828 5829 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5830 if (IsGFX10) 5831 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 5832 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5833 if (IsGFX10) 5834 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5835 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5836 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5837 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5838 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5839 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5840 if (!IsGFX10) 5841 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5842 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5843 } 5844 5845 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5846 cvtMIMG(Inst, Operands, true); 5847 } 5848 5849 //===----------------------------------------------------------------------===// 5850 // smrd 5851 //===----------------------------------------------------------------------===// 5852 5853 bool AMDGPUOperand::isSMRDOffset8() const { 5854 return isImm() && isUInt<8>(getImm()); 5855 } 5856 5857 bool AMDGPUOperand::isSMRDOffset20() const { 5858 return isImm() && isUInt<20>(getImm()); 5859 } 5860 5861 bool AMDGPUOperand::isSMRDLiteralOffset() const { 5862 // 32-bit literals are only supported on CI and we only want to use them 5863 // when the offset is > 8-bits. 5864 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 5865 } 5866 5867 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 5868 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5869 } 5870 5871 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 5872 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5873 } 5874 5875 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5876 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5877 } 5878 5879 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 5880 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5881 } 5882 5883 //===----------------------------------------------------------------------===// 5884 // vop3 5885 //===----------------------------------------------------------------------===// 5886 5887 static bool ConvertOmodMul(int64_t &Mul) { 5888 if (Mul != 1 && Mul != 2 && Mul != 4) 5889 return false; 5890 5891 Mul >>= 1; 5892 return true; 5893 } 5894 5895 static bool ConvertOmodDiv(int64_t &Div) { 5896 if (Div == 1) { 5897 Div = 0; 5898 return true; 5899 } 5900 5901 if (Div == 2) { 5902 Div = 3; 5903 return true; 5904 } 5905 5906 return false; 5907 } 5908 5909 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 5910 if (BoundCtrl == 0) { 5911 BoundCtrl = 1; 5912 return true; 5913 } 5914 5915 if (BoundCtrl == -1) { 5916 BoundCtrl = 0; 5917 return true; 5918 } 5919 5920 return false; 5921 } 5922 5923 // Note: the order in this table matches the order of operands in AsmString. 
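// Each entry is { name, immediate type, is-single-bit-flag, optional converter }.
// Single-bit entries ("glc", "slc", ...) are parsed with parseNamedBit(); the
// others are parsed as "name:value" by parseIntWithPrefix() or by a dedicated
// parser, as dispatched in parseOptionalOpr() below.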
5924 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 5925 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 5926 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 5927 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 5928 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 5929 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 5930 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 5931 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 5932 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 5933 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 5934 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 5935 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 5936 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 5937 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 5938 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 5939 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5940 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 5941 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 5942 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 5943 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 5944 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 5945 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5946 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5947 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 5948 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5949 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 5950 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 5951 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 5952 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 5953 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 5954 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 5955 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 5956 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 5957 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 5958 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 5959 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 5960 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 5961 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 5962 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 5963 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 5964 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 5965 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 5966 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 5967 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 5968 }; 5969 5970 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 5971 unsigned size = Operands.size(); 5972 assert(size > 0); 5973 5974 OperandMatchResultTy res = parseOptionalOpr(Operands); 5975 5976 // This is a hack to enable hardcoded mandatory operands which follow 5977 // optional operands. 5978 // 5979 // Current design assumes that all operands after the first optional operand 5980 // are also optional. However implementation of some instructions violates 5981 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 5982 // 5983 // To alleviate this problem, we have to (implicitly) parse extra operands 5984 // to make sure autogenerated parser of custom operands never hit hardcoded 5985 // mandatory operands. 
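// For instance, a returning flat/global atomic spells a mandatory "glc" after
// optional modifiers such as "offset"; the look-ahead loop below consumes it
// here so that the autogenerated parser never stumbles over it.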
5986 5987 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 5988 5989 // We have parsed the first optional operand. 5990 // Parse as many operands as necessary to skip all mandatory operands. 5991 5992 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 5993 if (res != MatchOperand_Success || 5994 getLexer().is(AsmToken::EndOfStatement)) break; 5995 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 5996 res = parseOptionalOpr(Operands); 5997 } 5998 } 5999 6000 return res; 6001 } 6002 6003 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6004 OperandMatchResultTy res; 6005 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6006 // try to parse any optional operand here 6007 if (Op.IsBit) { 6008 res = parseNamedBit(Op.Name, Operands, Op.Type); 6009 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6010 res = parseOModOperand(Operands); 6011 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6012 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6013 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6014 res = parseSDWASel(Operands, Op.Name, Op.Type); 6015 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6016 res = parseSDWADstUnused(Operands); 6017 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6018 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6019 Op.Type == AMDGPUOperand::ImmTyNegLo || 6020 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6021 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6022 Op.ConvertResult); 6023 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6024 res = parseDim(Operands); 6025 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 6026 res = parseDfmtNfmt(Operands); 6027 } else { 6028 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6029 } 6030 if (res != MatchOperand_NoMatch) { 6031 return res; 6032 } 6033 } 6034 return MatchOperand_NoMatch; 6035 } 6036 6037 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6038 StringRef Name = Parser.getTok().getString(); 6039 if (Name == "mul") { 6040 return parseIntWithPrefix("mul", Operands, 6041 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6042 } 6043 6044 if (Name == "div") { 6045 return parseIntWithPrefix("div", Operands, 6046 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6047 } 6048 6049 return MatchOperand_NoMatch; 6050 } 6051 6052 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6053 cvtVOP3P(Inst, Operands); 6054 6055 int Opc = Inst.getOpcode(); 6056 6057 int SrcNum; 6058 const int Ops[] = { AMDGPU::OpName::src0, 6059 AMDGPU::OpName::src1, 6060 AMDGPU::OpName::src2 }; 6061 for (SrcNum = 0; 6062 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6063 ++SrcNum); 6064 assert(SrcNum > 0); 6065 6066 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6067 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6068 6069 if ((OpSel & (1 << SrcNum)) != 0) { 6070 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6071 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6072 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6073 } 6074 } 6075 6076 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6077 // 1. This operand is input modifiers 6078 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6079 // 2. This is not last operand 6080 && Desc.NumOperands > (OpNum + 1) 6081 // 3. 
Next operand is register class 6082 && Desc.OpInfo[OpNum + 1].RegClass != -1 6083 // 4. Next register is not tied to any other operand 6084 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6085 } 6086 6087 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6088 { 6089 OptionalImmIndexMap OptionalIdx; 6090 unsigned Opc = Inst.getOpcode(); 6091 6092 unsigned I = 1; 6093 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6094 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6095 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6096 } 6097 6098 for (unsigned E = Operands.size(); I != E; ++I) { 6099 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6100 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6101 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6102 } else if (Op.isInterpSlot() || 6103 Op.isInterpAttr() || 6104 Op.isAttrChan()) { 6105 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6106 } else if (Op.isImmModifier()) { 6107 OptionalIdx[Op.getImmTy()] = I; 6108 } else { 6109 llvm_unreachable("unhandled operand type"); 6110 } 6111 } 6112 6113 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6114 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6115 } 6116 6117 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6118 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6119 } 6120 6121 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6122 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6123 } 6124 } 6125 6126 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6127 OptionalImmIndexMap &OptionalIdx) { 6128 unsigned Opc = Inst.getOpcode(); 6129 6130 unsigned I = 1; 6131 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6132 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6133 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6134 } 6135 6136 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6137 // This instruction has src modifiers 6138 for (unsigned E = Operands.size(); I != E; ++I) { 6139 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6140 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6141 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6142 } else if (Op.isImmModifier()) { 6143 OptionalIdx[Op.getImmTy()] = I; 6144 } else if (Op.isRegOrImm()) { 6145 Op.addRegOrImmOperands(Inst, 1); 6146 } else { 6147 llvm_unreachable("unhandled operand type"); 6148 } 6149 } 6150 } else { 6151 // No src modifiers 6152 for (unsigned E = Operands.size(); I != E; ++I) { 6153 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6154 if (Op.isMod()) { 6155 OptionalIdx[Op.getImmTy()] = I; 6156 } else { 6157 Op.addRegOrImmOperands(Inst, 1); 6158 } 6159 } 6160 } 6161 6162 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6163 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6164 } 6165 6166 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6167 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6168 } 6169 6170 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6171 // it has src2 register operand that is tied to dst operand 6172 // we don't allow modifiers for this operand in assembler so src2_modifiers 6173 // should be 0. 
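  // E.g. for "v_mac_f32_e64 v0, v1, v2" no src2 or src2_modifiers operands are
  // parsed from the assembly, so a zero src2_modifiers is inserted and src2 is
  // tied to the destination below.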
6174 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6175 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6176 Opc == AMDGPU::V_MAC_F32_e64_vi || 6177 Opc == AMDGPU::V_MAC_F16_e64_vi || 6178 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6179 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6180 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6181 auto it = Inst.begin(); 6182 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6183 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6184 ++it; 6185 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6186 } 6187 } 6188 6189 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6190 OptionalImmIndexMap OptionalIdx; 6191 cvtVOP3(Inst, Operands, OptionalIdx); 6192 } 6193 6194 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6195 const OperandVector &Operands) { 6196 OptionalImmIndexMap OptIdx; 6197 const int Opc = Inst.getOpcode(); 6198 const MCInstrDesc &Desc = MII.get(Opc); 6199 6200 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6201 6202 cvtVOP3(Inst, Operands, OptIdx); 6203 6204 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6205 assert(!IsPacked); 6206 Inst.addOperand(Inst.getOperand(0)); 6207 } 6208 6209 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6210 // instruction, and then figure out where to actually put the modifiers 6211 6212 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6213 6214 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6215 if (OpSelHiIdx != -1) { 6216 int DefaultVal = IsPacked ? -1 : 0; 6217 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6218 DefaultVal); 6219 } 6220 6221 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6222 if (NegLoIdx != -1) { 6223 assert(IsPacked); 6224 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6225 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6226 } 6227 6228 const int Ops[] = { AMDGPU::OpName::src0, 6229 AMDGPU::OpName::src1, 6230 AMDGPU::OpName::src2 }; 6231 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6232 AMDGPU::OpName::src1_modifiers, 6233 AMDGPU::OpName::src2_modifiers }; 6234 6235 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6236 6237 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6238 unsigned OpSelHi = 0; 6239 unsigned NegLo = 0; 6240 unsigned NegHi = 0; 6241 6242 if (OpSelHiIdx != -1) { 6243 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6244 } 6245 6246 if (NegLoIdx != -1) { 6247 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6248 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6249 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6250 } 6251 6252 for (int J = 0; J < 3; ++J) { 6253 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6254 if (OpIdx == -1) 6255 break; 6256 6257 uint32_t ModVal = 0; 6258 6259 if ((OpSel & (1 << J)) != 0) 6260 ModVal |= SISrcMods::OP_SEL_0; 6261 6262 if ((OpSelHi & (1 << J)) != 0) 6263 ModVal |= SISrcMods::OP_SEL_1; 6264 6265 if ((NegLo & (1 << J)) != 0) 6266 ModVal |= SISrcMods::NEG; 6267 6268 if ((NegHi & (1 << J)) != 0) 6269 ModVal |= SISrcMods::NEG_HI; 6270 6271 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6272 6273 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6274 } 6275 } 6276 6277 //===----------------------------------------------------------------------===// 6278 // dpp 6279 
//===----------------------------------------------------------------------===// 6280 6281 bool AMDGPUOperand::isDPP8() const { 6282 return isImmTy(ImmTyDPP8); 6283 } 6284 6285 bool AMDGPUOperand::isDPPCtrl() const { 6286 using namespace AMDGPU::DPP; 6287 6288 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6289 if (result) { 6290 int64_t Imm = getImm(); 6291 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6292 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6293 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6294 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6295 (Imm == DppCtrl::WAVE_SHL1) || 6296 (Imm == DppCtrl::WAVE_ROL1) || 6297 (Imm == DppCtrl::WAVE_SHR1) || 6298 (Imm == DppCtrl::WAVE_ROR1) || 6299 (Imm == DppCtrl::ROW_MIRROR) || 6300 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6301 (Imm == DppCtrl::BCAST15) || 6302 (Imm == DppCtrl::BCAST31) || 6303 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6304 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6305 } 6306 return false; 6307 } 6308 6309 //===----------------------------------------------------------------------===// 6310 // mAI 6311 //===----------------------------------------------------------------------===// 6312 6313 bool AMDGPUOperand::isBLGP() const { 6314 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6315 } 6316 6317 bool AMDGPUOperand::isCBSZ() const { 6318 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6319 } 6320 6321 bool AMDGPUOperand::isABID() const { 6322 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6323 } 6324 6325 bool AMDGPUOperand::isS16Imm() const { 6326 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6327 } 6328 6329 bool AMDGPUOperand::isU16Imm() const { 6330 return isImm() && isUInt<16>(getImm()); 6331 } 6332 6333 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6334 if (!isGFX10()) 6335 return MatchOperand_NoMatch; 6336 6337 SMLoc S = Parser.getTok().getLoc(); 6338 6339 if (getLexer().isNot(AsmToken::Identifier)) 6340 return MatchOperand_NoMatch; 6341 if (getLexer().getTok().getString() != "dim") 6342 return MatchOperand_NoMatch; 6343 6344 Parser.Lex(); 6345 if (getLexer().isNot(AsmToken::Colon)) 6346 return MatchOperand_ParseFail; 6347 6348 Parser.Lex(); 6349 6350 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6351 // integer. 
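  // I.e. "dim:1D" reaches us as the integer token "1" immediately followed by
  // the identifier "D"; the two pieces are re-joined below, and the
  // end-location check rejects any whitespace between them.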
6352 std::string Token; 6353 if (getLexer().is(AsmToken::Integer)) { 6354 SMLoc Loc = getLexer().getTok().getEndLoc(); 6355 Token = getLexer().getTok().getString(); 6356 Parser.Lex(); 6357 if (getLexer().getTok().getLoc() != Loc) 6358 return MatchOperand_ParseFail; 6359 } 6360 if (getLexer().isNot(AsmToken::Identifier)) 6361 return MatchOperand_ParseFail; 6362 Token += getLexer().getTok().getString(); 6363 6364 StringRef DimId = Token; 6365 if (DimId.startswith("SQ_RSRC_IMG_")) 6366 DimId = DimId.substr(12); 6367 6368 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6369 if (!DimInfo) 6370 return MatchOperand_ParseFail; 6371 6372 Parser.Lex(); 6373 6374 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6375 AMDGPUOperand::ImmTyDim)); 6376 return MatchOperand_Success; 6377 } 6378 6379 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6380 SMLoc S = Parser.getTok().getLoc(); 6381 StringRef Prefix; 6382 6383 if (getLexer().getKind() == AsmToken::Identifier) { 6384 Prefix = Parser.getTok().getString(); 6385 } else { 6386 return MatchOperand_NoMatch; 6387 } 6388 6389 if (Prefix != "dpp8") 6390 return parseDPPCtrl(Operands); 6391 if (!isGFX10()) 6392 return MatchOperand_NoMatch; 6393 6394 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6395 6396 int64_t Sels[8]; 6397 6398 Parser.Lex(); 6399 if (getLexer().isNot(AsmToken::Colon)) 6400 return MatchOperand_ParseFail; 6401 6402 Parser.Lex(); 6403 if (getLexer().isNot(AsmToken::LBrac)) 6404 return MatchOperand_ParseFail; 6405 6406 Parser.Lex(); 6407 if (getParser().parseAbsoluteExpression(Sels[0])) 6408 return MatchOperand_ParseFail; 6409 if (0 > Sels[0] || 7 < Sels[0]) 6410 return MatchOperand_ParseFail; 6411 6412 for (size_t i = 1; i < 8; ++i) { 6413 if (getLexer().isNot(AsmToken::Comma)) 6414 return MatchOperand_ParseFail; 6415 6416 Parser.Lex(); 6417 if (getParser().parseAbsoluteExpression(Sels[i])) 6418 return MatchOperand_ParseFail; 6419 if (0 > Sels[i] || 7 < Sels[i]) 6420 return MatchOperand_ParseFail; 6421 } 6422 6423 if (getLexer().isNot(AsmToken::RBrac)) 6424 return MatchOperand_ParseFail; 6425 Parser.Lex(); 6426 6427 unsigned DPP8 = 0; 6428 for (size_t i = 0; i < 8; ++i) 6429 DPP8 |= (Sels[i] << (i * 3)); 6430 6431 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6432 return MatchOperand_Success; 6433 } 6434 6435 OperandMatchResultTy 6436 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6437 using namespace AMDGPU::DPP; 6438 6439 SMLoc S = Parser.getTok().getLoc(); 6440 StringRef Prefix; 6441 int64_t Int; 6442 6443 if (getLexer().getKind() == AsmToken::Identifier) { 6444 Prefix = Parser.getTok().getString(); 6445 } else { 6446 return MatchOperand_NoMatch; 6447 } 6448 6449 if (Prefix == "row_mirror") { 6450 Int = DppCtrl::ROW_MIRROR; 6451 Parser.Lex(); 6452 } else if (Prefix == "row_half_mirror") { 6453 Int = DppCtrl::ROW_HALF_MIRROR; 6454 Parser.Lex(); 6455 } else { 6456 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6457 if (Prefix != "quad_perm" 6458 && Prefix != "row_shl" 6459 && Prefix != "row_shr" 6460 && Prefix != "row_ror" 6461 && Prefix != "wave_shl" 6462 && Prefix != "wave_rol" 6463 && Prefix != "wave_shr" 6464 && Prefix != "wave_ror" 6465 && Prefix != "row_bcast" 6466 && Prefix != "row_share" 6467 && Prefix != "row_xmask") { 6468 return MatchOperand_NoMatch; 6469 } 6470 6471 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6472 return MatchOperand_NoMatch; 6473 6474 if (!isVI() && 
!isGFX9() && 6475 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6476 Prefix == "wave_rol" || Prefix == "wave_ror" || 6477 Prefix == "row_bcast")) 6478 return MatchOperand_NoMatch; 6479 6480 Parser.Lex(); 6481 if (getLexer().isNot(AsmToken::Colon)) 6482 return MatchOperand_ParseFail; 6483 6484 if (Prefix == "quad_perm") { 6485 // quad_perm:[%d,%d,%d,%d] 6486 Parser.Lex(); 6487 if (getLexer().isNot(AsmToken::LBrac)) 6488 return MatchOperand_ParseFail; 6489 Parser.Lex(); 6490 6491 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6492 return MatchOperand_ParseFail; 6493 6494 for (int i = 0; i < 3; ++i) { 6495 if (getLexer().isNot(AsmToken::Comma)) 6496 return MatchOperand_ParseFail; 6497 Parser.Lex(); 6498 6499 int64_t Temp; 6500 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6501 return MatchOperand_ParseFail; 6502 const int shift = i*2 + 2; 6503 Int += (Temp << shift); 6504 } 6505 6506 if (getLexer().isNot(AsmToken::RBrac)) 6507 return MatchOperand_ParseFail; 6508 Parser.Lex(); 6509 } else { 6510 // sel:%d 6511 Parser.Lex(); 6512 if (getParser().parseAbsoluteExpression(Int)) 6513 return MatchOperand_ParseFail; 6514 6515 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6516 Int |= DppCtrl::ROW_SHL0; 6517 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6518 Int |= DppCtrl::ROW_SHR0; 6519 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6520 Int |= DppCtrl::ROW_ROR0; 6521 } else if (Prefix == "wave_shl" && 1 == Int) { 6522 Int = DppCtrl::WAVE_SHL1; 6523 } else if (Prefix == "wave_rol" && 1 == Int) { 6524 Int = DppCtrl::WAVE_ROL1; 6525 } else if (Prefix == "wave_shr" && 1 == Int) { 6526 Int = DppCtrl::WAVE_SHR1; 6527 } else if (Prefix == "wave_ror" && 1 == Int) { 6528 Int = DppCtrl::WAVE_ROR1; 6529 } else if (Prefix == "row_bcast") { 6530 if (Int == 15) { 6531 Int = DppCtrl::BCAST15; 6532 } else if (Int == 31) { 6533 Int = DppCtrl::BCAST31; 6534 } else { 6535 return MatchOperand_ParseFail; 6536 } 6537 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6538 Int |= DppCtrl::ROW_SHARE_FIRST; 6539 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6540 Int |= DppCtrl::ROW_XMASK_FIRST; 6541 } else { 6542 return MatchOperand_ParseFail; 6543 } 6544 } 6545 } 6546 6547 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6548 return MatchOperand_Success; 6549 } 6550 6551 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6552 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6553 } 6554 6555 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6556 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6557 } 6558 6559 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6560 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6561 } 6562 6563 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6564 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6565 } 6566 6567 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6568 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6569 } 6570 6571 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6572 OptionalImmIndexMap OptionalIdx; 6573 6574 unsigned I = 1; 6575 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6576 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6577 ((AMDGPUOperand 
&)*Operands[I++]).addRegOperands(Inst, 1); 6578 } 6579 6580 int Fi = 0; 6581 for (unsigned E = Operands.size(); I != E; ++I) { 6582 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6583 MCOI::TIED_TO); 6584 if (TiedTo != -1) { 6585 assert((unsigned)TiedTo < Inst.getNumOperands()); 6586 // handle tied old or src2 for MAC instructions 6587 Inst.addOperand(Inst.getOperand(TiedTo)); 6588 } 6589 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6590 // Add the register arguments 6591 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6592 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6593 // Skip it. 6594 continue; 6595 } 6596 6597 if (IsDPP8) { 6598 if (Op.isDPP8()) { 6599 Op.addImmOperands(Inst, 1); 6600 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6601 Op.addRegWithFPInputModsOperands(Inst, 2); 6602 } else if (Op.isFI()) { 6603 Fi = Op.getImm(); 6604 } else if (Op.isReg()) { 6605 Op.addRegOperands(Inst, 1); 6606 } else { 6607 llvm_unreachable("Invalid operand type"); 6608 } 6609 } else { 6610 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6611 Op.addRegWithFPInputModsOperands(Inst, 2); 6612 } else if (Op.isDPPCtrl()) { 6613 Op.addImmOperands(Inst, 1); 6614 } else if (Op.isImm()) { 6615 // Handle optional arguments 6616 OptionalIdx[Op.getImmTy()] = I; 6617 } else { 6618 llvm_unreachable("Invalid operand type"); 6619 } 6620 } 6621 } 6622 6623 if (IsDPP8) { 6624 using namespace llvm::AMDGPU::DPP; 6625 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6626 } else { 6627 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6628 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6629 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6630 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6631 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6632 } 6633 } 6634 } 6635 6636 //===----------------------------------------------------------------------===// 6637 // sdwa 6638 //===----------------------------------------------------------------------===// 6639 6640 OperandMatchResultTy 6641 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6642 AMDGPUOperand::ImmTy Type) { 6643 using namespace llvm::AMDGPU::SDWA; 6644 6645 SMLoc S = Parser.getTok().getLoc(); 6646 StringRef Value; 6647 OperandMatchResultTy res; 6648 6649 res = parseStringWithPrefix(Prefix, Value); 6650 if (res != MatchOperand_Success) { 6651 return res; 6652 } 6653 6654 int64_t Int; 6655 Int = StringSwitch<int64_t>(Value) 6656 .Case("BYTE_0", SdwaSel::BYTE_0) 6657 .Case("BYTE_1", SdwaSel::BYTE_1) 6658 .Case("BYTE_2", SdwaSel::BYTE_2) 6659 .Case("BYTE_3", SdwaSel::BYTE_3) 6660 .Case("WORD_0", SdwaSel::WORD_0) 6661 .Case("WORD_1", SdwaSel::WORD_1) 6662 .Case("DWORD", SdwaSel::DWORD) 6663 .Default(0xffffffff); 6664 Parser.Lex(); // eat last token 6665 6666 if (Int == 0xffffffff) { 6667 return MatchOperand_ParseFail; 6668 } 6669 6670 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6671 return MatchOperand_Success; 6672 } 6673 6674 OperandMatchResultTy 6675 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6676 using namespace llvm::AMDGPU::SDWA; 6677 6678 SMLoc S = Parser.getTok().getLoc(); 6679 StringRef Value; 6680 OperandMatchResultTy res; 6681 6682 res = parseStringWithPrefix("dst_unused", Value); 6683 if (res != MatchOperand_Success) { 6684 
return res; 6685 } 6686 6687 int64_t Int; 6688 Int = StringSwitch<int64_t>(Value) 6689 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6690 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6691 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6692 .Default(0xffffffff); 6693 Parser.Lex(); // eat last token 6694 6695 if (Int == 0xffffffff) { 6696 return MatchOperand_ParseFail; 6697 } 6698 6699 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6700 return MatchOperand_Success; 6701 } 6702 6703 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6704 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6705 } 6706 6707 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6708 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6709 } 6710 6711 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6712 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); 6713 } 6714 6715 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6716 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6717 } 6718 6719 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6720 uint64_t BasicInstType, bool skipVcc) { 6721 using namespace llvm::AMDGPU::SDWA; 6722 6723 OptionalImmIndexMap OptionalIdx; 6724 bool skippedVcc = false; 6725 6726 unsigned I = 1; 6727 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6728 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6729 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6730 } 6731 6732 for (unsigned E = Operands.size(); I != E; ++I) { 6733 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6734 if (skipVcc && !skippedVcc && Op.isReg() && 6735 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 6736 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 6737 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 6738 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6739 // Skip VCC only if we didn't skip it on previous iteration. 
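      // The operand counts below refer to MC operands already added to Inst:
      // just the dst (1) for the 2nd-operand case, or the dst plus two
      // modifier/source pairs (5) for the 4th-operand case.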
6740 if (BasicInstType == SIInstrFlags::VOP2 && 6741 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { 6742 skippedVcc = true; 6743 continue; 6744 } else if (BasicInstType == SIInstrFlags::VOPC && 6745 Inst.getNumOperands() == 0) { 6746 skippedVcc = true; 6747 continue; 6748 } 6749 } 6750 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6751 Op.addRegOrImmWithInputModsOperands(Inst, 2); 6752 } else if (Op.isImm()) { 6753 // Handle optional arguments 6754 OptionalIdx[Op.getImmTy()] = I; 6755 } else { 6756 llvm_unreachable("Invalid operand type"); 6757 } 6758 skippedVcc = false; 6759 } 6760 6761 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 6762 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 6763 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 6764 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 6765 switch (BasicInstType) { 6766 case SIInstrFlags::VOP1: 6767 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6768 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6769 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6770 } 6771 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6772 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6773 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6774 break; 6775 6776 case SIInstrFlags::VOP2: 6777 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6778 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6779 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6780 } 6781 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6782 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6783 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6784 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6785 break; 6786 6787 case SIInstrFlags::VOPC: 6788 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 6789 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6790 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6791 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6792 break; 6793 6794 default: 6795 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 6796 } 6797 } 6798 6799 // special case v_mac_{f16, f32}: 6800 // it has src2 register operand that is tied to dst operand 6801 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 6802 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 6803 auto it = Inst.begin(); 6804 std::advance( 6805 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 6806 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6807 } 6808 } 6809 6810 //===----------------------------------------------------------------------===// 6811 // mAI 6812 //===----------------------------------------------------------------------===// 6813 6814 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 6815 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 6816 } 6817 6818 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 6819 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 6820 } 6821 6822 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 6823 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 6824 } 6825 6826 /// Force static initialization. 6827 extern "C" void LLVMInitializeAMDGPUAsmParser() { 6828 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 6829 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 6830 } 6831 6832 #define GET_REGISTER_MATCHER 6833 #define GET_MATCHER_IMPLEMENTATION 6834 #define GET_MNEMONIC_SPELL_CHECKER 6835 #include "AMDGPUGenAsmMatcher.inc" 6836 6837 // This fuction should be defined after auto-generated include so that we have 6838 // MatchClassKind enum defined 6839 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 6840 unsigned Kind) { 6841 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 6842 // But MatchInstructionImpl() expects to meet token and fails to validate 6843 // operand. This method checks if we are given immediate operand but expect to 6844 // get corresponding token. 6845 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 6846 switch (Kind) { 6847 case MCK_addr64: 6848 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 6849 case MCK_gds: 6850 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 6851 case MCK_lds: 6852 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 6853 case MCK_glc: 6854 return Operand.isGLC() ? Match_Success : Match_InvalidOperand; 6855 case MCK_idxen: 6856 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 6857 case MCK_offen: 6858 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 6859 case MCK_SSrcB32: 6860 // When operands have expression values, they will return true for isToken, 6861 // because it is not possible to distinguish between a token and an 6862 // expression at parse time. MatchInstructionImpl() will always try to 6863 // match an operand as a token, when isToken returns true, and when the 6864 // name of the expression is not a valid token, the match will fail, 6865 // so we need to handle it here. 6866 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 6867 case MCK_SSrcF32: 6868 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 6869 case MCK_SoppBrTarget: 6870 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 6871 case MCK_VReg32OrOff: 6872 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 6873 case MCK_InterpSlot: 6874 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand; 6875 case MCK_Attr: 6876 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 6877 case MCK_AttrChan: 6878 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 6879 default: 6880 return Match_InvalidOperand; 6881 } 6882 } 6883 6884 //===----------------------------------------------------------------------===// 6885 // endpgm 6886 //===----------------------------------------------------------------------===// 6887 6888 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 6889 SMLoc S = Parser.getTok().getLoc(); 6890 int64_t Imm = 0; 6891 6892 if (!parseExpr(Imm)) { 6893 // The operand is optional, if not present default to 0 6894 Imm = 0; 6895 } 6896 6897 if (!isUInt<16>(Imm)) { 6898 Error(S, "expected a 16-bit value"); 6899 return MatchOperand_ParseFail; 6900 } 6901 6902 Operands.push_back( 6903 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 6904 return MatchOperand_Success; 6905 } 6906 6907 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 6908