//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
223 return isSymbolRefExpr(); 224 } 225 226 bool isSymbolRefExpr() const { 227 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 228 } 229 230 bool isImm() const override { 231 return Kind == Immediate; 232 } 233 234 bool isInlinableImm(MVT type) const; 235 bool isLiteralImm(MVT type) const; 236 237 bool isRegKind() const { 238 return Kind == Register; 239 } 240 241 bool isReg() const override { 242 return isRegKind() && !hasModifiers(); 243 } 244 245 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 246 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 247 } 248 249 bool isRegOrImmWithInt16InputMods() const { 250 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 251 } 252 253 bool isRegOrImmWithInt32InputMods() const { 254 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 255 } 256 257 bool isRegOrImmWithInt64InputMods() const { 258 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 259 } 260 261 bool isRegOrImmWithFP16InputMods() const { 262 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 263 } 264 265 bool isRegOrImmWithFP32InputMods() const { 266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 267 } 268 269 bool isRegOrImmWithFP64InputMods() const { 270 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 271 } 272 273 bool isVReg() const { 274 return isRegClass(AMDGPU::VGPR_32RegClassID) || 275 isRegClass(AMDGPU::VReg_64RegClassID) || 276 isRegClass(AMDGPU::VReg_96RegClassID) || 277 isRegClass(AMDGPU::VReg_128RegClassID) || 278 isRegClass(AMDGPU::VReg_160RegClassID) || 279 isRegClass(AMDGPU::VReg_256RegClassID) || 280 isRegClass(AMDGPU::VReg_512RegClassID) || 281 isRegClass(AMDGPU::VReg_1024RegClassID); 282 } 283 284 bool isVReg32() const { 285 return isRegClass(AMDGPU::VGPR_32RegClassID); 286 } 287 288 bool isVReg32OrOff() const { 289 return isOff() || isVReg32(); 290 } 291 292 bool isSDWAOperand(MVT type) const; 293 bool isSDWAFP16Operand() const; 294 bool isSDWAFP32Operand() const; 295 bool isSDWAInt16Operand() const; 296 bool isSDWAInt32Operand() const; 297 298 bool isImmTy(ImmTy ImmT) const { 299 return isImm() && Imm.Type == ImmT; 300 } 301 302 bool isImmModifier() const { 303 return isImm() && Imm.Type != ImmTyNone; 304 } 305 306 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 307 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 308 bool isDMask() const { return isImmTy(ImmTyDMask); } 309 bool isDim() const { return isImmTy(ImmTyDim); } 310 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 311 bool isDA() const { return isImmTy(ImmTyDA); } 312 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 313 bool isLWE() const { return isImmTy(ImmTyLWE); } 314 bool isOff() const { return isImmTy(ImmTyOff); } 315 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 316 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 317 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 318 bool isOffen() const { return isImmTy(ImmTyOffen); } 319 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 320 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 321 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 322 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 323 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 324 325 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 326 bool isGDS() const { return 
isImmTy(ImmTyGDS); } 327 bool isLDS() const { return isImmTy(ImmTyLDS); } 328 bool isDLC() const { return isImmTy(ImmTyDLC); } 329 bool isGLC() const { return isImmTy(ImmTyGLC); } 330 bool isSLC() const { return isImmTy(ImmTySLC); } 331 bool isTFE() const { return isImmTy(ImmTyTFE); } 332 bool isD16() const { return isImmTy(ImmTyD16); } 333 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 334 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 335 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 336 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 337 bool isFI() const { return isImmTy(ImmTyDppFi); } 338 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 339 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 340 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 341 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 342 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 343 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 344 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 345 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 346 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 347 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 348 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 349 bool isHigh() const { return isImmTy(ImmTyHigh); } 350 351 bool isMod() const { 352 return isClampSI() || isOModSI(); 353 } 354 355 bool isRegOrImm() const { 356 return isReg() || isImm(); 357 } 358 359 bool isRegClass(unsigned RCID) const; 360 361 bool isInlineValue() const; 362 363 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 364 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 365 } 366 367 bool isSCSrcB16() const { 368 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 369 } 370 371 bool isSCSrcV2B16() const { 372 return isSCSrcB16(); 373 } 374 375 bool isSCSrcB32() const { 376 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 377 } 378 379 bool isSCSrcB64() const { 380 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 381 } 382 383 bool isBoolReg() const; 384 385 bool isSCSrcF16() const { 386 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 387 } 388 389 bool isSCSrcV2F16() const { 390 return isSCSrcF16(); 391 } 392 393 bool isSCSrcF32() const { 394 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 395 } 396 397 bool isSCSrcF64() const { 398 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 399 } 400 401 bool isSSrcB32() const { 402 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 403 } 404 405 bool isSSrcB16() const { 406 return isSCSrcB16() || isLiteralImm(MVT::i16); 407 } 408 409 bool isSSrcV2B16() const { 410 llvm_unreachable("cannot happen"); 411 return isSSrcB16(); 412 } 413 414 bool isSSrcB64() const { 415 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 416 // See isVSrc64(). 
417 return isSCSrcB64() || isLiteralImm(MVT::i64); 418 } 419 420 bool isSSrcF32() const { 421 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 422 } 423 424 bool isSSrcF64() const { 425 return isSCSrcB64() || isLiteralImm(MVT::f64); 426 } 427 428 bool isSSrcF16() const { 429 return isSCSrcB16() || isLiteralImm(MVT::f16); 430 } 431 432 bool isSSrcV2F16() const { 433 llvm_unreachable("cannot happen"); 434 return isSSrcF16(); 435 } 436 437 bool isSSrcOrLdsB32() const { 438 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 439 isLiteralImm(MVT::i32) || isExpr(); 440 } 441 442 bool isVCSrcB32() const { 443 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 444 } 445 446 bool isVCSrcB64() const { 447 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 448 } 449 450 bool isVCSrcB16() const { 451 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 452 } 453 454 bool isVCSrcV2B16() const { 455 return isVCSrcB16(); 456 } 457 458 bool isVCSrcF32() const { 459 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 460 } 461 462 bool isVCSrcF64() const { 463 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 464 } 465 466 bool isVCSrcF16() const { 467 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 468 } 469 470 bool isVCSrcV2F16() const { 471 return isVCSrcF16(); 472 } 473 474 bool isVSrcB32() const { 475 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 476 } 477 478 bool isVSrcB64() const { 479 return isVCSrcF64() || isLiteralImm(MVT::i64); 480 } 481 482 bool isVSrcB16() const { 483 return isVCSrcF16() || isLiteralImm(MVT::i16); 484 } 485 486 bool isVSrcV2B16() const { 487 return isVSrcB16() || isLiteralImm(MVT::v2i16); 488 } 489 490 bool isVSrcF32() const { 491 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 492 } 493 494 bool isVSrcF64() const { 495 return isVCSrcF64() || isLiteralImm(MVT::f64); 496 } 497 498 bool isVSrcF16() const { 499 return isVCSrcF16() || isLiteralImm(MVT::f16); 500 } 501 502 bool isVSrcV2F16() const { 503 return isVSrcF16() || isLiteralImm(MVT::v2f16); 504 } 505 506 bool isVISrcB32() const { 507 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 508 } 509 510 bool isVISrcB16() const { 511 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 512 } 513 514 bool isVISrcV2B16() const { 515 return isVISrcB16(); 516 } 517 518 bool isVISrcF32() const { 519 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 520 } 521 522 bool isVISrcF16() const { 523 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 524 } 525 526 bool isVISrcV2F16() const { 527 return isVISrcF16() || isVISrcB32(); 528 } 529 530 bool isAISrcB32() const { 531 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 532 } 533 534 bool isAISrcB16() const { 535 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 536 } 537 538 bool isAISrcV2B16() const { 539 return isAISrcB16(); 540 } 541 542 bool isAISrcF32() const { 543 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 544 } 545 546 bool isAISrcF16() const { 547 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 548 } 549 550 bool isAISrcV2F16() const { 551 return isAISrcF16() || isAISrcB32(); 552 } 553 554 bool isAISrc_128B32() const { 555 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 556 } 557 558 bool isAISrc_128B16() const { 559 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 560 } 561 562 bool 
isAISrc_128V2B16() const { 563 return isAISrc_128B16(); 564 } 565 566 bool isAISrc_128F32() const { 567 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 568 } 569 570 bool isAISrc_128F16() const { 571 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 572 } 573 574 bool isAISrc_128V2F16() const { 575 return isAISrc_128F16() || isAISrc_128B32(); 576 } 577 578 bool isAISrc_512B32() const { 579 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 580 } 581 582 bool isAISrc_512B16() const { 583 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 584 } 585 586 bool isAISrc_512V2B16() const { 587 return isAISrc_512B16(); 588 } 589 590 bool isAISrc_512F32() const { 591 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 592 } 593 594 bool isAISrc_512F16() const { 595 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 596 } 597 598 bool isAISrc_512V2F16() const { 599 return isAISrc_512F16() || isAISrc_512B32(); 600 } 601 602 bool isAISrc_1024B32() const { 603 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 604 } 605 606 bool isAISrc_1024B16() const { 607 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 608 } 609 610 bool isAISrc_1024V2B16() const { 611 return isAISrc_1024B16(); 612 } 613 614 bool isAISrc_1024F32() const { 615 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 616 } 617 618 bool isAISrc_1024F16() const { 619 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 620 } 621 622 bool isAISrc_1024V2F16() const { 623 return isAISrc_1024F16() || isAISrc_1024B32(); 624 } 625 626 bool isKImmFP32() const { 627 return isLiteralImm(MVT::f32); 628 } 629 630 bool isKImmFP16() const { 631 return isLiteralImm(MVT::f16); 632 } 633 634 bool isMem() const override { 635 return false; 636 } 637 638 bool isExpr() const { 639 return Kind == Expression; 640 } 641 642 bool isSoppBrTarget() const { 643 return isExpr() || isImm(); 644 } 645 646 bool isSWaitCnt() const; 647 bool isHwreg() const; 648 bool isSendMsg() const; 649 bool isSwizzle() const; 650 bool isSMRDOffset8() const; 651 bool isSMRDOffset20() const; 652 bool isSMRDLiteralOffset() const; 653 bool isDPP8() const; 654 bool isDPPCtrl() const; 655 bool isBLGP() const; 656 bool isCBSZ() const; 657 bool isABID() const; 658 bool isGPRIdxMode() const; 659 bool isS16Imm() const; 660 bool isU16Imm() const; 661 bool isEndpgm() const; 662 663 StringRef getExpressionAsToken() const { 664 assert(isExpr()); 665 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 666 return S->getSymbol().getName(); 667 } 668 669 StringRef getToken() const { 670 assert(isToken()); 671 672 if (Kind == Expression) 673 return getExpressionAsToken(); 674 675 return StringRef(Tok.Data, Tok.Length); 676 } 677 678 int64_t getImm() const { 679 assert(isImm()); 680 return Imm.Val; 681 } 682 683 ImmTy getImmTy() const { 684 assert(isImm()); 685 return Imm.Type; 686 } 687 688 unsigned getReg() const override { 689 assert(isRegKind()); 690 return Reg.RegNo; 691 } 692 693 SMLoc getStartLoc() const override { 694 return StartLoc; 695 } 696 697 SMLoc getEndLoc() const override { 698 return EndLoc; 699 } 700 701 SMRange getLocRange() const { 702 return SMRange(StartLoc, EndLoc); 703 } 704 705 Modifiers getModifiers() const { 706 assert(isRegKind() || isImmTy(ImmTyNone)); 707 return isRegKind() ? 
Reg.Mods : Imm.Mods; 708 } 709 710 void setModifiers(Modifiers Mods) { 711 assert(isRegKind() || isImmTy(ImmTyNone)); 712 if (isRegKind()) 713 Reg.Mods = Mods; 714 else 715 Imm.Mods = Mods; 716 } 717 718 bool hasModifiers() const { 719 return getModifiers().hasModifiers(); 720 } 721 722 bool hasFPModifiers() const { 723 return getModifiers().hasFPModifiers(); 724 } 725 726 bool hasIntModifiers() const { 727 return getModifiers().hasIntModifiers(); 728 } 729 730 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 731 732 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 733 734 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 735 736 template <unsigned Bitwidth> 737 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 738 739 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 740 addKImmFPOperands<16>(Inst, N); 741 } 742 743 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 744 addKImmFPOperands<32>(Inst, N); 745 } 746 747 void addRegOperands(MCInst &Inst, unsigned N) const; 748 749 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 750 addRegOperands(Inst, N); 751 } 752 753 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 754 if (isRegKind()) 755 addRegOperands(Inst, N); 756 else if (isExpr()) 757 Inst.addOperand(MCOperand::createExpr(Expr)); 758 else 759 addImmOperands(Inst, N); 760 } 761 762 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 763 Modifiers Mods = getModifiers(); 764 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 765 if (isRegKind()) { 766 addRegOperands(Inst, N); 767 } else { 768 addImmOperands(Inst, N, false); 769 } 770 } 771 772 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 773 assert(!hasIntModifiers()); 774 addRegOrImmWithInputModsOperands(Inst, N); 775 } 776 777 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 778 assert(!hasFPModifiers()); 779 addRegOrImmWithInputModsOperands(Inst, N); 780 } 781 782 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 783 Modifiers Mods = getModifiers(); 784 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 785 assert(isRegKind()); 786 addRegOperands(Inst, N); 787 } 788 789 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 790 assert(!hasIntModifiers()); 791 addRegWithInputModsOperands(Inst, N); 792 } 793 794 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 795 assert(!hasFPModifiers()); 796 addRegWithInputModsOperands(Inst, N); 797 } 798 799 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 800 if (isImm()) 801 addImmOperands(Inst, N); 802 else { 803 assert(isExpr()); 804 Inst.addOperand(MCOperand::createExpr(Expr)); 805 } 806 } 807 808 static void printImmTy(raw_ostream& OS, ImmTy Type) { 809 switch (Type) { 810 case ImmTyNone: OS << "None"; break; 811 case ImmTyGDS: OS << "GDS"; break; 812 case ImmTyLDS: OS << "LDS"; break; 813 case ImmTyOffen: OS << "Offen"; break; 814 case ImmTyIdxen: OS << "Idxen"; break; 815 case ImmTyAddr64: OS << "Addr64"; break; 816 case ImmTyOffset: OS << "Offset"; break; 817 case ImmTyInstOffset: OS << "InstOffset"; break; 818 case ImmTyOffset0: OS << "Offset0"; break; 819 case ImmTyOffset1: OS << "Offset1"; break; 820 case ImmTyDLC: OS << "DLC"; break; 821 case ImmTyGLC: OS << "GLC"; break; 822 case ImmTySLC: OS << "SLC"; break; 823 case ImmTyTFE: OS << "TFE"; break; 824 case ImmTyD16: OS << "D16"; break; 825 case 
ImmTyFORMAT: OS << "FORMAT"; break; 826 case ImmTyClampSI: OS << "ClampSI"; break; 827 case ImmTyOModSI: OS << "OModSI"; break; 828 case ImmTyDPP8: OS << "DPP8"; break; 829 case ImmTyDppCtrl: OS << "DppCtrl"; break; 830 case ImmTyDppRowMask: OS << "DppRowMask"; break; 831 case ImmTyDppBankMask: OS << "DppBankMask"; break; 832 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 833 case ImmTyDppFi: OS << "FI"; break; 834 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 835 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 836 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 837 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 838 case ImmTyDMask: OS << "DMask"; break; 839 case ImmTyDim: OS << "Dim"; break; 840 case ImmTyUNorm: OS << "UNorm"; break; 841 case ImmTyDA: OS << "DA"; break; 842 case ImmTyR128A16: OS << "R128A16"; break; 843 case ImmTyLWE: OS << "LWE"; break; 844 case ImmTyOff: OS << "Off"; break; 845 case ImmTyExpTgt: OS << "ExpTgt"; break; 846 case ImmTyExpCompr: OS << "ExpCompr"; break; 847 case ImmTyExpVM: OS << "ExpVM"; break; 848 case ImmTyHwreg: OS << "Hwreg"; break; 849 case ImmTySendMsg: OS << "SendMsg"; break; 850 case ImmTyInterpSlot: OS << "InterpSlot"; break; 851 case ImmTyInterpAttr: OS << "InterpAttr"; break; 852 case ImmTyAttrChan: OS << "AttrChan"; break; 853 case ImmTyOpSel: OS << "OpSel"; break; 854 case ImmTyOpSelHi: OS << "OpSelHi"; break; 855 case ImmTyNegLo: OS << "NegLo"; break; 856 case ImmTyNegHi: OS << "NegHi"; break; 857 case ImmTySwizzle: OS << "Swizzle"; break; 858 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 859 case ImmTyHigh: OS << "High"; break; 860 case ImmTyBLGP: OS << "BLGP"; break; 861 case ImmTyCBSZ: OS << "CBSZ"; break; 862 case ImmTyABID: OS << "ABID"; break; 863 case ImmTyEndpgm: OS << "Endpgm"; break; 864 } 865 } 866 867 void print(raw_ostream &OS) const override { 868 switch (Kind) { 869 case Register: 870 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 871 break; 872 case Immediate: 873 OS << '<' << getImm(); 874 if (getImmTy() != ImmTyNone) { 875 OS << " type: "; printImmTy(OS, getImmTy()); 876 } 877 OS << " mods: " << Imm.Mods << '>'; 878 break; 879 case Token: 880 OS << '\'' << getToken() << '\''; 881 break; 882 case Expression: 883 OS << "<expr " << *Expr << '>'; 884 break; 885 } 886 } 887 888 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 889 int64_t Val, SMLoc Loc, 890 ImmTy Type = ImmTyNone, 891 bool IsFPImm = false) { 892 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 893 Op->Imm.Val = Val; 894 Op->Imm.IsFPImm = IsFPImm; 895 Op->Imm.Type = Type; 896 Op->Imm.Mods = Modifiers(); 897 Op->StartLoc = Loc; 898 Op->EndLoc = Loc; 899 return Op; 900 } 901 902 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 903 StringRef Str, SMLoc Loc, 904 bool HasExplicitEncodingSize = true) { 905 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 906 Res->Tok.Data = Str.data(); 907 Res->Tok.Length = Str.size(); 908 Res->StartLoc = Loc; 909 Res->EndLoc = Loc; 910 return Res; 911 } 912 913 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 914 unsigned RegNo, SMLoc S, 915 SMLoc E) { 916 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 917 Op->Reg.RegNo = RegNo; 918 Op->Reg.Mods = Modifiers(); 919 Op->StartLoc = S; 920 Op->EndLoc = E; 921 return Op; 922 } 923 924 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 925 const class MCExpr *Expr, SMLoc S) { 926 auto Op = 
std::make_unique<AMDGPUOperand>(Expression, AsmParser); 927 Op->Expr = Expr; 928 Op->StartLoc = S; 929 Op->EndLoc = S; 930 return Op; 931 } 932 }; 933 934 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 935 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 936 return OS; 937 } 938 939 //===----------------------------------------------------------------------===// 940 // AsmParser 941 //===----------------------------------------------------------------------===// 942 943 // Holds info related to the current kernel, e.g. count of SGPRs used. 944 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 945 // .amdgpu_hsa_kernel or at EOF. 946 class KernelScopeInfo { 947 int SgprIndexUnusedMin = -1; 948 int VgprIndexUnusedMin = -1; 949 MCContext *Ctx = nullptr; 950 951 void usesSgprAt(int i) { 952 if (i >= SgprIndexUnusedMin) { 953 SgprIndexUnusedMin = ++i; 954 if (Ctx) { 955 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 956 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 957 } 958 } 959 } 960 961 void usesVgprAt(int i) { 962 if (i >= VgprIndexUnusedMin) { 963 VgprIndexUnusedMin = ++i; 964 if (Ctx) { 965 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 966 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 967 } 968 } 969 } 970 971 public: 972 KernelScopeInfo() = default; 973 974 void initialize(MCContext &Context) { 975 Ctx = &Context; 976 usesSgprAt(SgprIndexUnusedMin = -1); 977 usesVgprAt(VgprIndexUnusedMin = -1); 978 } 979 980 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 981 switch (RegKind) { 982 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 983 case IS_AGPR: // fall through 984 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 985 default: break; 986 } 987 } 988 }; 989 990 class AMDGPUAsmParser : public MCTargetAsmParser { 991 MCAsmParser &Parser; 992 993 // Number of extra operands parsed after the first optional operand. 994 // This may be necessary to skip hardcoded mandatory operands. 995 static const unsigned MAX_OPR_LOOKAHEAD = 8; 996 997 unsigned ForcedEncodingSize = 0; 998 bool ForcedDPP = false; 999 bool ForcedSDWA = false; 1000 KernelScopeInfo KernelScope; 1001 1002 /// @name Auto-generated Match Functions 1003 /// { 1004 1005 #define GET_ASSEMBLER_HEADER 1006 #include "AMDGPUGenAsmMatcher.inc" 1007 1008 /// } 1009 1010 private: 1011 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1012 bool OutOfRangeError(SMRange Range); 1013 /// Calculate VGPR/SGPR blocks required for given target, reserved 1014 /// registers, and user-specified NextFreeXGPR values. 1015 /// 1016 /// \param Features [in] Target features, used for bug corrections. 1017 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1018 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1019 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1020 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1021 /// descriptor field, if valid. 1022 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1023 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1024 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1025 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1026 /// \param VGPRBlocks [out] Result VGPR block count. 
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1102 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1103 MCContext &Ctx = getContext(); 1104 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1105 MCSymbol *Sym = 1106 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1107 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1108 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1109 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1110 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1111 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1112 } else { 1113 MCSymbol *Sym = 1114 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1115 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1116 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1117 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1118 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1119 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1120 } 1121 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1122 initializeGprCountSymbol(IS_VGPR); 1123 initializeGprCountSymbol(IS_SGPR); 1124 } else 1125 KernelScope.initialize(getContext()); 1126 } 1127 } 1128 1129 bool hasXNACK() const { 1130 return AMDGPU::hasXNACK(getSTI()); 1131 } 1132 1133 bool hasMIMG_R128() const { 1134 return AMDGPU::hasMIMG_R128(getSTI()); 1135 } 1136 1137 bool hasPackedD16() const { 1138 return AMDGPU::hasPackedD16(getSTI()); 1139 } 1140 1141 bool isSI() const { 1142 return AMDGPU::isSI(getSTI()); 1143 } 1144 1145 bool isCI() const { 1146 return AMDGPU::isCI(getSTI()); 1147 } 1148 1149 bool isVI() const { 1150 return AMDGPU::isVI(getSTI()); 1151 } 1152 1153 bool isGFX9() const { 1154 return AMDGPU::isGFX9(getSTI()); 1155 } 1156 1157 bool isGFX10() const { 1158 return AMDGPU::isGFX10(getSTI()); 1159 } 1160 1161 bool hasInv2PiInlineImm() const { 1162 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1163 } 1164 1165 bool hasFlatOffsets() const { 1166 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1167 } 1168 1169 bool hasSGPR102_SGPR103() const { 1170 return !isVI() && !isGFX9(); 1171 } 1172 1173 bool hasSGPR104_SGPR105() const { 1174 return isGFX10(); 1175 } 1176 1177 bool hasIntClamp() const { 1178 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1179 } 1180 1181 AMDGPUTargetStreamer &getTargetStreamer() { 1182 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1183 return static_cast<AMDGPUTargetStreamer &>(TS); 1184 } 1185 1186 const MCRegisterInfo *getMRI() const { 1187 // We need this const_cast because for some reason getContext() is not const 1188 // in MCAsmParser. 
1189 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1190 } 1191 1192 const MCInstrInfo *getMII() const { 1193 return &MII; 1194 } 1195 1196 const FeatureBitset &getFeatureBits() const { 1197 return getSTI().getFeatureBits(); 1198 } 1199 1200 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1201 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1202 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1203 1204 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1205 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1206 bool isForcedDPP() const { return ForcedDPP; } 1207 bool isForcedSDWA() const { return ForcedSDWA; } 1208 ArrayRef<unsigned> getMatchedVariants() const; 1209 1210 std::unique_ptr<AMDGPUOperand> parseRegister(); 1211 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1212 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1213 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1214 unsigned Kind) override; 1215 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1216 OperandVector &Operands, MCStreamer &Out, 1217 uint64_t &ErrorInfo, 1218 bool MatchingInlineAsm) override; 1219 bool ParseDirective(AsmToken DirectiveID) override; 1220 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1221 OperandMode Mode = OperandMode_Default); 1222 StringRef parseMnemonicSuffix(StringRef Name); 1223 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1224 SMLoc NameLoc, OperandVector &Operands) override; 1225 //bool ProcessInstruction(MCInst &Inst); 1226 1227 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1228 1229 OperandMatchResultTy 1230 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1231 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1232 bool (*ConvertResult)(int64_t &) = nullptr); 1233 1234 OperandMatchResultTy 1235 parseOperandArrayWithPrefix(const char *Prefix, 1236 OperandVector &Operands, 1237 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1238 bool (*ConvertResult)(int64_t&) = nullptr); 1239 1240 OperandMatchResultTy 1241 parseNamedBit(const char *Name, OperandVector &Operands, 1242 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1243 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1244 StringRef &Value); 1245 1246 bool isModifier(); 1247 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1248 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1249 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1250 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1251 bool parseSP3NegModifier(); 1252 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1253 OperandMatchResultTy parseReg(OperandVector &Operands); 1254 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1255 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1256 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1257 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1258 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1259 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1260 OperandMatchResultTy 
parseDfmtNfmt(OperandVector &Operands); 1261 1262 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1263 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1264 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1265 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1266 1267 bool parseCnt(int64_t &IntVal); 1268 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1269 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1270 1271 private: 1272 struct OperandInfoTy { 1273 int64_t Id; 1274 bool IsSymbolic = false; 1275 bool IsDefined = false; 1276 1277 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1278 }; 1279 1280 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1281 bool validateSendMsg(const OperandInfoTy &Msg, 1282 const OperandInfoTy &Op, 1283 const OperandInfoTy &Stream, 1284 const SMLoc Loc); 1285 1286 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1287 bool validateHwreg(const OperandInfoTy &HwReg, 1288 const int64_t Offset, 1289 const int64_t Width, 1290 const SMLoc Loc); 1291 1292 void errorExpTgt(); 1293 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1294 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1295 1296 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1297 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1298 bool validateSOPLiteral(const MCInst &Inst) const; 1299 bool validateConstantBusLimitations(const MCInst &Inst); 1300 bool validateEarlyClobberLimitations(const MCInst &Inst); 1301 bool validateIntClampSupported(const MCInst &Inst); 1302 bool validateMIMGAtomicDMask(const MCInst &Inst); 1303 bool validateMIMGGatherDMask(const MCInst &Inst); 1304 bool validateMIMGDataSize(const MCInst &Inst); 1305 bool validateMIMGAddrSize(const MCInst &Inst); 1306 bool validateMIMGD16(const MCInst &Inst); 1307 bool validateMIMGDim(const MCInst &Inst); 1308 bool validateLdsDirect(const MCInst &Inst); 1309 bool validateOpSel(const MCInst &Inst); 1310 bool validateVccOperand(unsigned Reg) const; 1311 bool validateVOP3Literal(const MCInst &Inst) const; 1312 unsigned getConstantBusLimit(unsigned Opcode) const; 1313 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1314 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1315 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1316 1317 bool isId(const StringRef Id) const; 1318 bool isId(const AsmToken &Token, const StringRef Id) const; 1319 bool isToken(const AsmToken::TokenKind Kind) const; 1320 bool trySkipId(const StringRef Id); 1321 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1322 bool trySkipToken(const AsmToken::TokenKind Kind); 1323 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1324 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1325 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1326 AsmToken::TokenKind getTokenKind() const; 1327 bool parseExpr(int64_t &Imm); 1328 bool parseExpr(OperandVector &Operands); 1329 StringRef getTokenStr() const; 1330 AsmToken peekToken(); 1331 AsmToken getToken() const; 1332 SMLoc getLoc() const; 1333 void lex(); 1334 1335 public: 1336 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1337 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1338 1339 
OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1340 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1341 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1342 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1343 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1344 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1345 1346 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1347 const unsigned MinVal, 1348 const unsigned MaxVal, 1349 const StringRef ErrMsg); 1350 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1351 bool parseSwizzleOffset(int64_t &Imm); 1352 bool parseSwizzleMacro(int64_t &Imm); 1353 bool parseSwizzleQuadPerm(int64_t &Imm); 1354 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1355 bool parseSwizzleBroadcast(int64_t &Imm); 1356 bool parseSwizzleSwap(int64_t &Imm); 1357 bool parseSwizzleReverse(int64_t &Imm); 1358 1359 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1360 int64_t parseGPRIdxMacro(); 1361 1362 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1363 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1364 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1365 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1366 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1367 1368 AMDGPUOperand::Ptr defaultDLC() const; 1369 AMDGPUOperand::Ptr defaultGLC() const; 1370 AMDGPUOperand::Ptr defaultSLC() const; 1371 1372 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1373 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1374 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1375 AMDGPUOperand::Ptr defaultFlatOffset() const; 1376 1377 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1378 1379 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1380 OptionalImmIndexMap &OptionalIdx); 1381 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1382 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1383 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1384 1385 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1386 1387 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1388 bool IsAtomic = false); 1389 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1390 1391 OperandMatchResultTy parseDim(OperandVector &Operands); 1392 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1393 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1394 AMDGPUOperand::Ptr defaultRowMask() const; 1395 AMDGPUOperand::Ptr defaultBankMask() const; 1396 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1397 AMDGPUOperand::Ptr defaultFI() const; 1398 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1399 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1400 1401 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1402 AMDGPUOperand::ImmTy Type); 1403 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1404 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1405 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1406 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1407 void cvtSdwaVOPC(MCInst 
&Inst, const OperandVector &Operands); 1408 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1409 uint64_t BasicInstType, bool skipVcc = false); 1410 1411 AMDGPUOperand::Ptr defaultBLGP() const; 1412 AMDGPUOperand::Ptr defaultCBSZ() const; 1413 AMDGPUOperand::Ptr defaultABID() const; 1414 1415 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1416 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1417 }; 1418 1419 struct OptionalOperand { 1420 const char *Name; 1421 AMDGPUOperand::ImmTy Type; 1422 bool IsBit; 1423 bool (*ConvertResult)(int64_t&); 1424 }; 1425 1426 } // end anonymous namespace 1427 1428 // May be called with integer type with equivalent bitwidth. 1429 static const fltSemantics *getFltSemantics(unsigned Size) { 1430 switch (Size) { 1431 case 4: 1432 return &APFloat::IEEEsingle(); 1433 case 8: 1434 return &APFloat::IEEEdouble(); 1435 case 2: 1436 return &APFloat::IEEEhalf(); 1437 default: 1438 llvm_unreachable("unsupported fp type"); 1439 } 1440 } 1441 1442 static const fltSemantics *getFltSemantics(MVT VT) { 1443 return getFltSemantics(VT.getSizeInBits() / 8); 1444 } 1445 1446 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1447 switch (OperandType) { 1448 case AMDGPU::OPERAND_REG_IMM_INT32: 1449 case AMDGPU::OPERAND_REG_IMM_FP32: 1450 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1451 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1452 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1453 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1454 return &APFloat::IEEEsingle(); 1455 case AMDGPU::OPERAND_REG_IMM_INT64: 1456 case AMDGPU::OPERAND_REG_IMM_FP64: 1457 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1458 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1459 return &APFloat::IEEEdouble(); 1460 case AMDGPU::OPERAND_REG_IMM_INT16: 1461 case AMDGPU::OPERAND_REG_IMM_FP16: 1462 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1463 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1464 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1465 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1466 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1467 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1468 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1469 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1470 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1471 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1472 return &APFloat::IEEEhalf(); 1473 default: 1474 llvm_unreachable("unsupported fp type"); 1475 } 1476 } 1477 1478 //===----------------------------------------------------------------------===// 1479 // Operand 1480 //===----------------------------------------------------------------------===// 1481 1482 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1483 bool Lost; 1484 1485 // Convert literal to single precision 1486 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1487 APFloat::rmNearestTiesToEven, 1488 &Lost); 1489 // We allow precision lost but not overflow or underflow 1490 if (Status != APFloat::opOK && 1491 Lost && 1492 ((Status & APFloat::opOverflow) != 0 || 1493 (Status & APFloat::opUnderflow) != 0)) { 1494 return false; 1495 } 1496 1497 return true; 1498 } 1499 1500 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1501 return isUIntN(Size, Val) || isIntN(Size, Val); 1502 } 1503 1504 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1505 1506 // This is a hack to enable named inline values like 1507 // shared_base with both 32-bit and 64-bit operands. 1508 // Note that these values are defined as 1509 // 32-bit operands only. 
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 32 bits of the literal to zeroes, but we accept
    // such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)?
MVT::i16 : type; 1609 1610 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1611 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1612 } 1613 1614 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1615 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1616 } 1617 1618 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1619 if (AsmParser->isVI()) 1620 return isVReg32(); 1621 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1622 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1623 else 1624 return false; 1625 } 1626 1627 bool AMDGPUOperand::isSDWAFP16Operand() const { 1628 return isSDWAOperand(MVT::f16); 1629 } 1630 1631 bool AMDGPUOperand::isSDWAFP32Operand() const { 1632 return isSDWAOperand(MVT::f32); 1633 } 1634 1635 bool AMDGPUOperand::isSDWAInt16Operand() const { 1636 return isSDWAOperand(MVT::i16); 1637 } 1638 1639 bool AMDGPUOperand::isSDWAInt32Operand() const { 1640 return isSDWAOperand(MVT::i32); 1641 } 1642 1643 bool AMDGPUOperand::isBoolReg() const { 1644 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1645 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1646 } 1647 1648 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1649 { 1650 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1651 assert(Size == 2 || Size == 4 || Size == 8); 1652 1653 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1654 1655 if (Imm.Mods.Abs) { 1656 Val &= ~FpSignMask; 1657 } 1658 if (Imm.Mods.Neg) { 1659 Val ^= FpSignMask; 1660 } 1661 1662 return Val; 1663 } 1664 1665 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1666 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1667 Inst.getNumOperands())) { 1668 addLiteralImmOperand(Inst, Imm.Val, 1669 ApplyModifiers & 1670 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1671 } else { 1672 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1673 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1674 } 1675 } 1676 1677 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1678 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1679 auto OpNum = Inst.getNumOperands(); 1680 // Check that this operand accepts literals 1681 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1682 1683 if (ApplyModifiers) { 1684 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1685 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1686 Val = applyInputFPModifiers(Val, Size); 1687 } 1688 1689 APInt Literal(64, Val); 1690 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1691 1692 if (Imm.IsFPImm) { // We got fp literal token 1693 switch (OpTy) { 1694 case AMDGPU::OPERAND_REG_IMM_INT64: 1695 case AMDGPU::OPERAND_REG_IMM_FP64: 1696 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1697 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1698 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1699 AsmParser->hasInv2PiInlineImm())) { 1700 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1701 return; 1702 } 1703 1704 // Non-inlineable 1705 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1706 // For fp operands we check if low 32 bits are zeros 1707 if (Literal.getLoBits(32) != 0) { 1708 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1709 "Can't encode literal as exact 64-bit floating-point operand. " 1710 "Low 32-bits will be set to zero"); 1711 } 1712 1713 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1714 return; 1715 } 1716 1717 // We don't allow fp literals in 64-bit integer instructions. It is 1718 // unclear how we should encode them. This case should be checked earlier 1719 // in predicate methods (isLiteralImm()) 1720 llvm_unreachable("fp literal in 64-bit integer instruction."); 1721 1722 case AMDGPU::OPERAND_REG_IMM_INT32: 1723 case AMDGPU::OPERAND_REG_IMM_FP32: 1724 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1725 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1726 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1727 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1728 case AMDGPU::OPERAND_REG_IMM_INT16: 1729 case AMDGPU::OPERAND_REG_IMM_FP16: 1730 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1731 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1732 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1733 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1734 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1735 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1736 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1737 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1738 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1739 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1740 bool lost; 1741 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1742 // Convert literal to single precision 1743 FPLiteral.convert(*getOpFltSemantics(OpTy), 1744 APFloat::rmNearestTiesToEven, &lost); 1745 // We allow precision lost but not overflow or underflow. This should be 1746 // checked earlier in isLiteralImm() 1747 1748 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1749 Inst.addOperand(MCOperand::createImm(ImmVal)); 1750 return; 1751 } 1752 default: 1753 llvm_unreachable("invalid operand size"); 1754 } 1755 1756 return; 1757 } 1758 1759 // We got int literal token. 1760 // Only sign extend inline immediates. 
1761 switch (OpTy) { 1762 case AMDGPU::OPERAND_REG_IMM_INT32: 1763 case AMDGPU::OPERAND_REG_IMM_FP32: 1764 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1765 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1766 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1767 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1768 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1769 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1770 if (isSafeTruncation(Val, 32) && 1771 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1772 AsmParser->hasInv2PiInlineImm())) { 1773 Inst.addOperand(MCOperand::createImm(Val)); 1774 return; 1775 } 1776 1777 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1778 return; 1779 1780 case AMDGPU::OPERAND_REG_IMM_INT64: 1781 case AMDGPU::OPERAND_REG_IMM_FP64: 1782 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1783 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1784 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1785 Inst.addOperand(MCOperand::createImm(Val)); 1786 return; 1787 } 1788 1789 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1790 return; 1791 1792 case AMDGPU::OPERAND_REG_IMM_INT16: 1793 case AMDGPU::OPERAND_REG_IMM_FP16: 1794 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1795 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1796 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1797 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1798 if (isSafeTruncation(Val, 16) && 1799 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1800 AsmParser->hasInv2PiInlineImm())) { 1801 Inst.addOperand(MCOperand::createImm(Val)); 1802 return; 1803 } 1804 1805 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1806 return; 1807 1808 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1809 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1810 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1811 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1812 assert(isSafeTruncation(Val, 16)); 1813 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1814 AsmParser->hasInv2PiInlineImm())); 1815 1816 Inst.addOperand(MCOperand::createImm(Val)); 1817 return; 1818 } 1819 default: 1820 llvm_unreachable("invalid operand size"); 1821 } 1822 } 1823 1824 template <unsigned Bitwidth> 1825 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1826 APInt Literal(64, Imm.Val); 1827 1828 if (!Imm.IsFPImm) { 1829 // We got int literal token. 
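    // For illustration: an integer token such as 0x12345 used with a 16-bit
    // KImm operand (e.g. the trailing constant of v_madmk_f16) is simply
    // truncated to its low 16 bits (0x2345) here.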
1830 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1831 return; 1832 } 1833 1834 bool Lost; 1835 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1836 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1837 APFloat::rmNearestTiesToEven, &Lost); 1838 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1839 } 1840 1841 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1842 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1843 } 1844 1845 static bool isInlineValue(unsigned Reg) { 1846 switch (Reg) { 1847 case AMDGPU::SRC_SHARED_BASE: 1848 case AMDGPU::SRC_SHARED_LIMIT: 1849 case AMDGPU::SRC_PRIVATE_BASE: 1850 case AMDGPU::SRC_PRIVATE_LIMIT: 1851 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1852 return true; 1853 case AMDGPU::SRC_VCCZ: 1854 case AMDGPU::SRC_EXECZ: 1855 case AMDGPU::SRC_SCC: 1856 return true; 1857 case AMDGPU::SGPR_NULL: 1858 return true; 1859 default: 1860 return false; 1861 } 1862 } 1863 1864 bool AMDGPUOperand::isInlineValue() const { 1865 return isRegKind() && ::isInlineValue(getReg()); 1866 } 1867 1868 //===----------------------------------------------------------------------===// 1869 // AsmParser 1870 //===----------------------------------------------------------------------===// 1871 1872 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1873 if (Is == IS_VGPR) { 1874 switch (RegWidth) { 1875 default: return -1; 1876 case 1: return AMDGPU::VGPR_32RegClassID; 1877 case 2: return AMDGPU::VReg_64RegClassID; 1878 case 3: return AMDGPU::VReg_96RegClassID; 1879 case 4: return AMDGPU::VReg_128RegClassID; 1880 case 5: return AMDGPU::VReg_160RegClassID; 1881 case 8: return AMDGPU::VReg_256RegClassID; 1882 case 16: return AMDGPU::VReg_512RegClassID; 1883 case 32: return AMDGPU::VReg_1024RegClassID; 1884 } 1885 } else if (Is == IS_TTMP) { 1886 switch (RegWidth) { 1887 default: return -1; 1888 case 1: return AMDGPU::TTMP_32RegClassID; 1889 case 2: return AMDGPU::TTMP_64RegClassID; 1890 case 4: return AMDGPU::TTMP_128RegClassID; 1891 case 8: return AMDGPU::TTMP_256RegClassID; 1892 case 16: return AMDGPU::TTMP_512RegClassID; 1893 } 1894 } else if (Is == IS_SGPR) { 1895 switch (RegWidth) { 1896 default: return -1; 1897 case 1: return AMDGPU::SGPR_32RegClassID; 1898 case 2: return AMDGPU::SGPR_64RegClassID; 1899 case 4: return AMDGPU::SGPR_128RegClassID; 1900 case 8: return AMDGPU::SGPR_256RegClassID; 1901 case 16: return AMDGPU::SGPR_512RegClassID; 1902 } 1903 } else if (Is == IS_AGPR) { 1904 switch (RegWidth) { 1905 default: return -1; 1906 case 1: return AMDGPU::AGPR_32RegClassID; 1907 case 2: return AMDGPU::AReg_64RegClassID; 1908 case 4: return AMDGPU::AReg_128RegClassID; 1909 case 16: return AMDGPU::AReg_512RegClassID; 1910 case 32: return AMDGPU::AReg_1024RegClassID; 1911 } 1912 } 1913 return -1; 1914 } 1915 1916 static unsigned getSpecialRegForName(StringRef RegName) { 1917 return StringSwitch<unsigned>(RegName) 1918 .Case("exec", AMDGPU::EXEC) 1919 .Case("vcc", AMDGPU::VCC) 1920 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1921 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1922 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1923 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1924 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1925 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1926 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1927 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1928 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1929 
.Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1930 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1931 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1932 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1933 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1934 .Case("m0", AMDGPU::M0) 1935 .Case("vccz", AMDGPU::SRC_VCCZ) 1936 .Case("src_vccz", AMDGPU::SRC_VCCZ) 1937 .Case("execz", AMDGPU::SRC_EXECZ) 1938 .Case("src_execz", AMDGPU::SRC_EXECZ) 1939 .Case("scc", AMDGPU::SRC_SCC) 1940 .Case("src_scc", AMDGPU::SRC_SCC) 1941 .Case("tba", AMDGPU::TBA) 1942 .Case("tma", AMDGPU::TMA) 1943 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1944 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1945 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1946 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1947 .Case("vcc_lo", AMDGPU::VCC_LO) 1948 .Case("vcc_hi", AMDGPU::VCC_HI) 1949 .Case("exec_lo", AMDGPU::EXEC_LO) 1950 .Case("exec_hi", AMDGPU::EXEC_HI) 1951 .Case("tma_lo", AMDGPU::TMA_LO) 1952 .Case("tma_hi", AMDGPU::TMA_HI) 1953 .Case("tba_lo", AMDGPU::TBA_LO) 1954 .Case("tba_hi", AMDGPU::TBA_HI) 1955 .Case("null", AMDGPU::SGPR_NULL) 1956 .Default(0); 1957 } 1958 1959 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1960 SMLoc &EndLoc) { 1961 auto R = parseRegister(); 1962 if (!R) return true; 1963 assert(R->isReg()); 1964 RegNo = R->getReg(); 1965 StartLoc = R->getStartLoc(); 1966 EndLoc = R->getEndLoc(); 1967 return false; 1968 } 1969 1970 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1971 RegisterKind RegKind, unsigned Reg1, 1972 unsigned RegNum) { 1973 switch (RegKind) { 1974 case IS_SPECIAL: 1975 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1976 Reg = AMDGPU::EXEC; 1977 RegWidth = 2; 1978 return true; 1979 } 1980 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1981 Reg = AMDGPU::FLAT_SCR; 1982 RegWidth = 2; 1983 return true; 1984 } 1985 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1986 Reg = AMDGPU::XNACK_MASK; 1987 RegWidth = 2; 1988 return true; 1989 } 1990 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1991 Reg = AMDGPU::VCC; 1992 RegWidth = 2; 1993 return true; 1994 } 1995 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1996 Reg = AMDGPU::TBA; 1997 RegWidth = 2; 1998 return true; 1999 } 2000 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2001 Reg = AMDGPU::TMA; 2002 RegWidth = 2; 2003 return true; 2004 } 2005 return false; 2006 case IS_VGPR: 2007 case IS_SGPR: 2008 case IS_AGPR: 2009 case IS_TTMP: 2010 if (Reg1 != Reg + RegWidth) { 2011 return false; 2012 } 2013 RegWidth++; 2014 return true; 2015 default: 2016 llvm_unreachable("unexpected register kind"); 2017 } 2018 } 2019 2020 static constexpr StringLiteral Registers[] = {"v", "s", "ttmp", "acc", "a"}; 2021 2022 bool 2023 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2024 const AsmToken &NextToken) const { 2025 2026 // A list of consecutive registers: [s0,s1,s2,s3] 2027 if (Token.is(AsmToken::LBrac)) 2028 return true; 2029 2030 if (!Token.is(AsmToken::Identifier)) 2031 return false; 2032 2033 // A single register like s0 or a range of registers like s[0:1] 2034 2035 StringRef RegName = Token.getString(); 2036 2037 for (StringRef Reg : Registers) { 2038 if (RegName.startswith(Reg)) { 2039 if (Reg.size() < RegName.size()) { 2040 unsigned RegNum; 2041 // A single register with an index: rXX 2042 if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum)) 2043 return true; 2044 } else { 2045 // A 
range of registers: r[XX:YY]. 2046 if (NextToken.is(AsmToken::LBrac)) 2047 return true; 2048 } 2049 } 2050 } 2051 2052 return getSpecialRegForName(RegName); 2053 } 2054 2055 bool 2056 AMDGPUAsmParser::isRegister() 2057 { 2058 return isRegister(getToken(), peekToken()); 2059 } 2060 2061 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2062 unsigned &RegNum, unsigned &RegWidth, 2063 unsigned *DwordRegIndex) { 2064 if (DwordRegIndex) { *DwordRegIndex = 0; } 2065 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2066 if (getLexer().is(AsmToken::Identifier)) { 2067 StringRef RegName = Parser.getTok().getString(); 2068 if ((Reg = getSpecialRegForName(RegName))) { 2069 Parser.Lex(); 2070 RegKind = IS_SPECIAL; 2071 } else { 2072 unsigned RegNumIndex = 0; 2073 if (RegName[0] == 'v') { 2074 RegNumIndex = 1; 2075 RegKind = IS_VGPR; 2076 } else if (RegName[0] == 's') { 2077 RegNumIndex = 1; 2078 RegKind = IS_SGPR; 2079 } else if (RegName[0] == 'a') { 2080 RegNumIndex = RegName.startswith("acc") ? 3 : 1; 2081 RegKind = IS_AGPR; 2082 } else if (RegName.startswith("ttmp")) { 2083 RegNumIndex = strlen("ttmp"); 2084 RegKind = IS_TTMP; 2085 } else { 2086 return false; 2087 } 2088 if (RegName.size() > RegNumIndex) { 2089 // Single 32-bit register: vXX. 2090 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 2091 return false; 2092 Parser.Lex(); 2093 RegWidth = 1; 2094 } else { 2095 // Range of registers: v[XX:YY]. ":YY" is optional. 2096 Parser.Lex(); 2097 int64_t RegLo, RegHi; 2098 if (getLexer().isNot(AsmToken::LBrac)) 2099 return false; 2100 Parser.Lex(); 2101 2102 if (getParser().parseAbsoluteExpression(RegLo)) 2103 return false; 2104 2105 const bool isRBrace = getLexer().is(AsmToken::RBrac); 2106 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 2107 return false; 2108 Parser.Lex(); 2109 2110 if (isRBrace) { 2111 RegHi = RegLo; 2112 } else { 2113 if (getParser().parseAbsoluteExpression(RegHi)) 2114 return false; 2115 2116 if (getLexer().isNot(AsmToken::RBrac)) 2117 return false; 2118 Parser.Lex(); 2119 } 2120 RegNum = (unsigned) RegLo; 2121 RegWidth = (RegHi - RegLo) + 1; 2122 } 2123 } 2124 } else if (getLexer().is(AsmToken::LBrac)) { 2125 // List of consecutive registers: [s0,s1,s2,s3] 2126 Parser.Lex(); 2127 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 2128 return false; 2129 if (RegWidth != 1) 2130 return false; 2131 RegisterKind RegKind1; 2132 unsigned Reg1, RegNum1, RegWidth1; 2133 do { 2134 if (getLexer().is(AsmToken::Comma)) { 2135 Parser.Lex(); 2136 } else if (getLexer().is(AsmToken::RBrac)) { 2137 Parser.Lex(); 2138 break; 2139 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 2140 if (RegWidth1 != 1) { 2141 return false; 2142 } 2143 if (RegKind1 != RegKind) { 2144 return false; 2145 } 2146 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 2147 return false; 2148 } 2149 } else { 2150 return false; 2151 } 2152 } while (true); 2153 } else { 2154 return false; 2155 } 2156 switch (RegKind) { 2157 case IS_SPECIAL: 2158 RegNum = 0; 2159 RegWidth = 1; 2160 break; 2161 case IS_VGPR: 2162 case IS_SGPR: 2163 case IS_AGPR: 2164 case IS_TTMP: 2165 { 2166 unsigned Size = 1; 2167 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2168 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
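      // For illustration: s[2:3] is accepted (a 64-bit pair starting at an
      // even register), while s[1:2] is rejected; tuples of four or more
      // dwords, such as s[4:7], must start at a multiple of 4, so s[2:5]
      // is rejected as well.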
2169 Size = std::min(RegWidth, 4u); 2170 } 2171 if (RegNum % Size != 0) 2172 return false; 2173 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 2174 RegNum = RegNum / Size; 2175 int RCID = getRegClass(RegKind, RegWidth); 2176 if (RCID == -1) 2177 return false; 2178 const MCRegisterClass RC = TRI->getRegClass(RCID); 2179 if (RegNum >= RC.getNumRegs()) 2180 return false; 2181 Reg = RC.getRegister(RegNum); 2182 break; 2183 } 2184 2185 default: 2186 llvm_unreachable("unexpected register kind"); 2187 } 2188 2189 if (!subtargetHasRegister(*TRI, Reg)) 2190 return false; 2191 return true; 2192 } 2193 2194 Optional<StringRef> 2195 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2196 switch (RegKind) { 2197 case IS_VGPR: 2198 return StringRef(".amdgcn.next_free_vgpr"); 2199 case IS_SGPR: 2200 return StringRef(".amdgcn.next_free_sgpr"); 2201 default: 2202 return None; 2203 } 2204 } 2205 2206 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2207 auto SymbolName = getGprCountSymbolName(RegKind); 2208 assert(SymbolName && "initializing invalid register kind"); 2209 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2210 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2211 } 2212 2213 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2214 unsigned DwordRegIndex, 2215 unsigned RegWidth) { 2216 // Symbols are only defined for GCN targets 2217 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2218 return true; 2219 2220 auto SymbolName = getGprCountSymbolName(RegKind); 2221 if (!SymbolName) 2222 return true; 2223 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2224 2225 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2226 int64_t OldCount; 2227 2228 if (!Sym->isVariable()) 2229 return !Error(getParser().getTok().getLoc(), 2230 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2231 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2232 return !Error( 2233 getParser().getTok().getLoc(), 2234 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2235 2236 if (OldCount <= NewMax) 2237 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2238 2239 return true; 2240 } 2241 2242 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 2243 const auto &Tok = Parser.getTok(); 2244 SMLoc StartLoc = Tok.getLoc(); 2245 SMLoc EndLoc = Tok.getEndLoc(); 2246 RegisterKind RegKind; 2247 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 2248 2249 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 2250 //FIXME: improve error messages (bug 41303). 
2251 Error(StartLoc, "not a valid operand."); 2252 return nullptr; 2253 } 2254 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2255 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 2256 return nullptr; 2257 } else 2258 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 2259 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2260 } 2261 2262 OperandMatchResultTy 2263 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2264 // TODO: add syntactic sugar for 1/(2*PI) 2265 2266 assert(!isRegister()); 2267 assert(!isModifier()); 2268 2269 const auto& Tok = getToken(); 2270 const auto& NextTok = peekToken(); 2271 bool IsReal = Tok.is(AsmToken::Real); 2272 SMLoc S = getLoc(); 2273 bool Negate = false; 2274 2275 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2276 lex(); 2277 IsReal = true; 2278 Negate = true; 2279 } 2280 2281 if (IsReal) { 2282 // Floating-point expressions are not supported. 2283 // Can only allow floating-point literals with an 2284 // optional sign. 2285 2286 StringRef Num = getTokenStr(); 2287 lex(); 2288 2289 APFloat RealVal(APFloat::IEEEdouble()); 2290 auto roundMode = APFloat::rmNearestTiesToEven; 2291 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { 2292 return MatchOperand_ParseFail; 2293 } 2294 if (Negate) 2295 RealVal.changeSign(); 2296 2297 Operands.push_back( 2298 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2299 AMDGPUOperand::ImmTyNone, true)); 2300 2301 return MatchOperand_Success; 2302 2303 } else { 2304 int64_t IntVal; 2305 const MCExpr *Expr; 2306 SMLoc S = getLoc(); 2307 2308 if (HasSP3AbsModifier) { 2309 // This is a workaround for handling expressions 2310 // as arguments of SP3 'abs' modifier, for example: 2311 // |1.0| 2312 // |-1| 2313 // |1+x| 2314 // This syntax is not compatible with syntax of standard 2315 // MC expressions (due to the trailing '|'). 
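      // For illustration: in a standard MC expression the trailing '|' would
      // be taken as the start of a bitwise-OR, so the argument of an SP3
      // 'abs' is parsed as a primary expression instead and the closing '|'
      // is left for the caller to consume.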
2316 SMLoc EndLoc; 2317 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2318 return MatchOperand_ParseFail; 2319 } else { 2320 if (Parser.parseExpression(Expr)) 2321 return MatchOperand_ParseFail; 2322 } 2323 2324 if (Expr->evaluateAsAbsolute(IntVal)) { 2325 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2326 } else { 2327 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2328 } 2329 2330 return MatchOperand_Success; 2331 } 2332 2333 return MatchOperand_NoMatch; 2334 } 2335 2336 OperandMatchResultTy 2337 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2338 if (!isRegister()) 2339 return MatchOperand_NoMatch; 2340 2341 if (auto R = parseRegister()) { 2342 assert(R->isReg()); 2343 Operands.push_back(std::move(R)); 2344 return MatchOperand_Success; 2345 } 2346 return MatchOperand_ParseFail; 2347 } 2348 2349 OperandMatchResultTy 2350 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2351 auto res = parseReg(Operands); 2352 if (res != MatchOperand_NoMatch) { 2353 return res; 2354 } else if (isModifier()) { 2355 return MatchOperand_NoMatch; 2356 } else { 2357 return parseImm(Operands, HasSP3AbsMod); 2358 } 2359 } 2360 2361 bool 2362 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2363 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2364 const auto &str = Token.getString(); 2365 return str == "abs" || str == "neg" || str == "sext"; 2366 } 2367 return false; 2368 } 2369 2370 bool 2371 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2372 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2373 } 2374 2375 bool 2376 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2377 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2378 } 2379 2380 bool 2381 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2382 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2383 } 2384 2385 // Check if this is an operand modifier or an opcode modifier 2386 // which may look like an expression but it is not. We should 2387 // avoid parsing these modifiers as expressions. Currently 2388 // recognized sequences are: 2389 // |...| 2390 // abs(...) 2391 // neg(...) 2392 // sext(...) 2393 // -reg 2394 // -|...| 2395 // -abs(...) 2396 // name:... 2397 // Note that simple opcode modifiers like 'gds' may be parsed as 2398 // expressions; this is a special case. See getExpressionAsToken. 2399 // 2400 bool 2401 AMDGPUAsmParser::isModifier() { 2402 2403 AsmToken Tok = getToken(); 2404 AsmToken NextToken[2]; 2405 peekTokens(NextToken); 2406 2407 return isOperandModifier(Tok, NextToken[0]) || 2408 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2409 isOpcodeModifierWithVal(Tok, NextToken[0]); 2410 } 2411 2412 // Check if the current token is an SP3 'neg' modifier. 2413 // Currently this modifier is allowed in the following context: 2414 // 2415 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2416 // 2. Before an 'abs' modifier: -abs(...) 2417 // 3. Before an SP3 'abs' modifier: -|...| 2418 // 2419 // In all other cases "-" is handled as a part 2420 // of an expression that follows the sign. 
2421 //
2422 // Note: When "-" is followed by an integer literal,
2423 // this is interpreted as integer negation rather
2424 // than a floating-point NEG modifier applied to the literal N.
2425 // Besides being counter-intuitive, such use of a floating-point
2426 // NEG modifier would have resulted in different meanings
2427 // of integer literals used with VOP1/2/C and VOP3,
2428 // for example:
2429 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2430 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2431 // Negative fp literals with preceding "-" are
2432 // handled likewise for uniformity.
2433 //
2434 bool
2435 AMDGPUAsmParser::parseSP3NegModifier() {
2436 
2437   AsmToken NextToken[2];
2438   peekTokens(NextToken);
2439 
2440   if (isToken(AsmToken::Minus) &&
2441       (isRegister(NextToken[0], NextToken[1]) ||
2442        NextToken[0].is(AsmToken::Pipe) ||
2443        isId(NextToken[0], "abs"))) {
2444     lex();
2445     return true;
2446   }
2447 
2448   return false;
2449 }
2450 
2451 OperandMatchResultTy
2452 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2453                                               bool AllowImm) {
2454   bool Neg, SP3Neg;
2455   bool Abs, SP3Abs;
2456   SMLoc Loc;
2457 
2458   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2459   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2460     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2461     return MatchOperand_ParseFail;
2462   }
2463 
2464   SP3Neg = parseSP3NegModifier();
2465 
2466   Loc = getLoc();
2467   Neg = trySkipId("neg");
2468   if (Neg && SP3Neg) {
2469     Error(Loc, "expected register or immediate");
2470     return MatchOperand_ParseFail;
2471   }
2472   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2473     return MatchOperand_ParseFail;
2474 
2475   Abs = trySkipId("abs");
2476   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2477     return MatchOperand_ParseFail;
2478 
2479   Loc = getLoc();
2480   SP3Abs = trySkipToken(AsmToken::Pipe);
2481   if (Abs && SP3Abs) {
2482     Error(Loc, "expected register or immediate");
2483     return MatchOperand_ParseFail;
2484   }
2485 
2486   OperandMatchResultTy Res;
2487   if (AllowImm) {
2488     Res = parseRegOrImm(Operands, SP3Abs);
2489   } else {
2490     Res = parseReg(Operands);
2491   }
2492   if (Res != MatchOperand_Success) {
2493     return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2494 } 2495 2496 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2497 return MatchOperand_ParseFail; 2498 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2499 return MatchOperand_ParseFail; 2500 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2501 return MatchOperand_ParseFail; 2502 2503 AMDGPUOperand::Modifiers Mods; 2504 Mods.Abs = Abs || SP3Abs; 2505 Mods.Neg = Neg || SP3Neg; 2506 2507 if (Mods.hasFPModifiers()) { 2508 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2509 if (Op.isExpr()) { 2510 Error(Op.getStartLoc(), "expected an absolute expression"); 2511 return MatchOperand_ParseFail; 2512 } 2513 Op.setModifiers(Mods); 2514 } 2515 return MatchOperand_Success; 2516 } 2517 2518 OperandMatchResultTy 2519 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2520 bool AllowImm) { 2521 bool Sext = trySkipId("sext"); 2522 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2523 return MatchOperand_ParseFail; 2524 2525 OperandMatchResultTy Res; 2526 if (AllowImm) { 2527 Res = parseRegOrImm(Operands); 2528 } else { 2529 Res = parseReg(Operands); 2530 } 2531 if (Res != MatchOperand_Success) { 2532 return Sext? MatchOperand_ParseFail : Res; 2533 } 2534 2535 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2536 return MatchOperand_ParseFail; 2537 2538 AMDGPUOperand::Modifiers Mods; 2539 Mods.Sext = Sext; 2540 2541 if (Mods.hasIntModifiers()) { 2542 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2543 if (Op.isExpr()) { 2544 Error(Op.getStartLoc(), "expected an absolute expression"); 2545 return MatchOperand_ParseFail; 2546 } 2547 Op.setModifiers(Mods); 2548 } 2549 2550 return MatchOperand_Success; 2551 } 2552 2553 OperandMatchResultTy 2554 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2555 return parseRegOrImmWithFPInputMods(Operands, false); 2556 } 2557 2558 OperandMatchResultTy 2559 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2560 return parseRegOrImmWithIntInputMods(Operands, false); 2561 } 2562 2563 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2564 auto Loc = getLoc(); 2565 if (trySkipId("off")) { 2566 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2567 AMDGPUOperand::ImmTyOff, false)); 2568 return MatchOperand_Success; 2569 } 2570 2571 if (!isRegister()) 2572 return MatchOperand_NoMatch; 2573 2574 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2575 if (Reg) { 2576 Operands.push_back(std::move(Reg)); 2577 return MatchOperand_Success; 2578 } 2579 2580 return MatchOperand_ParseFail; 2581 2582 } 2583 2584 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2585 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2586 2587 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2588 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2589 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2590 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2591 return Match_InvalidOperand; 2592 2593 if ((TSFlags & SIInstrFlags::VOP3) && 2594 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2595 getForcedEncodingSize() != 64) 2596 return Match_PreferE32; 2597 2598 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2599 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2600 // v_mac_f32/16 allow only dst_sel == DWORD; 2601 auto OpNum = 2602 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2603     const auto &Op = Inst.getOperand(OpNum);
2604     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2605       return Match_InvalidOperand;
2606     }
2607   }
2608 
2609   return Match_Success;
2610 }
2611 
2612 // Which asm variants we should check.
2613 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2614   if (getForcedEncodingSize() == 32) {
2615     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2616     return makeArrayRef(Variants);
2617   }
2618 
2619   if (isForcedVOP3()) {
2620     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2621     return makeArrayRef(Variants);
2622   }
2623 
2624   if (isForcedSDWA()) {
2625     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2626                                         AMDGPUAsmVariants::SDWA9};
2627     return makeArrayRef(Variants);
2628   }
2629 
2630   if (isForcedDPP()) {
2631     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2632     return makeArrayRef(Variants);
2633   }
2634 
2635   static const unsigned Variants[] = {
2636     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2637     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2638   };
2639 
2640   return makeArrayRef(Variants);
2641 }
2642 
2643 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2644   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2645   const unsigned Num = Desc.getNumImplicitUses();
2646   for (unsigned i = 0; i < Num; ++i) {
2647     unsigned Reg = Desc.ImplicitUses[i];
2648     switch (Reg) {
2649     case AMDGPU::FLAT_SCR:
2650     case AMDGPU::VCC:
2651     case AMDGPU::VCC_LO:
2652     case AMDGPU::VCC_HI:
2653     case AMDGPU::M0:
2654       return Reg;
2655     default:
2656       break;
2657     }
2658   }
2659   return AMDGPU::NoRegister;
2660 }
2661 
2662 // NB: This code is correct only when used to check constant
2663 // bus limitations because GFX7 supports no f16 inline constants.
2664 // Note that there are no cases when a GFX7 opcode violates
2665 // constant bus limitations due to the use of an f16 constant.
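// For illustration: with a 4-byte operand, integers in [-16, 64] and common
// fp values such as 0.5, 1.0 or 2.0 are inline constants and are accepted
// here; any other immediate is a literal and is counted against the
// constant bus by the callers below.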
2666 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2667 unsigned OpIdx) const { 2668 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2669 2670 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2671 return false; 2672 } 2673 2674 const MCOperand &MO = Inst.getOperand(OpIdx); 2675 2676 int64_t Val = MO.getImm(); 2677 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2678 2679 switch (OpSize) { // expected operand size 2680 case 8: 2681 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2682 case 4: 2683 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2684 case 2: { 2685 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2686 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2687 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2688 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2689 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2690 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2691 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2692 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2693 } else { 2694 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2695 } 2696 } 2697 default: 2698 llvm_unreachable("invalid operand size"); 2699 } 2700 } 2701 2702 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2703 if (!isGFX10()) 2704 return 1; 2705 2706 switch (Opcode) { 2707 // 64-bit shift instructions can use only one scalar value input 2708 case AMDGPU::V_LSHLREV_B64: 2709 case AMDGPU::V_LSHLREV_B64_gfx10: 2710 case AMDGPU::V_LSHL_B64: 2711 case AMDGPU::V_LSHRREV_B64: 2712 case AMDGPU::V_LSHRREV_B64_gfx10: 2713 case AMDGPU::V_LSHR_B64: 2714 case AMDGPU::V_ASHRREV_I64: 2715 case AMDGPU::V_ASHRREV_I64_gfx10: 2716 case AMDGPU::V_ASHR_I64: 2717 return 1; 2718 default: 2719 return 2; 2720 } 2721 } 2722 2723 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2724 const MCOperand &MO = Inst.getOperand(OpIdx); 2725 if (MO.isImm()) { 2726 return !isInlineConstant(Inst, OpIdx); 2727 } else if (MO.isReg()) { 2728 auto Reg = MO.getReg(); 2729 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2730 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2731 } else { 2732 return true; 2733 } 2734 } 2735 2736 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2737 const unsigned Opcode = Inst.getOpcode(); 2738 const MCInstrDesc &Desc = MII.get(Opcode); 2739 unsigned ConstantBusUseCount = 0; 2740 unsigned NumLiterals = 0; 2741 unsigned LiteralSize; 2742 2743 if (Desc.TSFlags & 2744 (SIInstrFlags::VOPC | 2745 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2746 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2747 SIInstrFlags::SDWA)) { 2748 // Check special imm operands (used by madmk, etc) 2749 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2750 ++ConstantBusUseCount; 2751 } 2752 2753 SmallDenseSet<unsigned> SGPRsUsed; 2754 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2755 if (SGPRUsed != AMDGPU::NoRegister) { 2756 SGPRsUsed.insert(SGPRUsed); 2757 ++ConstantBusUseCount; 2758 } 2759 2760 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2761 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2762 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2763 2764 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2765 2766 for (int OpIdx : OpIndices) { 2767 if (OpIdx == -1) break; 2768 2769 const MCOperand &MO = 
Inst.getOperand(OpIdx); 2770 if (usesConstantBus(Inst, OpIdx)) { 2771 if (MO.isReg()) { 2772 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2773 // Pairs of registers with a partial intersections like these 2774 // s0, s[0:1] 2775 // flat_scratch_lo, flat_scratch 2776 // flat_scratch_lo, flat_scratch_hi 2777 // are theoretically valid but they are disabled anyway. 2778 // Note that this code mimics SIInstrInfo::verifyInstruction 2779 if (!SGPRsUsed.count(Reg)) { 2780 SGPRsUsed.insert(Reg); 2781 ++ConstantBusUseCount; 2782 } 2783 } else { // Expression or a literal 2784 2785 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2786 continue; // special operand like VINTERP attr_chan 2787 2788 // An instruction may use only one literal. 2789 // This has been validated on the previous step. 2790 // See validateVOP3Literal. 2791 // This literal may be used as more than one operand. 2792 // If all these operands are of the same size, 2793 // this literal counts as one scalar value. 2794 // Otherwise it counts as 2 scalar values. 2795 // See "GFX10 Shader Programming", section 3.6.2.3. 2796 2797 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2798 if (Size < 4) Size = 4; 2799 2800 if (NumLiterals == 0) { 2801 NumLiterals = 1; 2802 LiteralSize = Size; 2803 } else if (LiteralSize != Size) { 2804 NumLiterals = 2; 2805 } 2806 } 2807 } 2808 } 2809 } 2810 ConstantBusUseCount += NumLiterals; 2811 2812 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 2813 } 2814 2815 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2816 const unsigned Opcode = Inst.getOpcode(); 2817 const MCInstrDesc &Desc = MII.get(Opcode); 2818 2819 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2820 if (DstIdx == -1 || 2821 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2822 return true; 2823 } 2824 2825 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2826 2827 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2828 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2829 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2830 2831 assert(DstIdx != -1); 2832 const MCOperand &Dst = Inst.getOperand(DstIdx); 2833 assert(Dst.isReg()); 2834 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2835 2836 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2837 2838 for (int SrcIdx : SrcIndices) { 2839 if (SrcIdx == -1) break; 2840 const MCOperand &Src = Inst.getOperand(SrcIdx); 2841 if (Src.isReg()) { 2842 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2843 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2844 return false; 2845 } 2846 } 2847 } 2848 2849 return true; 2850 } 2851 2852 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2853 2854 const unsigned Opc = Inst.getOpcode(); 2855 const MCInstrDesc &Desc = MII.get(Opc); 2856 2857 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2858 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2859 assert(ClampIdx != -1); 2860 return Inst.getOperand(ClampIdx).getImm() == 0; 2861 } 2862 2863 return true; 2864 } 2865 2866 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2867 2868 const unsigned Opc = Inst.getOpcode(); 2869 const MCInstrDesc &Desc = MII.get(Opc); 2870 2871 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2872 return true; 2873 2874 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2875 int DMaskIdx 
= AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2876 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2877 2878 assert(VDataIdx != -1); 2879 assert(DMaskIdx != -1); 2880 assert(TFEIdx != -1); 2881 2882 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2883 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2884 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2885 if (DMask == 0) 2886 DMask = 1; 2887 2888 unsigned DataSize = 2889 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 2890 if (hasPackedD16()) { 2891 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2892 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2893 DataSize = (DataSize + 1) / 2; 2894 } 2895 2896 return (VDataSize / 4) == DataSize + TFESize; 2897 } 2898 2899 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 2900 const unsigned Opc = Inst.getOpcode(); 2901 const MCInstrDesc &Desc = MII.get(Opc); 2902 2903 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 2904 return true; 2905 2906 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 2907 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 2908 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 2909 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 2910 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 2911 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2912 2913 assert(VAddr0Idx != -1); 2914 assert(SrsrcIdx != -1); 2915 assert(DimIdx != -1); 2916 assert(SrsrcIdx > VAddr0Idx); 2917 2918 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 2919 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 2920 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 2921 unsigned VAddrSize = 2922 IsNSA ? SrsrcIdx - VAddr0Idx 2923 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 2924 2925 unsigned AddrSize = BaseOpcode->NumExtraArgs + 2926 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 2927 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 2928 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 2929 if (!IsNSA) { 2930 if (AddrSize > 8) 2931 AddrSize = 16; 2932 else if (AddrSize > 4) 2933 AddrSize = 8; 2934 } 2935 2936 return VAddrSize == AddrSize; 2937 } 2938 2939 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2940 2941 const unsigned Opc = Inst.getOpcode(); 2942 const MCInstrDesc &Desc = MII.get(Opc); 2943 2944 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2945 return true; 2946 if (!Desc.mayLoad() || !Desc.mayStore()) 2947 return true; // Not atomic 2948 2949 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2950 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2951 2952 // This is an incomplete check because image_atomic_cmpswap 2953 // may only use 0x3 and 0xf while other atomic operations 2954 // may use 0x1 and 0x3. However these limitations are 2955 // verified when we check that dmask matches dst size. 
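  // For illustration: a 32-bit image_atomic_add uses dmask:0x1 (0x3 for the
  // 64-bit form), while image_atomic_cmpswap needs dmask:0x3 (0xf for the
  // 64-bit form) since it carries both the data and the compare value.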
2956 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2957 } 2958 2959 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2960 2961 const unsigned Opc = Inst.getOpcode(); 2962 const MCInstrDesc &Desc = MII.get(Opc); 2963 2964 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2965 return true; 2966 2967 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2968 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2969 2970 // GATHER4 instructions use dmask in a different fashion compared to 2971 // other MIMG instructions. The only useful DMASK values are 2972 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2973 // (red,red,red,red) etc.) The ISA document doesn't mention 2974 // this. 2975 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2976 } 2977 2978 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2979 2980 const unsigned Opc = Inst.getOpcode(); 2981 const MCInstrDesc &Desc = MII.get(Opc); 2982 2983 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2984 return true; 2985 2986 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2987 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2988 if (isCI() || isSI()) 2989 return false; 2990 } 2991 2992 return true; 2993 } 2994 2995 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 2996 const unsigned Opc = Inst.getOpcode(); 2997 const MCInstrDesc &Desc = MII.get(Opc); 2998 2999 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3000 return true; 3001 3002 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3003 if (DimIdx < 0) 3004 return true; 3005 3006 long Imm = Inst.getOperand(DimIdx).getImm(); 3007 if (Imm < 0 || Imm >= 8) 3008 return false; 3009 3010 return true; 3011 } 3012 3013 static bool IsRevOpcode(const unsigned Opcode) 3014 { 3015 switch (Opcode) { 3016 case AMDGPU::V_SUBREV_F32_e32: 3017 case AMDGPU::V_SUBREV_F32_e64: 3018 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3019 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3020 case AMDGPU::V_SUBREV_F32_e32_vi: 3021 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3022 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3023 case AMDGPU::V_SUBREV_F32_e64_vi: 3024 3025 case AMDGPU::V_SUBREV_I32_e32: 3026 case AMDGPU::V_SUBREV_I32_e64: 3027 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3028 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3029 3030 case AMDGPU::V_SUBBREV_U32_e32: 3031 case AMDGPU::V_SUBBREV_U32_e64: 3032 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3033 case AMDGPU::V_SUBBREV_U32_e32_vi: 3034 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3035 case AMDGPU::V_SUBBREV_U32_e64_vi: 3036 3037 case AMDGPU::V_SUBREV_U32_e32: 3038 case AMDGPU::V_SUBREV_U32_e64: 3039 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3040 case AMDGPU::V_SUBREV_U32_e32_vi: 3041 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3042 case AMDGPU::V_SUBREV_U32_e64_vi: 3043 3044 case AMDGPU::V_SUBREV_F16_e32: 3045 case AMDGPU::V_SUBREV_F16_e64: 3046 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3047 case AMDGPU::V_SUBREV_F16_e32_vi: 3048 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3049 case AMDGPU::V_SUBREV_F16_e64_vi: 3050 3051 case AMDGPU::V_SUBREV_U16_e32: 3052 case AMDGPU::V_SUBREV_U16_e64: 3053 case AMDGPU::V_SUBREV_U16_e32_vi: 3054 case AMDGPU::V_SUBREV_U16_e64_vi: 3055 3056 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3057 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3058 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3059 3060 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3061 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3062 3063 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3064 case 
AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3065 3066 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3067 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3068 3069 case AMDGPU::V_LSHRREV_B32_e32: 3070 case AMDGPU::V_LSHRREV_B32_e64: 3071 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3072 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3073 case AMDGPU::V_LSHRREV_B32_e32_vi: 3074 case AMDGPU::V_LSHRREV_B32_e64_vi: 3075 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3076 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3077 3078 case AMDGPU::V_ASHRREV_I32_e32: 3079 case AMDGPU::V_ASHRREV_I32_e64: 3080 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3081 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3082 case AMDGPU::V_ASHRREV_I32_e32_vi: 3083 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3084 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3085 case AMDGPU::V_ASHRREV_I32_e64_vi: 3086 3087 case AMDGPU::V_LSHLREV_B32_e32: 3088 case AMDGPU::V_LSHLREV_B32_e64: 3089 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3090 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3091 case AMDGPU::V_LSHLREV_B32_e32_vi: 3092 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3093 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3094 case AMDGPU::V_LSHLREV_B32_e64_vi: 3095 3096 case AMDGPU::V_LSHLREV_B16_e32: 3097 case AMDGPU::V_LSHLREV_B16_e64: 3098 case AMDGPU::V_LSHLREV_B16_e32_vi: 3099 case AMDGPU::V_LSHLREV_B16_e64_vi: 3100 case AMDGPU::V_LSHLREV_B16_gfx10: 3101 3102 case AMDGPU::V_LSHRREV_B16_e32: 3103 case AMDGPU::V_LSHRREV_B16_e64: 3104 case AMDGPU::V_LSHRREV_B16_e32_vi: 3105 case AMDGPU::V_LSHRREV_B16_e64_vi: 3106 case AMDGPU::V_LSHRREV_B16_gfx10: 3107 3108 case AMDGPU::V_ASHRREV_I16_e32: 3109 case AMDGPU::V_ASHRREV_I16_e64: 3110 case AMDGPU::V_ASHRREV_I16_e32_vi: 3111 case AMDGPU::V_ASHRREV_I16_e64_vi: 3112 case AMDGPU::V_ASHRREV_I16_gfx10: 3113 3114 case AMDGPU::V_LSHLREV_B64: 3115 case AMDGPU::V_LSHLREV_B64_gfx10: 3116 case AMDGPU::V_LSHLREV_B64_vi: 3117 3118 case AMDGPU::V_LSHRREV_B64: 3119 case AMDGPU::V_LSHRREV_B64_gfx10: 3120 case AMDGPU::V_LSHRREV_B64_vi: 3121 3122 case AMDGPU::V_ASHRREV_I64: 3123 case AMDGPU::V_ASHRREV_I64_gfx10: 3124 case AMDGPU::V_ASHRREV_I64_vi: 3125 3126 case AMDGPU::V_PK_LSHLREV_B16: 3127 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3128 case AMDGPU::V_PK_LSHLREV_B16_vi: 3129 3130 case AMDGPU::V_PK_LSHRREV_B16: 3131 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3132 case AMDGPU::V_PK_LSHRREV_B16_vi: 3133 case AMDGPU::V_PK_ASHRREV_I16: 3134 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3135 case AMDGPU::V_PK_ASHRREV_I16_vi: 3136 return true; 3137 default: 3138 return false; 3139 } 3140 } 3141 3142 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3143 3144 using namespace SIInstrFlags; 3145 const unsigned Opcode = Inst.getOpcode(); 3146 const MCInstrDesc &Desc = MII.get(Opcode); 3147 3148 // lds_direct register is defined so that it can be used 3149 // with 9-bit operands only. Ignore encodings which do not accept these. 3150 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3151 return true; 3152 3153 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3154 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3155 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3156 3157 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3158 3159 // lds_direct cannot be specified as either src1 or src2. 
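  // For illustration (hypothetical operands): 'v_mov_b32 v0, lds_direct'
  // passes this check (lds_direct as src0 of a non-SDWA, non-rev encoding),
  // while 'v_add_f32 v0, v1, lds_direct' is rejected by the loop below.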
3160 for (int SrcIdx : SrcIndices) { 3161 if (SrcIdx == -1) break; 3162 const MCOperand &Src = Inst.getOperand(SrcIdx); 3163 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3164 return false; 3165 } 3166 } 3167 3168 if (Src0Idx == -1) 3169 return true; 3170 3171 const MCOperand &Src = Inst.getOperand(Src0Idx); 3172 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3173 return true; 3174 3175 // lds_direct is specified as src0. Check additional limitations. 3176 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3177 } 3178 3179 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3180 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3181 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3182 if (Op.isFlatOffset()) 3183 return Op.getStartLoc(); 3184 } 3185 return getLoc(); 3186 } 3187 3188 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3189 const OperandVector &Operands) { 3190 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3191 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3192 return true; 3193 3194 auto Opcode = Inst.getOpcode(); 3195 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3196 assert(OpNum != -1); 3197 3198 const auto &Op = Inst.getOperand(OpNum); 3199 if (!hasFlatOffsets() && Op.getImm() != 0) { 3200 Error(getFlatOffsetLoc(Operands), 3201 "flat offset modifier is not supported on this GPU"); 3202 return false; 3203 } 3204 3205 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3206 // For FLAT segment the offset must be positive; 3207 // MSB is ignored and forced to zero. 3208 unsigned OffsetSize = isGFX9() ? 13 : 12; 3209 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3210 if (!isIntN(OffsetSize, Op.getImm())) { 3211 Error(getFlatOffsetLoc(Operands), 3212 isGFX9() ? "expected a 13-bit signed offset" : 3213 "expected a 12-bit signed offset"); 3214 return false; 3215 } 3216 } else { 3217 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3218 Error(getFlatOffsetLoc(Operands), 3219 isGFX9() ? 
"expected a 12-bit unsigned offset" : 3220 "expected an 11-bit unsigned offset"); 3221 return false; 3222 } 3223 } 3224 3225 return true; 3226 } 3227 3228 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3229 unsigned Opcode = Inst.getOpcode(); 3230 const MCInstrDesc &Desc = MII.get(Opcode); 3231 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3232 return true; 3233 3234 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3235 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3236 3237 const int OpIndices[] = { Src0Idx, Src1Idx }; 3238 3239 unsigned NumExprs = 0; 3240 unsigned NumLiterals = 0; 3241 uint32_t LiteralValue; 3242 3243 for (int OpIdx : OpIndices) { 3244 if (OpIdx == -1) break; 3245 3246 const MCOperand &MO = Inst.getOperand(OpIdx); 3247 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3248 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3249 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3250 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3251 if (NumLiterals == 0 || LiteralValue != Value) { 3252 LiteralValue = Value; 3253 ++NumLiterals; 3254 } 3255 } else if (MO.isExpr()) { 3256 ++NumExprs; 3257 } 3258 } 3259 } 3260 3261 return NumLiterals + NumExprs <= 1; 3262 } 3263 3264 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3265 const unsigned Opc = Inst.getOpcode(); 3266 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3267 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3268 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3269 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3270 3271 if (OpSel & ~3) 3272 return false; 3273 } 3274 return true; 3275 } 3276 3277 // Check if VCC register matches wavefront size 3278 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3279 auto FB = getFeatureBits(); 3280 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3281 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3282 } 3283 3284 // VOP3 literal is only allowed in GFX10+ and only one can be used 3285 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3286 unsigned Opcode = Inst.getOpcode(); 3287 const MCInstrDesc &Desc = MII.get(Opcode); 3288 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3289 return true; 3290 3291 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3292 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3293 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3294 3295 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3296 3297 unsigned NumExprs = 0; 3298 unsigned NumLiterals = 0; 3299 uint32_t LiteralValue; 3300 3301 for (int OpIdx : OpIndices) { 3302 if (OpIdx == -1) break; 3303 3304 const MCOperand &MO = Inst.getOperand(OpIdx); 3305 if (!MO.isImm() && !MO.isExpr()) 3306 continue; 3307 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3308 continue; 3309 3310 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3311 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3312 return false; 3313 3314 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3315 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3316 if (NumLiterals == 0 || LiteralValue != Value) { 3317 LiteralValue = Value; 3318 ++NumLiterals; 3319 } 3320 } else if (MO.isExpr()) { 3321 ++NumExprs; 3322 } 3323 } 3324 NumLiterals += NumExprs; 3325 3326 return !NumLiterals || 3327 (NumLiterals == 1 && 
getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3328 } 3329 3330 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3331 const SMLoc &IDLoc, 3332 const OperandVector &Operands) { 3333 if (!validateLdsDirect(Inst)) { 3334 Error(IDLoc, 3335 "invalid use of lds_direct"); 3336 return false; 3337 } 3338 if (!validateSOPLiteral(Inst)) { 3339 Error(IDLoc, 3340 "only one literal operand is allowed"); 3341 return false; 3342 } 3343 if (!validateVOP3Literal(Inst)) { 3344 Error(IDLoc, 3345 "invalid literal operand"); 3346 return false; 3347 } 3348 if (!validateConstantBusLimitations(Inst)) { 3349 Error(IDLoc, 3350 "invalid operand (violates constant bus restrictions)"); 3351 return false; 3352 } 3353 if (!validateEarlyClobberLimitations(Inst)) { 3354 Error(IDLoc, 3355 "destination must be different than all sources"); 3356 return false; 3357 } 3358 if (!validateIntClampSupported(Inst)) { 3359 Error(IDLoc, 3360 "integer clamping is not supported on this GPU"); 3361 return false; 3362 } 3363 if (!validateOpSel(Inst)) { 3364 Error(IDLoc, 3365 "invalid op_sel operand"); 3366 return false; 3367 } 3368 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3369 if (!validateMIMGD16(Inst)) { 3370 Error(IDLoc, 3371 "d16 modifier is not supported on this GPU"); 3372 return false; 3373 } 3374 if (!validateMIMGDim(Inst)) { 3375 Error(IDLoc, "dim modifier is required on this GPU"); 3376 return false; 3377 } 3378 if (!validateMIMGDataSize(Inst)) { 3379 Error(IDLoc, 3380 "image data size does not match dmask and tfe"); 3381 return false; 3382 } 3383 if (!validateMIMGAddrSize(Inst)) { 3384 Error(IDLoc, 3385 "image address size does not match dim and a16"); 3386 return false; 3387 } 3388 if (!validateMIMGAtomicDMask(Inst)) { 3389 Error(IDLoc, 3390 "invalid atomic image dmask"); 3391 return false; 3392 } 3393 if (!validateMIMGGatherDMask(Inst)) { 3394 Error(IDLoc, 3395 "invalid image_gather dmask: only one bit must be set"); 3396 return false; 3397 } 3398 if (!validateFlatOffset(Inst, Operands)) { 3399 return false; 3400 } 3401 3402 return true; 3403 } 3404 3405 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3406 const FeatureBitset &FBS, 3407 unsigned VariantID = 0); 3408 3409 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3410 OperandVector &Operands, 3411 MCStreamer &Out, 3412 uint64_t &ErrorInfo, 3413 bool MatchingInlineAsm) { 3414 MCInst Inst; 3415 unsigned Result = Match_Success; 3416 for (auto Variant : getMatchedVariants()) { 3417 uint64_t EI; 3418 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3419 Variant); 3420 // We order match statuses from least to most specific. 
We use most specific 3421 // status as resulting 3422 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3423 if ((R == Match_Success) || 3424 (R == Match_PreferE32) || 3425 (R == Match_MissingFeature && Result != Match_PreferE32) || 3426 (R == Match_InvalidOperand && Result != Match_MissingFeature 3427 && Result != Match_PreferE32) || 3428 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3429 && Result != Match_MissingFeature 3430 && Result != Match_PreferE32)) { 3431 Result = R; 3432 ErrorInfo = EI; 3433 } 3434 if (R == Match_Success) 3435 break; 3436 } 3437 3438 switch (Result) { 3439 default: break; 3440 case Match_Success: 3441 if (!validateInstruction(Inst, IDLoc, Operands)) { 3442 return true; 3443 } 3444 Inst.setLoc(IDLoc); 3445 Out.EmitInstruction(Inst, getSTI()); 3446 return false; 3447 3448 case Match_MissingFeature: 3449 return Error(IDLoc, "instruction not supported on this GPU"); 3450 3451 case Match_MnemonicFail: { 3452 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3453 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3454 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3455 return Error(IDLoc, "invalid instruction" + Suggestion, 3456 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3457 } 3458 3459 case Match_InvalidOperand: { 3460 SMLoc ErrorLoc = IDLoc; 3461 if (ErrorInfo != ~0ULL) { 3462 if (ErrorInfo >= Operands.size()) { 3463 return Error(IDLoc, "too few operands for instruction"); 3464 } 3465 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3466 if (ErrorLoc == SMLoc()) 3467 ErrorLoc = IDLoc; 3468 } 3469 return Error(ErrorLoc, "invalid operand for instruction"); 3470 } 3471 3472 case Match_PreferE32: 3473 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3474 "should be encoded as e32"); 3475 } 3476 llvm_unreachable("Implement any new match types added!"); 3477 } 3478 3479 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3480 int64_t Tmp = -1; 3481 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3482 return true; 3483 } 3484 if (getParser().parseAbsoluteExpression(Tmp)) { 3485 return true; 3486 } 3487 Ret = static_cast<uint32_t>(Tmp); 3488 return false; 3489 } 3490 3491 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3492 uint32_t &Minor) { 3493 if (ParseAsAbsoluteExpression(Major)) 3494 return TokError("invalid major version"); 3495 3496 if (getLexer().isNot(AsmToken::Comma)) 3497 return TokError("minor version number required, comma expected"); 3498 Lex(); 3499 3500 if (ParseAsAbsoluteExpression(Minor)) 3501 return TokError("invalid minor version"); 3502 3503 return false; 3504 } 3505 3506 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3507 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3508 return TokError("directive only supported for amdgcn architecture"); 3509 3510 std::string Target; 3511 3512 SMLoc TargetStart = getTok().getLoc(); 3513 if (getParser().parseEscapedString(Target)) 3514 return true; 3515 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3516 3517 std::string ExpectedTarget; 3518 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3519 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3520 3521 if (Target != ExpectedTargetOS.str()) 3522 return getParser().Error(TargetRange.Start, "target must match options", 3523 TargetRange); 3524 3525 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3526 return false; 3527 } 3528 3529 bool 
AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3530 return getParser().Error(Range.Start, "value out of range", Range); 3531 } 3532 3533 bool AMDGPUAsmParser::calculateGPRBlocks( 3534 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3535 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3536 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3537 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3538 // TODO(scott.linder): These calculations are duplicated from 3539 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 3540 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3541 3542 unsigned NumVGPRs = NextFreeVGPR; 3543 unsigned NumSGPRs = NextFreeSGPR; 3544 3545 if (Version.Major >= 10) 3546 NumSGPRs = 0; 3547 else { 3548 unsigned MaxAddressableNumSGPRs = 3549 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3550 3551 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3552 NumSGPRs > MaxAddressableNumSGPRs) 3553 return OutOfRangeError(SGPRRange); 3554 3555 NumSGPRs += 3556 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3557 3558 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3559 NumSGPRs > MaxAddressableNumSGPRs) 3560 return OutOfRangeError(SGPRRange); 3561 3562 if (Features.test(FeatureSGPRInitBug)) 3563 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3564 } 3565 3566 VGPRBlocks = 3567 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3568 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3569 3570 return false; 3571 } 3572 3573 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3574 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3575 return TokError("directive only supported for amdgcn architecture"); 3576 3577 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3578 return TokError("directive only supported for amdhsa OS"); 3579 3580 StringRef KernelName; 3581 if (getParser().parseIdentifier(KernelName)) 3582 return true; 3583 3584 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3585 3586 StringSet<> Seen; 3587 3588 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3589 3590 SMRange VGPRRange; 3591 uint64_t NextFreeVGPR = 0; 3592 SMRange SGPRRange; 3593 uint64_t NextFreeSGPR = 0; 3594 unsigned UserSGPRCount = 0; 3595 bool ReserveVCC = true; 3596 bool ReserveFlatScr = true; 3597 bool ReserveXNACK = hasXNACK(); 3598 Optional<bool> EnableWavefrontSize32; 3599 3600 while (true) { 3601 while (getLexer().is(AsmToken::EndOfStatement)) 3602 Lex(); 3603 3604 if (getLexer().isNot(AsmToken::Identifier)) 3605 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3606 3607 StringRef ID = getTok().getIdentifier(); 3608 SMRange IDRange = getTok().getLocRange(); 3609 Lex(); 3610 3611 if (ID == ".end_amdhsa_kernel") 3612 break; 3613 3614 if (Seen.find(ID) != Seen.end()) 3615 return TokError(".amdhsa_ directives cannot be repeated"); 3616 Seen.insert(ID); 3617 3618 SMLoc ValStart = getTok().getLoc(); 3619 int64_t IVal; 3620 if (getParser().parseAbsoluteExpression(IVal)) 3621 return true; 3622 SMLoc ValEnd = getTok().getLoc(); 3623 SMRange ValRange = SMRange(ValStart, ValEnd); 3624 3625 if (IVal < 0) 3626 return OutOfRangeError(ValRange); 3627 3628 uint64_t Val = IVal; 3629 3630 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3631 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3632 return OutOfRangeError(RANGE); \ 3633 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3634 3635 if (ID == 
".amdhsa_group_segment_fixed_size") { 3636 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3637 return OutOfRangeError(ValRange); 3638 KD.group_segment_fixed_size = Val; 3639 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3640 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3641 return OutOfRangeError(ValRange); 3642 KD.private_segment_fixed_size = Val; 3643 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3644 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3645 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3646 Val, ValRange); 3647 if (Val) 3648 UserSGPRCount += 4; 3649 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3650 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3651 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3652 ValRange); 3653 if (Val) 3654 UserSGPRCount += 2; 3655 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3656 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3657 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3658 ValRange); 3659 if (Val) 3660 UserSGPRCount += 2; 3661 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3662 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3663 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3664 Val, ValRange); 3665 if (Val) 3666 UserSGPRCount += 2; 3667 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3668 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3669 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3670 ValRange); 3671 if (Val) 3672 UserSGPRCount += 2; 3673 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3674 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3675 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3676 ValRange); 3677 if (Val) 3678 UserSGPRCount += 2; 3679 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3680 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3681 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3682 Val, ValRange); 3683 if (Val) 3684 UserSGPRCount += 1; 3685 } else if (ID == ".amdhsa_wavefront_size32") { 3686 if (IVersion.Major < 10) 3687 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3688 IDRange); 3689 EnableWavefrontSize32 = Val; 3690 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3691 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3692 Val, ValRange); 3693 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3694 PARSE_BITS_ENTRY( 3695 KD.compute_pgm_rsrc2, 3696 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3697 ValRange); 3698 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3699 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3700 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3701 ValRange); 3702 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3703 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3704 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3705 ValRange); 3706 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3707 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3708 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3709 ValRange); 3710 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3711 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3712 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3713 ValRange); 3714 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3715 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3716 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3717 ValRange); 3718 } else if (ID == ".amdhsa_next_free_vgpr") { 3719 VGPRRange = ValRange; 3720 NextFreeVGPR = Val; 3721 } else if (ID == 
".amdhsa_next_free_sgpr") { 3722 SGPRRange = ValRange; 3723 NextFreeSGPR = Val; 3724 } else if (ID == ".amdhsa_reserve_vcc") { 3725 if (!isUInt<1>(Val)) 3726 return OutOfRangeError(ValRange); 3727 ReserveVCC = Val; 3728 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3729 if (IVersion.Major < 7) 3730 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3731 IDRange); 3732 if (!isUInt<1>(Val)) 3733 return OutOfRangeError(ValRange); 3734 ReserveFlatScr = Val; 3735 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3736 if (IVersion.Major < 8) 3737 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3738 IDRange); 3739 if (!isUInt<1>(Val)) 3740 return OutOfRangeError(ValRange); 3741 ReserveXNACK = Val; 3742 } else if (ID == ".amdhsa_float_round_mode_32") { 3743 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3744 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3745 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3746 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3747 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3748 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3749 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3750 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3751 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3752 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3753 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3754 ValRange); 3755 } else if (ID == ".amdhsa_dx10_clamp") { 3756 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3757 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3758 } else if (ID == ".amdhsa_ieee_mode") { 3759 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3760 Val, ValRange); 3761 } else if (ID == ".amdhsa_fp16_overflow") { 3762 if (IVersion.Major < 9) 3763 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3764 IDRange); 3765 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3766 ValRange); 3767 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3768 if (IVersion.Major < 10) 3769 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3770 IDRange); 3771 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3772 ValRange); 3773 } else if (ID == ".amdhsa_memory_ordered") { 3774 if (IVersion.Major < 10) 3775 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3776 IDRange); 3777 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3778 ValRange); 3779 } else if (ID == ".amdhsa_forward_progress") { 3780 if (IVersion.Major < 10) 3781 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3782 IDRange); 3783 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3784 ValRange); 3785 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3786 PARSE_BITS_ENTRY( 3787 KD.compute_pgm_rsrc2, 3788 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3789 ValRange); 3790 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3791 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3792 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3793 Val, ValRange); 3794 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3795 PARSE_BITS_ENTRY( 3796 KD.compute_pgm_rsrc2, 3797 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3798 ValRange); 3799 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3800 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3801 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3802 Val, ValRange); 3803 } else if 
(ID == ".amdhsa_exception_fp_ieee_underflow") { 3804 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3805 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3806 Val, ValRange); 3807 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3808 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3809 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3810 Val, ValRange); 3811 } else if (ID == ".amdhsa_exception_int_div_zero") { 3812 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3813 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3814 Val, ValRange); 3815 } else { 3816 return getParser().Error(IDRange.Start, 3817 "unknown .amdhsa_kernel directive", IDRange); 3818 } 3819 3820 #undef PARSE_BITS_ENTRY 3821 } 3822 3823 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3824 return TokError(".amdhsa_next_free_vgpr directive is required"); 3825 3826 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3827 return TokError(".amdhsa_next_free_sgpr directive is required"); 3828 3829 unsigned VGPRBlocks; 3830 unsigned SGPRBlocks; 3831 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3832 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 3833 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 3834 SGPRBlocks)) 3835 return true; 3836 3837 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3838 VGPRBlocks)) 3839 return OutOfRangeError(VGPRRange); 3840 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3841 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3842 3843 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3844 SGPRBlocks)) 3845 return OutOfRangeError(SGPRRange); 3846 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3847 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3848 SGPRBlocks); 3849 3850 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3851 return TokError("too many user SGPRs enabled"); 3852 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3853 UserSGPRCount); 3854 3855 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3856 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3857 ReserveFlatScr, ReserveXNACK); 3858 return false; 3859 } 3860 3861 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3862 uint32_t Major; 3863 uint32_t Minor; 3864 3865 if (ParseDirectiveMajorMinor(Major, Minor)) 3866 return true; 3867 3868 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3869 return false; 3870 } 3871 3872 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3873 uint32_t Major; 3874 uint32_t Minor; 3875 uint32_t Stepping; 3876 StringRef VendorName; 3877 StringRef ArchName; 3878 3879 // If this directive has no arguments, then use the ISA version for the 3880 // targeted GPU. 
3881 if (getLexer().is(AsmToken::EndOfStatement)) { 3882 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3883 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3884 ISA.Stepping, 3885 "AMD", "AMDGPU"); 3886 return false; 3887 } 3888 3889 if (ParseDirectiveMajorMinor(Major, Minor)) 3890 return true; 3891 3892 if (getLexer().isNot(AsmToken::Comma)) 3893 return TokError("stepping version number required, comma expected"); 3894 Lex(); 3895 3896 if (ParseAsAbsoluteExpression(Stepping)) 3897 return TokError("invalid stepping version"); 3898 3899 if (getLexer().isNot(AsmToken::Comma)) 3900 return TokError("vendor name required, comma expected"); 3901 Lex(); 3902 3903 if (getLexer().isNot(AsmToken::String)) 3904 return TokError("invalid vendor name"); 3905 3906 VendorName = getLexer().getTok().getStringContents(); 3907 Lex(); 3908 3909 if (getLexer().isNot(AsmToken::Comma)) 3910 return TokError("arch name required, comma expected"); 3911 Lex(); 3912 3913 if (getLexer().isNot(AsmToken::String)) 3914 return TokError("invalid arch name"); 3915 3916 ArchName = getLexer().getTok().getStringContents(); 3917 Lex(); 3918 3919 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3920 VendorName, ArchName); 3921 return false; 3922 } 3923 3924 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3925 amd_kernel_code_t &Header) { 3926 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3927 // assembly for backwards compatibility. 3928 if (ID == "max_scratch_backing_memory_byte_size") { 3929 Parser.eatToEndOfStatement(); 3930 return false; 3931 } 3932 3933 SmallString<40> ErrStr; 3934 raw_svector_ostream Err(ErrStr); 3935 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 3936 return TokError(Err.str()); 3937 } 3938 Lex(); 3939 3940 if (ID == "enable_wavefront_size32") { 3941 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 3942 if (!isGFX10()) 3943 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 3944 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 3945 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 3946 } else { 3947 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 3948 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 3949 } 3950 } 3951 3952 if (ID == "wavefront_size") { 3953 if (Header.wavefront_size == 5) { 3954 if (!isGFX10()) 3955 return TokError("wavefront_size=5 is only allowed on GFX10+"); 3956 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 3957 return TokError("wavefront_size=5 requires +WavefrontSize32"); 3958 } else if (Header.wavefront_size == 6) { 3959 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 3960 return TokError("wavefront_size=6 requires +WavefrontSize64"); 3961 } 3962 } 3963 3964 if (ID == "enable_wgp_mode") { 3965 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 3966 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 3967 } 3968 3969 if (ID == "enable_mem_ordered") { 3970 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 3971 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 3972 } 3973 3974 if (ID == "enable_fwd_progress") { 3975 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 3976 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 3977 } 3978 3979 return false; 3980 } 3981 3982 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3983 amd_kernel_code_t Header; 3984 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3985 3986 while (true) { 3987 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3988 // will set the current token to EndOfStatement. 3989 while(getLexer().is(AsmToken::EndOfStatement)) 3990 Lex(); 3991 3992 if (getLexer().isNot(AsmToken::Identifier)) 3993 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3994 3995 StringRef ID = getLexer().getTok().getIdentifier(); 3996 Lex(); 3997 3998 if (ID == ".end_amd_kernel_code_t") 3999 break; 4000 4001 if (ParseAMDKernelCodeTValue(ID, Header)) 4002 return true; 4003 } 4004 4005 getTargetStreamer().EmitAMDKernelCodeT(Header); 4006 4007 return false; 4008 } 4009 4010 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4011 if (getLexer().isNot(AsmToken::Identifier)) 4012 return TokError("expected symbol name"); 4013 4014 StringRef KernelName = Parser.getTok().getString(); 4015 4016 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4017 ELF::STT_AMDGPU_HSA_KERNEL); 4018 Lex(); 4019 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4020 KernelScope.initialize(getContext()); 4021 return false; 4022 } 4023 4024 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4025 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4026 return Error(getParser().getTok().getLoc(), 4027 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4028 "architectures"); 4029 } 4030 4031 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4032 4033 std::string ISAVersionStringFromSTI; 4034 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4035 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4036 4037 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4038 return Error(getParser().getTok().getLoc(), 4039 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4040 "arguments specified through the command line"); 4041 } 4042 4043 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4044 Lex(); 4045 4046 return false; 4047 } 4048 4049 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4050 const char *AssemblerDirectiveBegin; 4051 const char *AssemblerDirectiveEnd; 4052 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4053 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4054 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4055 HSAMD::V3::AssemblerDirectiveEnd) 4056 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4057 HSAMD::AssemblerDirectiveEnd); 4058 4059 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4060 return Error(getParser().getTok().getLoc(), 4061 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4062 "not available on non-amdhsa OSes")).str()); 4063 } 4064 4065 std::string HSAMetadataString; 4066 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4067 HSAMetadataString)) 4068 return true; 4069 4070 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4071 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4072 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4073 } else { 4074 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4075 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4076 } 4077 4078 return false; 4079 } 4080 4081 /// Common code to parse out a block of text (typically YAML) between start and 4082 /// end directives. 
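/// For illustration, the code-object-v3 metadata block is written as
///   .amdgpu_metadata
///     <YAML text>
///   .end_amdgpu_metadata
/// and everything between the two directives is collected verbatim into
/// CollectString. The directive names above are only illustrative; callers
/// pass the exact begin/end strings they expect.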
4083 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4084 const char *AssemblerDirectiveEnd, 4085 std::string &CollectString) { 4086 4087 raw_string_ostream CollectStream(CollectString); 4088 4089 getLexer().setSkipSpace(false); 4090 4091 bool FoundEnd = false; 4092 while (!getLexer().is(AsmToken::Eof)) { 4093 while (getLexer().is(AsmToken::Space)) { 4094 CollectStream << getLexer().getTok().getString(); 4095 Lex(); 4096 } 4097 4098 if (getLexer().is(AsmToken::Identifier)) { 4099 StringRef ID = getLexer().getTok().getIdentifier(); 4100 if (ID == AssemblerDirectiveEnd) { 4101 Lex(); 4102 FoundEnd = true; 4103 break; 4104 } 4105 } 4106 4107 CollectStream << Parser.parseStringToEndOfStatement() 4108 << getContext().getAsmInfo()->getSeparatorString(); 4109 4110 Parser.eatToEndOfStatement(); 4111 } 4112 4113 getLexer().setSkipSpace(true); 4114 4115 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4116 return TokError(Twine("expected directive ") + 4117 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4118 } 4119 4120 CollectStream.flush(); 4121 return false; 4122 } 4123 4124 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4125 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4126 std::string String; 4127 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4128 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4129 return true; 4130 4131 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4132 if (!PALMetadata->setFromString(String)) 4133 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4134 return false; 4135 } 4136 4137 /// Parse the assembler directive for old linear-format PAL metadata. 4138 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4139 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4140 return Error(getParser().getTok().getLoc(), 4141 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4142 "not available on non-amdpal OSes")).str()); 4143 } 4144 4145 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4146 PALMetadata->setLegacy(); 4147 for (;;) { 4148 uint32_t Key, Value; 4149 if (ParseAsAbsoluteExpression(Key)) { 4150 return TokError(Twine("invalid value in ") + 4151 Twine(PALMD::AssemblerDirective)); 4152 } 4153 if (getLexer().isNot(AsmToken::Comma)) { 4154 return TokError(Twine("expected an even number of values in ") + 4155 Twine(PALMD::AssemblerDirective)); 4156 } 4157 Lex(); 4158 if (ParseAsAbsoluteExpression(Value)) { 4159 return TokError(Twine("invalid value in ") + 4160 Twine(PALMD::AssemblerDirective)); 4161 } 4162 PALMetadata->setRegister(Key, Value); 4163 if (getLexer().isNot(AsmToken::Comma)) 4164 break; 4165 Lex(); 4166 } 4167 return false; 4168 } 4169 4170 /// ParseDirectiveAMDGPULDS 4171 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4172 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4173 if (getParser().checkForValidSection()) 4174 return true; 4175 4176 StringRef Name; 4177 SMLoc NameLoc = getLexer().getLoc(); 4178 if (getParser().parseIdentifier(Name)) 4179 return TokError("expected identifier in directive"); 4180 4181 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4182 if (parseToken(AsmToken::Comma, "expected ','")) 4183 return true; 4184 4185 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4186 4187 int64_t Size; 4188 SMLoc SizeLoc = getLexer().getLoc(); 4189 if (getParser().parseAbsoluteExpression(Size)) 4190 return true; 4191 if (Size < 0) 4192 return 
Error(SizeLoc, "size must be non-negative"); 4193 if (Size > LocalMemorySize) 4194 return Error(SizeLoc, "size is too large"); 4195 4196 int64_t Align = 4; 4197 if (getLexer().is(AsmToken::Comma)) { 4198 Lex(); 4199 SMLoc AlignLoc = getLexer().getLoc(); 4200 if (getParser().parseAbsoluteExpression(Align)) 4201 return true; 4202 if (Align < 0 || !isPowerOf2_64(Align)) 4203 return Error(AlignLoc, "alignment must be a power of two"); 4204 4205 // Alignment larger than the size of LDS is possible in theory, as long 4206 // as the linker manages to place to symbol at address 0, but we do want 4207 // to make sure the alignment fits nicely into a 32-bit integer. 4208 if (Align >= 1u << 31) 4209 return Error(AlignLoc, "alignment is too large"); 4210 } 4211 4212 if (parseToken(AsmToken::EndOfStatement, 4213 "unexpected token in '.amdgpu_lds' directive")) 4214 return true; 4215 4216 Symbol->redefineIfPossible(); 4217 if (!Symbol->isUndefined()) 4218 return Error(NameLoc, "invalid symbol redefinition"); 4219 4220 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align); 4221 return false; 4222 } 4223 4224 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4225 StringRef IDVal = DirectiveID.getString(); 4226 4227 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4228 if (IDVal == ".amdgcn_target") 4229 return ParseDirectiveAMDGCNTarget(); 4230 4231 if (IDVal == ".amdhsa_kernel") 4232 return ParseDirectiveAMDHSAKernel(); 4233 4234 // TODO: Restructure/combine with PAL metadata directive. 4235 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4236 return ParseDirectiveHSAMetadata(); 4237 } else { 4238 if (IDVal == ".hsa_code_object_version") 4239 return ParseDirectiveHSACodeObjectVersion(); 4240 4241 if (IDVal == ".hsa_code_object_isa") 4242 return ParseDirectiveHSACodeObjectISA(); 4243 4244 if (IDVal == ".amd_kernel_code_t") 4245 return ParseDirectiveAMDKernelCodeT(); 4246 4247 if (IDVal == ".amdgpu_hsa_kernel") 4248 return ParseDirectiveAMDGPUHsaKernel(); 4249 4250 if (IDVal == ".amd_amdgpu_isa") 4251 return ParseDirectiveISAVersion(); 4252 4253 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4254 return ParseDirectiveHSAMetadata(); 4255 } 4256 4257 if (IDVal == ".amdgpu_lds") 4258 return ParseDirectiveAMDGPULDS(); 4259 4260 if (IDVal == PALMD::AssemblerDirectiveBegin) 4261 return ParseDirectivePALMetadataBegin(); 4262 4263 if (IDVal == PALMD::AssemblerDirective) 4264 return ParseDirectivePALMetadata(); 4265 4266 return true; 4267 } 4268 4269 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4270 unsigned RegNo) const { 4271 4272 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4273 R.isValid(); ++R) { 4274 if (*R == RegNo) 4275 return isGFX9() || isGFX10(); 4276 } 4277 4278 // GFX10 has 2 more SGPRs 104 and 105. 
4279 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4280 R.isValid(); ++R) { 4281 if (*R == RegNo) 4282 return hasSGPR104_SGPR105(); 4283 } 4284 4285 switch (RegNo) { 4286 case AMDGPU::SRC_SHARED_BASE: 4287 case AMDGPU::SRC_SHARED_LIMIT: 4288 case AMDGPU::SRC_PRIVATE_BASE: 4289 case AMDGPU::SRC_PRIVATE_LIMIT: 4290 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4291 return !isCI() && !isSI() && !isVI(); 4292 case AMDGPU::TBA: 4293 case AMDGPU::TBA_LO: 4294 case AMDGPU::TBA_HI: 4295 case AMDGPU::TMA: 4296 case AMDGPU::TMA_LO: 4297 case AMDGPU::TMA_HI: 4298 return !isGFX9() && !isGFX10(); 4299 case AMDGPU::XNACK_MASK: 4300 case AMDGPU::XNACK_MASK_LO: 4301 case AMDGPU::XNACK_MASK_HI: 4302 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4303 case AMDGPU::SGPR_NULL: 4304 return isGFX10(); 4305 default: 4306 break; 4307 } 4308 4309 if (isCI()) 4310 return true; 4311 4312 if (isSI() || isGFX10()) { 4313 // No flat_scr on SI. 4314 // On GFX10 flat scratch is not a valid register operand and can only be 4315 // accessed with s_setreg/s_getreg. 4316 switch (RegNo) { 4317 case AMDGPU::FLAT_SCR: 4318 case AMDGPU::FLAT_SCR_LO: 4319 case AMDGPU::FLAT_SCR_HI: 4320 return false; 4321 default: 4322 return true; 4323 } 4324 } 4325 4326 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4327 // SI/CI have. 4328 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4329 R.isValid(); ++R) { 4330 if (*R == RegNo) 4331 return hasSGPR102_SGPR103(); 4332 } 4333 4334 return true; 4335 } 4336 4337 OperandMatchResultTy 4338 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4339 OperandMode Mode) { 4340 // Try to parse with a custom parser 4341 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4342 4343 // If we successfully parsed the operand or if there as an error parsing, 4344 // we are done. 4345 // 4346 // If we are parsing after we reach EndOfStatement then this means we 4347 // are appending default values to the Operands list. This is only done 4348 // by custom parser, so we shouldn't continue on to the generic parsing. 4349 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4350 getLexer().is(AsmToken::EndOfStatement)) 4351 return ResTy; 4352 4353 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4354 unsigned Prefix = Operands.size(); 4355 SMLoc LBraceLoc = getTok().getLoc(); 4356 Parser.Lex(); // eat the '[' 4357 4358 for (;;) { 4359 ResTy = parseReg(Operands); 4360 if (ResTy != MatchOperand_Success) 4361 return ResTy; 4362 4363 if (getLexer().is(AsmToken::RBrac)) 4364 break; 4365 4366 if (getLexer().isNot(AsmToken::Comma)) 4367 return MatchOperand_ParseFail; 4368 Parser.Lex(); 4369 } 4370 4371 if (Operands.size() - Prefix > 1) { 4372 Operands.insert(Operands.begin() + Prefix, 4373 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4374 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4375 getTok().getLoc())); 4376 } 4377 4378 Parser.Lex(); // eat the ']' 4379 return MatchOperand_Success; 4380 } 4381 4382 return parseRegOrImm(Operands); 4383 } 4384 4385 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4386 // Clear any forced encodings from the previous instruction. 
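  // For example (mnemonics are illustrative), "v_add_f32_e64" is reduced to
  // "v_add_f32" with a forced 64-bit encoding, and "v_mov_b32_sdwa" to
  // "v_mov_b32" with SDWA forced.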
4387 setForcedEncodingSize(0); 4388 setForcedDPP(false); 4389 setForcedSDWA(false); 4390 4391 if (Name.endswith("_e64")) { 4392 setForcedEncodingSize(64); 4393 return Name.substr(0, Name.size() - 4); 4394 } else if (Name.endswith("_e32")) { 4395 setForcedEncodingSize(32); 4396 return Name.substr(0, Name.size() - 4); 4397 } else if (Name.endswith("_dpp")) { 4398 setForcedDPP(true); 4399 return Name.substr(0, Name.size() - 4); 4400 } else if (Name.endswith("_sdwa")) { 4401 setForcedSDWA(true); 4402 return Name.substr(0, Name.size() - 5); 4403 } 4404 return Name; 4405 } 4406 4407 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4408 StringRef Name, 4409 SMLoc NameLoc, OperandVector &Operands) { 4410 // Add the instruction mnemonic 4411 Name = parseMnemonicSuffix(Name); 4412 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4413 4414 bool IsMIMG = Name.startswith("image_"); 4415 4416 while (!getLexer().is(AsmToken::EndOfStatement)) { 4417 OperandMode Mode = OperandMode_Default; 4418 if (IsMIMG && isGFX10() && Operands.size() == 2) 4419 Mode = OperandMode_NSA; 4420 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4421 4422 // Eat the comma or space if there is one. 4423 if (getLexer().is(AsmToken::Comma)) 4424 Parser.Lex(); 4425 4426 switch (Res) { 4427 case MatchOperand_Success: break; 4428 case MatchOperand_ParseFail: 4429 // FIXME: use real operand location rather than the current location. 4430 Error(getLexer().getLoc(), "failed parsing operand."); 4431 while (!getLexer().is(AsmToken::EndOfStatement)) { 4432 Parser.Lex(); 4433 } 4434 return true; 4435 case MatchOperand_NoMatch: 4436 // FIXME: use real operand location rather than the current location. 4437 Error(getLexer().getLoc(), "not a valid operand."); 4438 while (!getLexer().is(AsmToken::EndOfStatement)) { 4439 Parser.Lex(); 4440 } 4441 return true; 4442 } 4443 } 4444 4445 return false; 4446 } 4447 4448 //===----------------------------------------------------------------------===// 4449 // Utility functions 4450 //===----------------------------------------------------------------------===// 4451 4452 OperandMatchResultTy 4453 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4454 4455 if (!trySkipId(Prefix, AsmToken::Colon)) 4456 return MatchOperand_NoMatch; 4457 4458 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4459 } 4460 4461 OperandMatchResultTy 4462 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4463 AMDGPUOperand::ImmTy ImmTy, 4464 bool (*ConvertResult)(int64_t&)) { 4465 SMLoc S = getLoc(); 4466 int64_t Value = 0; 4467 4468 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4469 if (Res != MatchOperand_Success) 4470 return Res; 4471 4472 if (ConvertResult && !ConvertResult(Value)) { 4473 Error(S, "invalid " + StringRef(Prefix) + " value."); 4474 } 4475 4476 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4477 return MatchOperand_Success; 4478 } 4479 4480 OperandMatchResultTy 4481 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4482 OperandVector &Operands, 4483 AMDGPUOperand::ImmTy ImmTy, 4484 bool (*ConvertResult)(int64_t&)) { 4485 SMLoc S = getLoc(); 4486 if (!trySkipId(Prefix, AsmToken::Colon)) 4487 return MatchOperand_NoMatch; 4488 4489 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4490 return MatchOperand_ParseFail; 4491 4492 unsigned Val = 0; 4493 const unsigned MaxSize = 4; 4494 4495 // FIXME: How to verify the number of elements matches the number of src 4496 // operands? 4497 for (int I = 0; ; ++I) { 4498 int64_t Op; 4499 SMLoc Loc = getLoc(); 4500 if (!parseExpr(Op)) 4501 return MatchOperand_ParseFail; 4502 4503 if (Op != 0 && Op != 1) { 4504 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4505 return MatchOperand_ParseFail; 4506 } 4507 4508 Val |= (Op << I); 4509 4510 if (trySkipToken(AsmToken::RBrac)) 4511 break; 4512 4513 if (I + 1 == MaxSize) { 4514 Error(getLoc(), "expected a closing square bracket"); 4515 return MatchOperand_ParseFail; 4516 } 4517 4518 if (!skipToken(AsmToken::Comma, "expected a comma")) 4519 return MatchOperand_ParseFail; 4520 } 4521 4522 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4523 return MatchOperand_Success; 4524 } 4525 4526 OperandMatchResultTy 4527 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4528 AMDGPUOperand::ImmTy ImmTy) { 4529 int64_t Bit = 0; 4530 SMLoc S = Parser.getTok().getLoc(); 4531 4532 // We are at the end of the statement, and this is a default argument, so 4533 // use a default value. 
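  // Otherwise, for a bit named e.g. "glc", the token "glc" sets the bit and
  // "noglc" clears it; any other identifier is not a match for this operand.
  // The concrete bit names are supplied by the callers.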
4534 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4535 switch(getLexer().getKind()) { 4536 case AsmToken::Identifier: { 4537 StringRef Tok = Parser.getTok().getString(); 4538 if (Tok == Name) { 4539 if (Tok == "r128" && isGFX9()) 4540 Error(S, "r128 modifier is not supported on this GPU"); 4541 if (Tok == "a16" && !isGFX9() && !isGFX10()) 4542 Error(S, "a16 modifier is not supported on this GPU"); 4543 Bit = 1; 4544 Parser.Lex(); 4545 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4546 Bit = 0; 4547 Parser.Lex(); 4548 } else { 4549 return MatchOperand_NoMatch; 4550 } 4551 break; 4552 } 4553 default: 4554 return MatchOperand_NoMatch; 4555 } 4556 } 4557 4558 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4559 return MatchOperand_ParseFail; 4560 4561 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4562 return MatchOperand_Success; 4563 } 4564 4565 static void addOptionalImmOperand( 4566 MCInst& Inst, const OperandVector& Operands, 4567 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4568 AMDGPUOperand::ImmTy ImmT, 4569 int64_t Default = 0) { 4570 auto i = OptionalIdx.find(ImmT); 4571 if (i != OptionalIdx.end()) { 4572 unsigned Idx = i->second; 4573 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4574 } else { 4575 Inst.addOperand(MCOperand::createImm(Default)); 4576 } 4577 } 4578 4579 OperandMatchResultTy 4580 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4581 if (getLexer().isNot(AsmToken::Identifier)) { 4582 return MatchOperand_NoMatch; 4583 } 4584 StringRef Tok = Parser.getTok().getString(); 4585 if (Tok != Prefix) { 4586 return MatchOperand_NoMatch; 4587 } 4588 4589 Parser.Lex(); 4590 if (getLexer().isNot(AsmToken::Colon)) { 4591 return MatchOperand_ParseFail; 4592 } 4593 4594 Parser.Lex(); 4595 if (getLexer().isNot(AsmToken::Identifier)) { 4596 return MatchOperand_ParseFail; 4597 } 4598 4599 Value = Parser.getTok().getString(); 4600 return MatchOperand_Success; 4601 } 4602 4603 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4604 // values to live in a joint format operand in the MCInst encoding. 4605 OperandMatchResultTy 4606 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4607 SMLoc S = Parser.getTok().getLoc(); 4608 int64_t Dfmt = 0, Nfmt = 0; 4609 // dfmt and nfmt can appear in either order, and each is optional. 
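  // For example, "dfmt:1, nfmt:2" and "nfmt:2, dfmt:1" are both accepted and
  // produce the same joint format operand, Dfmt | (Nfmt << 4).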
4610 bool GotDfmt = false, GotNfmt = false; 4611 while (!GotDfmt || !GotNfmt) { 4612 if (!GotDfmt) { 4613 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4614 if (Res != MatchOperand_NoMatch) { 4615 if (Res != MatchOperand_Success) 4616 return Res; 4617 if (Dfmt >= 16) { 4618 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4619 return MatchOperand_ParseFail; 4620 } 4621 GotDfmt = true; 4622 Parser.Lex(); 4623 continue; 4624 } 4625 } 4626 if (!GotNfmt) { 4627 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4628 if (Res != MatchOperand_NoMatch) { 4629 if (Res != MatchOperand_Success) 4630 return Res; 4631 if (Nfmt >= 8) { 4632 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4633 return MatchOperand_ParseFail; 4634 } 4635 GotNfmt = true; 4636 Parser.Lex(); 4637 continue; 4638 } 4639 } 4640 break; 4641 } 4642 if (!GotDfmt && !GotNfmt) 4643 return MatchOperand_NoMatch; 4644 auto Format = Dfmt | Nfmt << 4; 4645 Operands.push_back( 4646 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4647 return MatchOperand_Success; 4648 } 4649 4650 //===----------------------------------------------------------------------===// 4651 // ds 4652 //===----------------------------------------------------------------------===// 4653 4654 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4655 const OperandVector &Operands) { 4656 OptionalImmIndexMap OptionalIdx; 4657 4658 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4659 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4660 4661 // Add the register arguments 4662 if (Op.isReg()) { 4663 Op.addRegOperands(Inst, 1); 4664 continue; 4665 } 4666 4667 // Handle optional arguments 4668 OptionalIdx[Op.getImmTy()] = i; 4669 } 4670 4671 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4672 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4673 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4674 4675 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4676 } 4677 4678 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4679 bool IsGdsHardcoded) { 4680 OptionalImmIndexMap OptionalIdx; 4681 4682 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4683 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4684 4685 // Add the register arguments 4686 if (Op.isReg()) { 4687 Op.addRegOperands(Inst, 1); 4688 continue; 4689 } 4690 4691 if (Op.isToken() && Op.getToken() == "gds") { 4692 IsGdsHardcoded = true; 4693 continue; 4694 } 4695 4696 // Handle optional arguments 4697 OptionalIdx[Op.getImmTy()] = i; 4698 } 4699 4700 AMDGPUOperand::ImmTy OffsetType = 4701 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4702 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4703 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4704 AMDGPUOperand::ImmTyOffset; 4705 4706 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4707 4708 if (!IsGdsHardcoded) { 4709 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4710 } 4711 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4712 } 4713 4714 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4715 OptionalImmIndexMap OptionalIdx; 4716 4717 unsigned OperandIdx[4]; 4718 unsigned EnMask = 0; 4719 int SrcIdx = 0; 4720 4721 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4722 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4723 4724 // Add the register arguments 4725 if (Op.isReg()) { 4726 assert(SrcIdx < 4); 4727 OperandIdx[SrcIdx] = Inst.size(); 4728 Op.addRegOperands(Inst, 1); 4729 ++SrcIdx; 4730 continue; 4731 } 4732 4733 if (Op.isOff()) { 4734 assert(SrcIdx < 4); 4735 OperandIdx[SrcIdx] = Inst.size(); 4736 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4737 ++SrcIdx; 4738 continue; 4739 } 4740 4741 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4742 Op.addImmOperands(Inst, 1); 4743 continue; 4744 } 4745 4746 if (Op.isToken() && Op.getToken() == "done") 4747 continue; 4748 4749 // Handle optional arguments 4750 OptionalIdx[Op.getImmTy()] = i; 4751 } 4752 4753 assert(SrcIdx == 4); 4754 4755 bool Compr = false; 4756 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4757 Compr = true; 4758 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4759 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4760 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4761 } 4762 4763 for (auto i = 0; i < SrcIdx; ++i) { 4764 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4765 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4766 } 4767 } 4768 4769 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4770 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4771 4772 Inst.addOperand(MCOperand::createImm(EnMask)); 4773 } 4774 4775 //===----------------------------------------------------------------------===// 4776 // s_waitcnt 4777 //===----------------------------------------------------------------------===// 4778 4779 static bool 4780 encodeCnt( 4781 const AMDGPU::IsaVersion ISA, 4782 int64_t &IntVal, 4783 int64_t CntVal, 4784 bool Saturate, 4785 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4786 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4787 { 4788 bool Failed = false; 4789 4790 IntVal = encode(ISA, IntVal, CntVal); 4791 if (CntVal != decode(ISA, IntVal)) { 4792 if (Saturate) { 4793 IntVal = encode(ISA, IntVal, -1); 4794 } else { 4795 Failed = true; 4796 } 4797 } 4798 return Failed; 4799 } 4800 4801 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4802 4803 SMLoc CntLoc = getLoc(); 4804 StringRef CntName = getTokenStr(); 4805 4806 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 4807 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 4808 return false; 4809 4810 int64_t CntVal; 4811 SMLoc ValLoc = getLoc(); 4812 if (!parseExpr(CntVal)) 4813 return false; 4814 4815 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4816 4817 bool Failed = true; 4818 bool Sat = CntName.endswith("_sat"); 4819 4820 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4821 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4822 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4823 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4824 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4825 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4826 } else { 4827 Error(CntLoc, "invalid counter name " + CntName); 4828 return false; 4829 } 4830 4831 if (Failed) { 4832 Error(ValLoc, "too large value for " + CntName); 4833 return false; 4834 } 4835 4836 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 4837 return false; 4838 4839 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 4840 if (isToken(AsmToken::EndOfStatement)) { 4841 Error(getLoc(), "expected a counter name"); 4842 return false; 4843 } 4844 } 4845 4846 return true; 4847 } 4848 4849 OperandMatchResultTy 4850 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4851 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4852 int64_t Waitcnt = getWaitcntBitMask(ISA); 4853 SMLoc S = getLoc(); 4854 4855 // If parse failed, do not return error code 4856 // to avoid excessive error messages. 
4857 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 4858 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 4859 } else { 4860 parseExpr(Waitcnt); 4861 } 4862 4863 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4864 return MatchOperand_Success; 4865 } 4866 4867 bool 4868 AMDGPUOperand::isSWaitCnt() const { 4869 return isImm(); 4870 } 4871 4872 //===----------------------------------------------------------------------===// 4873 // hwreg 4874 //===----------------------------------------------------------------------===// 4875 4876 bool 4877 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 4878 int64_t &Offset, 4879 int64_t &Width) { 4880 using namespace llvm::AMDGPU::Hwreg; 4881 4882 // The register may be specified by name or using a numeric code 4883 if (isToken(AsmToken::Identifier) && 4884 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 4885 HwReg.IsSymbolic = true; 4886 lex(); // skip message name 4887 } else if (!parseExpr(HwReg.Id)) { 4888 return false; 4889 } 4890 4891 if (trySkipToken(AsmToken::RParen)) 4892 return true; 4893 4894 // parse optional params 4895 return 4896 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 4897 parseExpr(Offset) && 4898 skipToken(AsmToken::Comma, "expected a comma") && 4899 parseExpr(Width) && 4900 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 4901 } 4902 4903 bool 4904 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 4905 const int64_t Offset, 4906 const int64_t Width, 4907 const SMLoc Loc) { 4908 4909 using namespace llvm::AMDGPU::Hwreg; 4910 4911 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 4912 Error(Loc, "specified hardware register is not supported on this GPU"); 4913 return false; 4914 } else if (!isValidHwreg(HwReg.Id)) { 4915 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 4916 return false; 4917 } else if (!isValidHwregOffset(Offset)) { 4918 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 4919 return false; 4920 } else if (!isValidHwregWidth(Width)) { 4921 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 4922 return false; 4923 } 4924 return true; 4925 } 4926 4927 OperandMatchResultTy 4928 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4929 using namespace llvm::AMDGPU::Hwreg; 4930 4931 int64_t ImmVal = 0; 4932 SMLoc Loc = getLoc(); 4933 4934 // If parse failed, do not return error code 4935 // to avoid excessive error messages. 
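  // Accepted forms (illustrative): "hwreg(HW_REG_TRAPSTS)" or
  // "hwreg(HW_REG_TRAPSTS, 0, 32)", i.e. hwreg(name_or_code [, offset, width]),
  // as well as a plain 16-bit immediate holding an already-encoded value.
  // The symbolic register name shown is only an example.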
4936 if (trySkipId("hwreg", AsmToken::LParen)) { 4937 OperandInfoTy HwReg(ID_UNKNOWN_); 4938 int64_t Offset = OFFSET_DEFAULT_; 4939 int64_t Width = WIDTH_DEFAULT_; 4940 if (parseHwregBody(HwReg, Offset, Width) && 4941 validateHwreg(HwReg, Offset, Width, Loc)) { 4942 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 4943 } 4944 } else if (parseExpr(ImmVal)) { 4945 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 4946 Error(Loc, "invalid immediate: only 16-bit values are legal"); 4947 } 4948 4949 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 4950 return MatchOperand_Success; 4951 } 4952 4953 bool AMDGPUOperand::isHwreg() const { 4954 return isImmTy(ImmTyHwreg); 4955 } 4956 4957 //===----------------------------------------------------------------------===// 4958 // sendmsg 4959 //===----------------------------------------------------------------------===// 4960 4961 bool 4962 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 4963 OperandInfoTy &Op, 4964 OperandInfoTy &Stream) { 4965 using namespace llvm::AMDGPU::SendMsg; 4966 4967 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 4968 Msg.IsSymbolic = true; 4969 lex(); // skip message name 4970 } else if (!parseExpr(Msg.Id)) { 4971 return false; 4972 } 4973 4974 if (trySkipToken(AsmToken::Comma)) { 4975 Op.IsDefined = true; 4976 if (isToken(AsmToken::Identifier) && 4977 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 4978 lex(); // skip operation name 4979 } else if (!parseExpr(Op.Id)) { 4980 return false; 4981 } 4982 4983 if (trySkipToken(AsmToken::Comma)) { 4984 Stream.IsDefined = true; 4985 if (!parseExpr(Stream.Id)) 4986 return false; 4987 } 4988 } 4989 4990 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 4991 } 4992 4993 bool 4994 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 4995 const OperandInfoTy &Op, 4996 const OperandInfoTy &Stream, 4997 const SMLoc S) { 4998 using namespace llvm::AMDGPU::SendMsg; 4999 5000 // Validation strictness depends on whether message is specified 5001 // in a symbolc or in a numeric form. In the latter case 5002 // only encoding possibility is checked. 5003 bool Strict = Msg.IsSymbolic; 5004 5005 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5006 Error(S, "invalid message id"); 5007 return false; 5008 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5009 Error(S, Op.IsDefined ? 5010 "message does not support operations" : 5011 "missing message operation"); 5012 return false; 5013 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5014 Error(S, "invalid operation id"); 5015 return false; 5016 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5017 Error(S, "message operation does not support streams"); 5018 return false; 5019 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5020 Error(S, "invalid message stream id"); 5021 return false; 5022 } 5023 return true; 5024 } 5025 5026 OperandMatchResultTy 5027 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5028 using namespace llvm::AMDGPU::SendMsg; 5029 5030 int64_t ImmVal = 0; 5031 SMLoc Loc = getLoc(); 5032 5033 // If parse failed, do not return error code 5034 // to avoid excessive error messages. 
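  // Accepted forms (illustrative): "sendmsg(MSG_GS, GS_OP_EMIT, 0)" or
  // "sendmsg(MSG_GS_DONE, GS_OP_NOP)", i.e. sendmsg(msg [, op [, stream]]),
  // as well as a plain 16-bit immediate holding an already-encoded value.
  // The symbolic names shown are only examples.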
5035 if (trySkipId("sendmsg", AsmToken::LParen)) { 5036 OperandInfoTy Msg(ID_UNKNOWN_); 5037 OperandInfoTy Op(OP_NONE_); 5038 OperandInfoTy Stream(STREAM_ID_NONE_); 5039 if (parseSendMsgBody(Msg, Op, Stream) && 5040 validateSendMsg(Msg, Op, Stream, Loc)) { 5041 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5042 } 5043 } else if (parseExpr(ImmVal)) { 5044 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5045 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5046 } 5047 5048 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5049 return MatchOperand_Success; 5050 } 5051 5052 bool AMDGPUOperand::isSendMsg() const { 5053 return isImmTy(ImmTySendMsg); 5054 } 5055 5056 //===----------------------------------------------------------------------===// 5057 // v_interp 5058 //===----------------------------------------------------------------------===// 5059 5060 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5061 if (getLexer().getKind() != AsmToken::Identifier) 5062 return MatchOperand_NoMatch; 5063 5064 StringRef Str = Parser.getTok().getString(); 5065 int Slot = StringSwitch<int>(Str) 5066 .Case("p10", 0) 5067 .Case("p20", 1) 5068 .Case("p0", 2) 5069 .Default(-1); 5070 5071 SMLoc S = Parser.getTok().getLoc(); 5072 if (Slot == -1) 5073 return MatchOperand_ParseFail; 5074 5075 Parser.Lex(); 5076 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5077 AMDGPUOperand::ImmTyInterpSlot)); 5078 return MatchOperand_Success; 5079 } 5080 5081 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5082 if (getLexer().getKind() != AsmToken::Identifier) 5083 return MatchOperand_NoMatch; 5084 5085 StringRef Str = Parser.getTok().getString(); 5086 if (!Str.startswith("attr")) 5087 return MatchOperand_NoMatch; 5088 5089 StringRef Chan = Str.take_back(2); 5090 int AttrChan = StringSwitch<int>(Chan) 5091 .Case(".x", 0) 5092 .Case(".y", 1) 5093 .Case(".z", 2) 5094 .Case(".w", 3) 5095 .Default(-1); 5096 if (AttrChan == -1) 5097 return MatchOperand_ParseFail; 5098 5099 Str = Str.drop_back(2).drop_front(4); 5100 5101 uint8_t Attr; 5102 if (Str.getAsInteger(10, Attr)) 5103 return MatchOperand_ParseFail; 5104 5105 SMLoc S = Parser.getTok().getLoc(); 5106 Parser.Lex(); 5107 if (Attr > 63) { 5108 Error(S, "out of bounds attr"); 5109 return MatchOperand_Success; 5110 } 5111 5112 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5113 5114 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5115 AMDGPUOperand::ImmTyInterpAttr)); 5116 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5117 AMDGPUOperand::ImmTyAttrChan)); 5118 return MatchOperand_Success; 5119 } 5120 5121 //===----------------------------------------------------------------------===// 5122 // exp 5123 //===----------------------------------------------------------------------===// 5124 5125 void AMDGPUAsmParser::errorExpTgt() { 5126 Error(Parser.getTok().getLoc(), "invalid exp target"); 5127 } 5128 5129 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5130 uint8_t &Val) { 5131 if (Str == "null") { 5132 Val = 9; 5133 return MatchOperand_Success; 5134 } 5135 5136 if (Str.startswith("mrt")) { 5137 Str = Str.drop_front(3); 5138 if (Str == "z") { // == mrtz 5139 Val = 8; 5140 return MatchOperand_Success; 5141 } 5142 5143 if (Str.getAsInteger(10, Val)) 5144 return MatchOperand_ParseFail; 5145 5146 if (Val > 7) 5147 errorExpTgt(); 5148 5149 return MatchOperand_Success; 5150 } 5151 5152 if (Str.startswith("pos")) 
{ 5153 Str = Str.drop_front(3); 5154 if (Str.getAsInteger(10, Val)) 5155 return MatchOperand_ParseFail; 5156 5157 if (Val > 4 || (Val == 4 && !isGFX10())) 5158 errorExpTgt(); 5159 5160 Val += 12; 5161 return MatchOperand_Success; 5162 } 5163 5164 if (isGFX10() && Str == "prim") { 5165 Val = 20; 5166 return MatchOperand_Success; 5167 } 5168 5169 if (Str.startswith("param")) { 5170 Str = Str.drop_front(5); 5171 if (Str.getAsInteger(10, Val)) 5172 return MatchOperand_ParseFail; 5173 5174 if (Val >= 32) 5175 errorExpTgt(); 5176 5177 Val += 32; 5178 return MatchOperand_Success; 5179 } 5180 5181 if (Str.startswith("invalid_target_")) { 5182 Str = Str.drop_front(15); 5183 if (Str.getAsInteger(10, Val)) 5184 return MatchOperand_ParseFail; 5185 5186 errorExpTgt(); 5187 return MatchOperand_Success; 5188 } 5189 5190 return MatchOperand_NoMatch; 5191 } 5192 5193 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5194 uint8_t Val; 5195 StringRef Str = Parser.getTok().getString(); 5196 5197 auto Res = parseExpTgtImpl(Str, Val); 5198 if (Res != MatchOperand_Success) 5199 return Res; 5200 5201 SMLoc S = Parser.getTok().getLoc(); 5202 Parser.Lex(); 5203 5204 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5205 AMDGPUOperand::ImmTyExpTgt)); 5206 return MatchOperand_Success; 5207 } 5208 5209 //===----------------------------------------------------------------------===// 5210 // parser helpers 5211 //===----------------------------------------------------------------------===// 5212 5213 bool 5214 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5215 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5216 } 5217 5218 bool 5219 AMDGPUAsmParser::isId(const StringRef Id) const { 5220 return isId(getToken(), Id); 5221 } 5222 5223 bool 5224 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5225 return getTokenKind() == Kind; 5226 } 5227 5228 bool 5229 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5230 if (isId(Id)) { 5231 lex(); 5232 return true; 5233 } 5234 return false; 5235 } 5236 5237 bool 5238 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5239 if (isId(Id) && peekToken().is(Kind)) { 5240 lex(); 5241 lex(); 5242 return true; 5243 } 5244 return false; 5245 } 5246 5247 bool 5248 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5249 if (isToken(Kind)) { 5250 lex(); 5251 return true; 5252 } 5253 return false; 5254 } 5255 5256 bool 5257 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5258 const StringRef ErrMsg) { 5259 if (!trySkipToken(Kind)) { 5260 Error(getLoc(), ErrMsg); 5261 return false; 5262 } 5263 return true; 5264 } 5265 5266 bool 5267 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5268 return !getParser().parseAbsoluteExpression(Imm); 5269 } 5270 5271 bool 5272 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5273 SMLoc S = getLoc(); 5274 5275 const MCExpr *Expr; 5276 if (Parser.parseExpression(Expr)) 5277 return false; 5278 5279 int64_t IntVal; 5280 if (Expr->evaluateAsAbsolute(IntVal)) { 5281 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5282 } else { 5283 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5284 } 5285 return true; 5286 } 5287 5288 bool 5289 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5290 if (isToken(AsmToken::String)) { 5291 Val = getToken().getStringContents(); 5292 lex(); 5293 return true; 5294 } else { 5295 Error(getLoc(), ErrMsg); 5296 return false; 5297 } 5298 } 5299 5300 
AsmToken 5301 AMDGPUAsmParser::getToken() const { 5302 return Parser.getTok(); 5303 } 5304 5305 AsmToken 5306 AMDGPUAsmParser::peekToken() { 5307 return getLexer().peekTok(); 5308 } 5309 5310 void 5311 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5312 auto TokCount = getLexer().peekTokens(Tokens); 5313 5314 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5315 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5316 } 5317 5318 AsmToken::TokenKind 5319 AMDGPUAsmParser::getTokenKind() const { 5320 return getLexer().getKind(); 5321 } 5322 5323 SMLoc 5324 AMDGPUAsmParser::getLoc() const { 5325 return getToken().getLoc(); 5326 } 5327 5328 StringRef 5329 AMDGPUAsmParser::getTokenStr() const { 5330 return getToken().getString(); 5331 } 5332 5333 void 5334 AMDGPUAsmParser::lex() { 5335 Parser.Lex(); 5336 } 5337 5338 //===----------------------------------------------------------------------===// 5339 // swizzle 5340 //===----------------------------------------------------------------------===// 5341 5342 LLVM_READNONE 5343 static unsigned 5344 encodeBitmaskPerm(const unsigned AndMask, 5345 const unsigned OrMask, 5346 const unsigned XorMask) { 5347 using namespace llvm::AMDGPU::Swizzle; 5348 5349 return BITMASK_PERM_ENC | 5350 (AndMask << BITMASK_AND_SHIFT) | 5351 (OrMask << BITMASK_OR_SHIFT) | 5352 (XorMask << BITMASK_XOR_SHIFT); 5353 } 5354 5355 bool 5356 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5357 const unsigned MinVal, 5358 const unsigned MaxVal, 5359 const StringRef ErrMsg) { 5360 for (unsigned i = 0; i < OpNum; ++i) { 5361 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5362 return false; 5363 } 5364 SMLoc ExprLoc = Parser.getTok().getLoc(); 5365 if (!parseExpr(Op[i])) { 5366 return false; 5367 } 5368 if (Op[i] < MinVal || Op[i] > MaxVal) { 5369 Error(ExprLoc, ErrMsg); 5370 return false; 5371 } 5372 } 5373 5374 return true; 5375 } 5376 5377 bool 5378 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5379 using namespace llvm::AMDGPU::Swizzle; 5380 5381 int64_t Lane[LANE_NUM]; 5382 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5383 "expected a 2-bit lane id")) { 5384 Imm = QUAD_PERM_ENC; 5385 for (unsigned I = 0; I < LANE_NUM; ++I) { 5386 Imm |= Lane[I] << (LANE_SHIFT * I); 5387 } 5388 return true; 5389 } 5390 return false; 5391 } 5392 5393 bool 5394 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5395 using namespace llvm::AMDGPU::Swizzle; 5396 5397 SMLoc S = Parser.getTok().getLoc(); 5398 int64_t GroupSize; 5399 int64_t LaneIdx; 5400 5401 if (!parseSwizzleOperands(1, &GroupSize, 5402 2, 32, 5403 "group size must be in the interval [2,32]")) { 5404 return false; 5405 } 5406 if (!isPowerOf2_64(GroupSize)) { 5407 Error(S, "group size must be a power of two"); 5408 return false; 5409 } 5410 if (parseSwizzleOperands(1, &LaneIdx, 5411 0, GroupSize - 1, 5412 "lane id must be in the interval [0,group size - 1]")) { 5413 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5414 return true; 5415 } 5416 return false; 5417 } 5418 5419 bool 5420 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5421 using namespace llvm::AMDGPU::Swizzle; 5422 5423 SMLoc S = Parser.getTok().getLoc(); 5424 int64_t GroupSize; 5425 5426 if (!parseSwizzleOperands(1, &GroupSize, 5427 2, 32, "group size must be in the interval [2,32]")) { 5428 return false; 5429 } 5430 if (!isPowerOf2_64(GroupSize)) { 5431 Error(S, "group size must be a power of two"); 5432 return false; 5433 } 5434 5435 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, 
GroupSize - 1); 5436 return true; 5437 } 5438 5439 bool 5440 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5441 using namespace llvm::AMDGPU::Swizzle; 5442 5443 SMLoc S = Parser.getTok().getLoc(); 5444 int64_t GroupSize; 5445 5446 if (!parseSwizzleOperands(1, &GroupSize, 5447 1, 16, "group size must be in the interval [1,16]")) { 5448 return false; 5449 } 5450 if (!isPowerOf2_64(GroupSize)) { 5451 Error(S, "group size must be a power of two"); 5452 return false; 5453 } 5454 5455 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5456 return true; 5457 } 5458 5459 bool 5460 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5461 using namespace llvm::AMDGPU::Swizzle; 5462 5463 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5464 return false; 5465 } 5466 5467 StringRef Ctl; 5468 SMLoc StrLoc = Parser.getTok().getLoc(); 5469 if (!parseString(Ctl)) { 5470 return false; 5471 } 5472 if (Ctl.size() != BITMASK_WIDTH) { 5473 Error(StrLoc, "expected a 5-character mask"); 5474 return false; 5475 } 5476 5477 unsigned AndMask = 0; 5478 unsigned OrMask = 0; 5479 unsigned XorMask = 0; 5480 5481 for (size_t i = 0; i < Ctl.size(); ++i) { 5482 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5483 switch(Ctl[i]) { 5484 default: 5485 Error(StrLoc, "invalid mask"); 5486 return false; 5487 case '0': 5488 break; 5489 case '1': 5490 OrMask |= Mask; 5491 break; 5492 case 'p': 5493 AndMask |= Mask; 5494 break; 5495 case 'i': 5496 AndMask |= Mask; 5497 XorMask |= Mask; 5498 break; 5499 } 5500 } 5501 5502 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5503 return true; 5504 } 5505 5506 bool 5507 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5508 5509 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5510 5511 if (!parseExpr(Imm)) { 5512 return false; 5513 } 5514 if (!isUInt<16>(Imm)) { 5515 Error(OffsetLoc, "expected a 16-bit offset"); 5516 return false; 5517 } 5518 return true; 5519 } 5520 5521 bool 5522 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5523 using namespace llvm::AMDGPU::Swizzle; 5524 5525 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5526 5527 SMLoc ModeLoc = Parser.getTok().getLoc(); 5528 bool Ok = false; 5529 5530 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5531 Ok = parseSwizzleQuadPerm(Imm); 5532 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5533 Ok = parseSwizzleBitmaskPerm(Imm); 5534 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5535 Ok = parseSwizzleBroadcast(Imm); 5536 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5537 Ok = parseSwizzleSwap(Imm); 5538 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5539 Ok = parseSwizzleReverse(Imm); 5540 } else { 5541 Error(ModeLoc, "expected a swizzle mode"); 5542 } 5543 5544 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5545 } 5546 5547 return false; 5548 } 5549 5550 OperandMatchResultTy 5551 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5552 SMLoc S = Parser.getTok().getLoc(); 5553 int64_t Imm = 0; 5554 5555 if (trySkipId("offset")) { 5556 5557 bool Ok = false; 5558 if (skipToken(AsmToken::Colon, "expected a colon")) { 5559 if (trySkipId("swizzle")) { 5560 Ok = parseSwizzleMacro(Imm); 5561 } else { 5562 Ok = parseSwizzleOffset(Imm); 5563 } 5564 } 5565 5566 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5567 5568 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5569 } else { 5570 // Swizzle "offset" operand is optional. 5571 // If it is omitted, try parsing other optional operands. 
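    // (A present modifier would have been handled above, either as a plain
    //  16-bit value, e.g. "offset:0xabcd", or as one of the swizzle macros,
    //  e.g. "ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)";
    //  both forms are encoded into the same ImmTySwizzle immediate.
    //  The example syntax here is illustrative.)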
5572 return parseOptionalOpr(Operands); 5573 } 5574 } 5575 5576 bool 5577 AMDGPUOperand::isSwizzle() const { 5578 return isImmTy(ImmTySwizzle); 5579 } 5580 5581 //===----------------------------------------------------------------------===// 5582 // VGPR Index Mode 5583 //===----------------------------------------------------------------------===// 5584 5585 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5586 5587 using namespace llvm::AMDGPU::VGPRIndexMode; 5588 5589 if (trySkipToken(AsmToken::RParen)) { 5590 return OFF; 5591 } 5592 5593 int64_t Imm = 0; 5594 5595 while (true) { 5596 unsigned Mode = 0; 5597 SMLoc S = Parser.getTok().getLoc(); 5598 5599 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5600 if (trySkipId(IdSymbolic[ModeId])) { 5601 Mode = 1 << ModeId; 5602 break; 5603 } 5604 } 5605 5606 if (Mode == 0) { 5607 Error(S, (Imm == 0)? 5608 "expected a VGPR index mode or a closing parenthesis" : 5609 "expected a VGPR index mode"); 5610 break; 5611 } 5612 5613 if (Imm & Mode) { 5614 Error(S, "duplicate VGPR index mode"); 5615 break; 5616 } 5617 Imm |= Mode; 5618 5619 if (trySkipToken(AsmToken::RParen)) 5620 break; 5621 if (!skipToken(AsmToken::Comma, 5622 "expected a comma or a closing parenthesis")) 5623 break; 5624 } 5625 5626 return Imm; 5627 } 5628 5629 OperandMatchResultTy 5630 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5631 5632 int64_t Imm = 0; 5633 SMLoc S = Parser.getTok().getLoc(); 5634 5635 if (getLexer().getKind() == AsmToken::Identifier && 5636 Parser.getTok().getString() == "gpr_idx" && 5637 getLexer().peekTok().is(AsmToken::LParen)) { 5638 5639 Parser.Lex(); 5640 Parser.Lex(); 5641 5642 // If parse failed, trigger an error but do not return error code 5643 // to avoid excessive error messages. 5644 Imm = parseGPRIdxMacro(); 5645 5646 } else { 5647 if (getParser().parseAbsoluteExpression(Imm)) 5648 return MatchOperand_NoMatch; 5649 if (Imm < 0 || !isUInt<4>(Imm)) { 5650 Error(S, "invalid immediate: only 4-bit values are legal"); 5651 } 5652 } 5653 5654 Operands.push_back( 5655 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5656 return MatchOperand_Success; 5657 } 5658 5659 bool AMDGPUOperand::isGPRIdxMode() const { 5660 return isImmTy(ImmTyGprIdxMode); 5661 } 5662 5663 //===----------------------------------------------------------------------===// 5664 // sopp branch targets 5665 //===----------------------------------------------------------------------===// 5666 5667 OperandMatchResultTy 5668 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5669 5670 // Make sure we are not parsing something 5671 // that looks like a label or an expression but is not. 5672 // This will improve error messages. 5673 if (isRegister() || isModifier()) 5674 return MatchOperand_NoMatch; 5675 5676 if (parseExpr(Operands)) { 5677 5678 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 5679 assert(Opr.isImm() || Opr.isExpr()); 5680 SMLoc Loc = Opr.getStartLoc(); 5681 5682 // Currently we do not support arbitrary expressions as branch targets. 5683 // Only labels and absolute expressions are accepted. 
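    // For example, "s_branch label" and "s_branch 4" are accepted, while a
    // composite expression such as "s_branch label+4" is rejected below
    // because it is an MCExpr but not a bare symbol reference.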
5684 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 5685 Error(Loc, "expected an absolute expression or a label"); 5686 } else if (Opr.isImm() && !Opr.isS16Imm()) { 5687 Error(Loc, "expected a 16-bit signed jump offset"); 5688 } 5689 } 5690 5691 return MatchOperand_Success; // avoid excessive error messages 5692 } 5693 5694 //===----------------------------------------------------------------------===// 5695 // Boolean holding registers 5696 //===----------------------------------------------------------------------===// 5697 5698 OperandMatchResultTy 5699 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5700 return parseReg(Operands); 5701 } 5702 5703 //===----------------------------------------------------------------------===// 5704 // mubuf 5705 //===----------------------------------------------------------------------===// 5706 5707 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5708 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5709 } 5710 5711 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5712 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5713 } 5714 5715 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5716 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5717 } 5718 5719 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5720 const OperandVector &Operands, 5721 bool IsAtomic, 5722 bool IsAtomicReturn, 5723 bool IsLds) { 5724 bool IsLdsOpcode = IsLds; 5725 bool HasLdsModifier = false; 5726 OptionalImmIndexMap OptionalIdx; 5727 assert(IsAtomicReturn ? IsAtomic : true); 5728 unsigned FirstOperandIdx = 1; 5729 5730 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5731 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5732 5733 // Add the register arguments 5734 if (Op.isReg()) { 5735 Op.addRegOperands(Inst, 1); 5736 // Insert a tied src for atomic return dst. 5737 // This cannot be postponed as subsequent calls to 5738 // addImmOperands rely on correct number of MC operands. 5739 if (IsAtomicReturn && i == FirstOperandIdx) 5740 Op.addRegOperands(Inst, 1); 5741 continue; 5742 } 5743 5744 // Handle the case where soffset is an immediate 5745 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5746 Op.addImmOperands(Inst, 1); 5747 continue; 5748 } 5749 5750 HasLdsModifier |= Op.isLDS(); 5751 5752 // Handle tokens like 'offen' which are sometimes hard-coded into the 5753 // asm string. There are no MCInst operands for these. 5754 if (Op.isToken()) { 5755 continue; 5756 } 5757 assert(Op.isImm()); 5758 5759 // Handle optional arguments 5760 OptionalIdx[Op.getImmTy()] = i; 5761 } 5762 5763 // This is a workaround for an llvm quirk which may result in an 5764 // incorrect instruction selection. Lds and non-lds versions of 5765 // MUBUF instructions are identical except that lds versions 5766 // have mandatory 'lds' modifier. However this modifier follows 5767 // optional modifiers and llvm asm matcher regards this 'lds' 5768 // modifier as an optional one. As a result, an lds version 5769 // of opcode may be selected even if it has no 'lds' modifier. 5770 if (IsLdsOpcode && !HasLdsModifier) { 5771 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5772 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5773 Inst.setOpcode(NoLdsOpcode); 5774 IsLdsOpcode = false; 5775 } 5776 } 5777 5778 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5779 if (!IsAtomic) { // glc is hard-coded. 
5780 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5781 } 5782 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5783 5784 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5785 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5786 } 5787 5788 if (isGFX10()) 5789 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5790 } 5791 5792 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5793 OptionalImmIndexMap OptionalIdx; 5794 5795 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5796 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5797 5798 // Add the register arguments 5799 if (Op.isReg()) { 5800 Op.addRegOperands(Inst, 1); 5801 continue; 5802 } 5803 5804 // Handle the case where soffset is an immediate 5805 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5806 Op.addImmOperands(Inst, 1); 5807 continue; 5808 } 5809 5810 // Handle tokens like 'offen' which are sometimes hard-coded into the 5811 // asm string. There are no MCInst operands for these. 5812 if (Op.isToken()) { 5813 continue; 5814 } 5815 assert(Op.isImm()); 5816 5817 // Handle optional arguments 5818 OptionalIdx[Op.getImmTy()] = i; 5819 } 5820 5821 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5822 AMDGPUOperand::ImmTyOffset); 5823 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5824 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5825 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5826 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5827 5828 if (isGFX10()) 5829 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5830 } 5831 5832 //===----------------------------------------------------------------------===// 5833 // mimg 5834 //===----------------------------------------------------------------------===// 5835 5836 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5837 bool IsAtomic) { 5838 unsigned I = 1; 5839 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5840 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5841 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5842 } 5843 5844 if (IsAtomic) { 5845 // Add src, same as dst 5846 assert(Desc.getNumDefs() == 1); 5847 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5848 } 5849 5850 OptionalImmIndexMap OptionalIdx; 5851 5852 for (unsigned E = Operands.size(); I != E; ++I) { 5853 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5854 5855 // Add the register arguments 5856 if (Op.isReg()) { 5857 Op.addRegOperands(Inst, 1); 5858 } else if (Op.isImmModifier()) { 5859 OptionalIdx[Op.getImmTy()] = I; 5860 } else if (!Op.isToken()) { 5861 llvm_unreachable("unexpected operand type"); 5862 } 5863 } 5864 5865 bool IsGFX10 = isGFX10(); 5866 5867 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5868 if (IsGFX10) 5869 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 5870 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5871 if (IsGFX10) 5872 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5873 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5874 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5875 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5876 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5877 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5878 if (!IsGFX10) 5879 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5880 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5881 } 5882 5883 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5884 cvtMIMG(Inst, Operands, true); 5885 } 5886 5887 //===----------------------------------------------------------------------===// 5888 // smrd 5889 //===----------------------------------------------------------------------===// 5890 5891 bool AMDGPUOperand::isSMRDOffset8() const { 5892 return isImm() && isUInt<8>(getImm()); 5893 } 5894 5895 bool AMDGPUOperand::isSMRDOffset20() const { 5896 return isImm() && isUInt<20>(getImm()); 5897 } 5898 5899 bool AMDGPUOperand::isSMRDLiteralOffset() const { 5900 // 32-bit literals are only supported on CI and we only want to use them 5901 // when the offset is > 8-bits. 5902 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 5903 } 5904 5905 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 5906 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5907 } 5908 5909 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 5910 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5911 } 5912 5913 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5914 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5915 } 5916 5917 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 5918 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5919 } 5920 5921 //===----------------------------------------------------------------------===// 5922 // vop3 5923 //===----------------------------------------------------------------------===// 5924 5925 static bool ConvertOmodMul(int64_t &Mul) { 5926 if (Mul != 1 && Mul != 2 && Mul != 4) 5927 return false; 5928 5929 Mul >>= 1; 5930 return true; 5931 } 5932 5933 static bool ConvertOmodDiv(int64_t &Div) { 5934 if (Div == 1) { 5935 Div = 0; 5936 return true; 5937 } 5938 5939 if (Div == 2) { 5940 Div = 3; 5941 return true; 5942 } 5943 5944 return false; 5945 } 5946 5947 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 5948 if (BoundCtrl == 0) { 5949 BoundCtrl = 1; 5950 return true; 5951 } 5952 5953 if (BoundCtrl == -1) { 5954 BoundCtrl = 0; 5955 return true; 5956 } 5957 5958 return false; 5959 } 5960 5961 // Note: the order in this table matches the order of operands in AsmString. 
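// Each entry is (name, immediate type, is-a-bare-bit, optional converter).
// Bit operands such as "glc" are written as a bare token, value operands such
// as "offset" are written "name:value", and the converter, when present,
// rewrites the parsed value (see ConvertOmodMul/ConvertBoundCtrl above).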
5962 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 5963 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 5964 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 5965 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 5966 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 5967 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 5968 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 5969 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 5970 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 5971 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 5972 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 5973 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 5974 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 5975 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 5976 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 5977 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5978 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 5979 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 5980 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 5981 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 5982 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 5983 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5984 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5985 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 5986 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5987 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 5988 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 5989 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 5990 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 5991 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 5992 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 5993 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 5994 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 5995 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 5996 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 5997 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 5998 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 5999 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6000 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6001 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6002 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6003 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6004 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6005 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6006 }; 6007 6008 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6009 unsigned size = Operands.size(); 6010 assert(size > 0); 6011 6012 OperandMatchResultTy res = parseOptionalOpr(Operands); 6013 6014 // This is a hack to enable hardcoded mandatory operands which follow 6015 // optional operands. 6016 // 6017 // Current design assumes that all operands after the first optional operand 6018 // are also optional. However implementation of some instructions violates 6019 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6020 // 6021 // To alleviate this problem, we have to (implicitly) parse extra operands 6022 // to make sure autogenerated parser of custom operands never hit hardcoded 6023 // mandatory operands. 
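  // (For example, a flat/global atomic that returns a value spells a
  //  mandatory "glc" after its optional operands; the lookahead below keeps
  //  consuming operands so the generated matcher can still reach that
  //  hardcoded token.)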
6024 6025 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 6026 6027 // We have parsed the first optional operand. 6028 // Parse as many operands as necessary to skip all mandatory operands. 6029 6030 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6031 if (res != MatchOperand_Success || 6032 getLexer().is(AsmToken::EndOfStatement)) break; 6033 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 6034 res = parseOptionalOpr(Operands); 6035 } 6036 } 6037 6038 return res; 6039 } 6040 6041 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6042 OperandMatchResultTy res; 6043 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6044 // try to parse any optional operand here 6045 if (Op.IsBit) { 6046 res = parseNamedBit(Op.Name, Operands, Op.Type); 6047 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6048 res = parseOModOperand(Operands); 6049 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6050 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6051 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6052 res = parseSDWASel(Operands, Op.Name, Op.Type); 6053 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6054 res = parseSDWADstUnused(Operands); 6055 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6056 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6057 Op.Type == AMDGPUOperand::ImmTyNegLo || 6058 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6059 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6060 Op.ConvertResult); 6061 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6062 res = parseDim(Operands); 6063 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 6064 res = parseDfmtNfmt(Operands); 6065 } else { 6066 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6067 } 6068 if (res != MatchOperand_NoMatch) { 6069 return res; 6070 } 6071 } 6072 return MatchOperand_NoMatch; 6073 } 6074 6075 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6076 StringRef Name = Parser.getTok().getString(); 6077 if (Name == "mul") { 6078 return parseIntWithPrefix("mul", Operands, 6079 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6080 } 6081 6082 if (Name == "div") { 6083 return parseIntWithPrefix("div", Operands, 6084 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6085 } 6086 6087 return MatchOperand_NoMatch; 6088 } 6089 6090 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6091 cvtVOP3P(Inst, Operands); 6092 6093 int Opc = Inst.getOpcode(); 6094 6095 int SrcNum; 6096 const int Ops[] = { AMDGPU::OpName::src0, 6097 AMDGPU::OpName::src1, 6098 AMDGPU::OpName::src2 }; 6099 for (SrcNum = 0; 6100 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6101 ++SrcNum); 6102 assert(SrcNum > 0); 6103 6104 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6105 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6106 6107 if ((OpSel & (1 << SrcNum)) != 0) { 6108 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6109 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6110 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6111 } 6112 } 6113 6114 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6115 // 1. This operand is input modifiers 6116 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6117 // 2. This is not last operand 6118 && Desc.NumOperands > (OpNum + 1) 6119 // 3. 
Next operand is register class 6120 && Desc.OpInfo[OpNum + 1].RegClass != -1 6121 // 4. Next register is not tied to any other operand 6122 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6123 } 6124 6125 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6126 { 6127 OptionalImmIndexMap OptionalIdx; 6128 unsigned Opc = Inst.getOpcode(); 6129 6130 unsigned I = 1; 6131 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6132 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6133 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6134 } 6135 6136 for (unsigned E = Operands.size(); I != E; ++I) { 6137 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6138 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6139 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6140 } else if (Op.isInterpSlot() || 6141 Op.isInterpAttr() || 6142 Op.isAttrChan()) { 6143 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6144 } else if (Op.isImmModifier()) { 6145 OptionalIdx[Op.getImmTy()] = I; 6146 } else { 6147 llvm_unreachable("unhandled operand type"); 6148 } 6149 } 6150 6151 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6152 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6153 } 6154 6155 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6156 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6157 } 6158 6159 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6160 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6161 } 6162 } 6163 6164 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6165 OptionalImmIndexMap &OptionalIdx) { 6166 unsigned Opc = Inst.getOpcode(); 6167 6168 unsigned I = 1; 6169 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6170 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6171 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6172 } 6173 6174 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6175 // This instruction has src modifiers 6176 for (unsigned E = Operands.size(); I != E; ++I) { 6177 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6178 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6179 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6180 } else if (Op.isImmModifier()) { 6181 OptionalIdx[Op.getImmTy()] = I; 6182 } else if (Op.isRegOrImm()) { 6183 Op.addRegOrImmOperands(Inst, 1); 6184 } else { 6185 llvm_unreachable("unhandled operand type"); 6186 } 6187 } 6188 } else { 6189 // No src modifiers 6190 for (unsigned E = Operands.size(); I != E; ++I) { 6191 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6192 if (Op.isMod()) { 6193 OptionalIdx[Op.getImmTy()] = I; 6194 } else { 6195 Op.addRegOrImmOperands(Inst, 1); 6196 } 6197 } 6198 } 6199 6200 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6201 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6202 } 6203 6204 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6205 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6206 } 6207 6208 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6209 // it has src2 register operand that is tied to dst operand 6210 // we don't allow modifiers for this operand in assembler so src2_modifiers 6211 // should be 0. 
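  // The two inserts below first add a zero src2_modifiers immediate at the
  // position reported by getNamedOperandIdx, then duplicate operand 0 (the
  // dst) right after it to act as the tied src2.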
6212 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6213 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6214 Opc == AMDGPU::V_MAC_F32_e64_vi || 6215 Opc == AMDGPU::V_MAC_F16_e64_vi || 6216 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6217 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6218 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6219 auto it = Inst.begin(); 6220 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6221 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6222 ++it; 6223 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6224 } 6225 } 6226 6227 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6228 OptionalImmIndexMap OptionalIdx; 6229 cvtVOP3(Inst, Operands, OptionalIdx); 6230 } 6231 6232 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6233 const OperandVector &Operands) { 6234 OptionalImmIndexMap OptIdx; 6235 const int Opc = Inst.getOpcode(); 6236 const MCInstrDesc &Desc = MII.get(Opc); 6237 6238 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6239 6240 cvtVOP3(Inst, Operands, OptIdx); 6241 6242 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6243 assert(!IsPacked); 6244 Inst.addOperand(Inst.getOperand(0)); 6245 } 6246 6247 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6248 // instruction, and then figure out where to actually put the modifiers 6249 6250 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6251 6252 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6253 if (OpSelHiIdx != -1) { 6254 int DefaultVal = IsPacked ? -1 : 0; 6255 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6256 DefaultVal); 6257 } 6258 6259 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6260 if (NegLoIdx != -1) { 6261 assert(IsPacked); 6262 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6263 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6264 } 6265 6266 const int Ops[] = { AMDGPU::OpName::src0, 6267 AMDGPU::OpName::src1, 6268 AMDGPU::OpName::src2 }; 6269 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6270 AMDGPU::OpName::src1_modifiers, 6271 AMDGPU::OpName::src2_modifiers }; 6272 6273 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6274 6275 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6276 unsigned OpSelHi = 0; 6277 unsigned NegLo = 0; 6278 unsigned NegHi = 0; 6279 6280 if (OpSelHiIdx != -1) { 6281 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6282 } 6283 6284 if (NegLoIdx != -1) { 6285 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6286 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6287 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6288 } 6289 6290 for (int J = 0; J < 3; ++J) { 6291 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6292 if (OpIdx == -1) 6293 break; 6294 6295 uint32_t ModVal = 0; 6296 6297 if ((OpSel & (1 << J)) != 0) 6298 ModVal |= SISrcMods::OP_SEL_0; 6299 6300 if ((OpSelHi & (1 << J)) != 0) 6301 ModVal |= SISrcMods::OP_SEL_1; 6302 6303 if ((NegLo & (1 << J)) != 0) 6304 ModVal |= SISrcMods::NEG; 6305 6306 if ((NegHi & (1 << J)) != 0) 6307 ModVal |= SISrcMods::NEG_HI; 6308 6309 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6310 6311 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6312 } 6313 } 6314 6315 //===----------------------------------------------------------------------===// 6316 // dpp 6317 
//===----------------------------------------------------------------------===// 6318 6319 bool AMDGPUOperand::isDPP8() const { 6320 return isImmTy(ImmTyDPP8); 6321 } 6322 6323 bool AMDGPUOperand::isDPPCtrl() const { 6324 using namespace AMDGPU::DPP; 6325 6326 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6327 if (result) { 6328 int64_t Imm = getImm(); 6329 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6330 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6331 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6332 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6333 (Imm == DppCtrl::WAVE_SHL1) || 6334 (Imm == DppCtrl::WAVE_ROL1) || 6335 (Imm == DppCtrl::WAVE_SHR1) || 6336 (Imm == DppCtrl::WAVE_ROR1) || 6337 (Imm == DppCtrl::ROW_MIRROR) || 6338 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6339 (Imm == DppCtrl::BCAST15) || 6340 (Imm == DppCtrl::BCAST31) || 6341 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6342 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6343 } 6344 return false; 6345 } 6346 6347 //===----------------------------------------------------------------------===// 6348 // mAI 6349 //===----------------------------------------------------------------------===// 6350 6351 bool AMDGPUOperand::isBLGP() const { 6352 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6353 } 6354 6355 bool AMDGPUOperand::isCBSZ() const { 6356 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6357 } 6358 6359 bool AMDGPUOperand::isABID() const { 6360 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6361 } 6362 6363 bool AMDGPUOperand::isS16Imm() const { 6364 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6365 } 6366 6367 bool AMDGPUOperand::isU16Imm() const { 6368 return isImm() && isUInt<16>(getImm()); 6369 } 6370 6371 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6372 if (!isGFX10()) 6373 return MatchOperand_NoMatch; 6374 6375 SMLoc S = Parser.getTok().getLoc(); 6376 6377 if (getLexer().isNot(AsmToken::Identifier)) 6378 return MatchOperand_NoMatch; 6379 if (getLexer().getTok().getString() != "dim") 6380 return MatchOperand_NoMatch; 6381 6382 Parser.Lex(); 6383 if (getLexer().isNot(AsmToken::Colon)) 6384 return MatchOperand_ParseFail; 6385 6386 Parser.Lex(); 6387 6388 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6389 // integer. 
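  // So "1D" reaches us as an Integer token "1" followed by an Identifier "D".
  // Re-join the two pieces, requiring them to be adjacent (the end location of
  // the integer must equal the start of the identifier), so that "1 D" with
  // intervening whitespace is still rejected.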
6390 std::string Token; 6391 if (getLexer().is(AsmToken::Integer)) { 6392 SMLoc Loc = getLexer().getTok().getEndLoc(); 6393 Token = getLexer().getTok().getString(); 6394 Parser.Lex(); 6395 if (getLexer().getTok().getLoc() != Loc) 6396 return MatchOperand_ParseFail; 6397 } 6398 if (getLexer().isNot(AsmToken::Identifier)) 6399 return MatchOperand_ParseFail; 6400 Token += getLexer().getTok().getString(); 6401 6402 StringRef DimId = Token; 6403 if (DimId.startswith("SQ_RSRC_IMG_")) 6404 DimId = DimId.substr(12); 6405 6406 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6407 if (!DimInfo) 6408 return MatchOperand_ParseFail; 6409 6410 Parser.Lex(); 6411 6412 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6413 AMDGPUOperand::ImmTyDim)); 6414 return MatchOperand_Success; 6415 } 6416 6417 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6418 SMLoc S = Parser.getTok().getLoc(); 6419 StringRef Prefix; 6420 6421 if (getLexer().getKind() == AsmToken::Identifier) { 6422 Prefix = Parser.getTok().getString(); 6423 } else { 6424 return MatchOperand_NoMatch; 6425 } 6426 6427 if (Prefix != "dpp8") 6428 return parseDPPCtrl(Operands); 6429 if (!isGFX10()) 6430 return MatchOperand_NoMatch; 6431 6432 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6433 6434 int64_t Sels[8]; 6435 6436 Parser.Lex(); 6437 if (getLexer().isNot(AsmToken::Colon)) 6438 return MatchOperand_ParseFail; 6439 6440 Parser.Lex(); 6441 if (getLexer().isNot(AsmToken::LBrac)) 6442 return MatchOperand_ParseFail; 6443 6444 Parser.Lex(); 6445 if (getParser().parseAbsoluteExpression(Sels[0])) 6446 return MatchOperand_ParseFail; 6447 if (0 > Sels[0] || 7 < Sels[0]) 6448 return MatchOperand_ParseFail; 6449 6450 for (size_t i = 1; i < 8; ++i) { 6451 if (getLexer().isNot(AsmToken::Comma)) 6452 return MatchOperand_ParseFail; 6453 6454 Parser.Lex(); 6455 if (getParser().parseAbsoluteExpression(Sels[i])) 6456 return MatchOperand_ParseFail; 6457 if (0 > Sels[i] || 7 < Sels[i]) 6458 return MatchOperand_ParseFail; 6459 } 6460 6461 if (getLexer().isNot(AsmToken::RBrac)) 6462 return MatchOperand_ParseFail; 6463 Parser.Lex(); 6464 6465 unsigned DPP8 = 0; 6466 for (size_t i = 0; i < 8; ++i) 6467 DPP8 |= (Sels[i] << (i * 3)); 6468 6469 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6470 return MatchOperand_Success; 6471 } 6472 6473 OperandMatchResultTy 6474 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6475 using namespace AMDGPU::DPP; 6476 6477 SMLoc S = Parser.getTok().getLoc(); 6478 StringRef Prefix; 6479 int64_t Int; 6480 6481 if (getLexer().getKind() == AsmToken::Identifier) { 6482 Prefix = Parser.getTok().getString(); 6483 } else { 6484 return MatchOperand_NoMatch; 6485 } 6486 6487 if (Prefix == "row_mirror") { 6488 Int = DppCtrl::ROW_MIRROR; 6489 Parser.Lex(); 6490 } else if (Prefix == "row_half_mirror") { 6491 Int = DppCtrl::ROW_HALF_MIRROR; 6492 Parser.Lex(); 6493 } else { 6494 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6495 if (Prefix != "quad_perm" 6496 && Prefix != "row_shl" 6497 && Prefix != "row_shr" 6498 && Prefix != "row_ror" 6499 && Prefix != "wave_shl" 6500 && Prefix != "wave_rol" 6501 && Prefix != "wave_shr" 6502 && Prefix != "wave_ror" 6503 && Prefix != "row_bcast" 6504 && Prefix != "row_share" 6505 && Prefix != "row_xmask") { 6506 return MatchOperand_NoMatch; 6507 } 6508 6509 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6510 return MatchOperand_NoMatch; 6511 6512 if (!isVI() && 
!isGFX9() && 6513 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6514 Prefix == "wave_rol" || Prefix == "wave_ror" || 6515 Prefix == "row_bcast")) 6516 return MatchOperand_NoMatch; 6517 6518 Parser.Lex(); 6519 if (getLexer().isNot(AsmToken::Colon)) 6520 return MatchOperand_ParseFail; 6521 6522 if (Prefix == "quad_perm") { 6523 // quad_perm:[%d,%d,%d,%d] 6524 Parser.Lex(); 6525 if (getLexer().isNot(AsmToken::LBrac)) 6526 return MatchOperand_ParseFail; 6527 Parser.Lex(); 6528 6529 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6530 return MatchOperand_ParseFail; 6531 6532 for (int i = 0; i < 3; ++i) { 6533 if (getLexer().isNot(AsmToken::Comma)) 6534 return MatchOperand_ParseFail; 6535 Parser.Lex(); 6536 6537 int64_t Temp; 6538 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6539 return MatchOperand_ParseFail; 6540 const int shift = i*2 + 2; 6541 Int += (Temp << shift); 6542 } 6543 6544 if (getLexer().isNot(AsmToken::RBrac)) 6545 return MatchOperand_ParseFail; 6546 Parser.Lex(); 6547 } else { 6548 // sel:%d 6549 Parser.Lex(); 6550 if (getParser().parseAbsoluteExpression(Int)) 6551 return MatchOperand_ParseFail; 6552 6553 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6554 Int |= DppCtrl::ROW_SHL0; 6555 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6556 Int |= DppCtrl::ROW_SHR0; 6557 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6558 Int |= DppCtrl::ROW_ROR0; 6559 } else if (Prefix == "wave_shl" && 1 == Int) { 6560 Int = DppCtrl::WAVE_SHL1; 6561 } else if (Prefix == "wave_rol" && 1 == Int) { 6562 Int = DppCtrl::WAVE_ROL1; 6563 } else if (Prefix == "wave_shr" && 1 == Int) { 6564 Int = DppCtrl::WAVE_SHR1; 6565 } else if (Prefix == "wave_ror" && 1 == Int) { 6566 Int = DppCtrl::WAVE_ROR1; 6567 } else if (Prefix == "row_bcast") { 6568 if (Int == 15) { 6569 Int = DppCtrl::BCAST15; 6570 } else if (Int == 31) { 6571 Int = DppCtrl::BCAST31; 6572 } else { 6573 return MatchOperand_ParseFail; 6574 } 6575 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6576 Int |= DppCtrl::ROW_SHARE_FIRST; 6577 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6578 Int |= DppCtrl::ROW_XMASK_FIRST; 6579 } else { 6580 return MatchOperand_ParseFail; 6581 } 6582 } 6583 } 6584 6585 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6586 return MatchOperand_Success; 6587 } 6588 6589 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6590 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6591 } 6592 6593 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6594 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6595 } 6596 6597 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6598 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6599 } 6600 6601 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6602 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6603 } 6604 6605 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6606 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6607 } 6608 6609 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6610 OptionalImmIndexMap OptionalIdx; 6611 6612 unsigned I = 1; 6613 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6614 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6615 ((AMDGPUOperand 
&)*Operands[I++]).addRegOperands(Inst, 1); 6616 } 6617 6618 int Fi = 0; 6619 for (unsigned E = Operands.size(); I != E; ++I) { 6620 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6621 MCOI::TIED_TO); 6622 if (TiedTo != -1) { 6623 assert((unsigned)TiedTo < Inst.getNumOperands()); 6624 // handle tied old or src2 for MAC instructions 6625 Inst.addOperand(Inst.getOperand(TiedTo)); 6626 } 6627 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6628 // Add the register arguments 6629 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6630 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6631 // Skip it. 6632 continue; 6633 } 6634 6635 if (IsDPP8) { 6636 if (Op.isDPP8()) { 6637 Op.addImmOperands(Inst, 1); 6638 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6639 Op.addRegWithFPInputModsOperands(Inst, 2); 6640 } else if (Op.isFI()) { 6641 Fi = Op.getImm(); 6642 } else if (Op.isReg()) { 6643 Op.addRegOperands(Inst, 1); 6644 } else { 6645 llvm_unreachable("Invalid operand type"); 6646 } 6647 } else { 6648 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6649 Op.addRegWithFPInputModsOperands(Inst, 2); 6650 } else if (Op.isDPPCtrl()) { 6651 Op.addImmOperands(Inst, 1); 6652 } else if (Op.isImm()) { 6653 // Handle optional arguments 6654 OptionalIdx[Op.getImmTy()] = I; 6655 } else { 6656 llvm_unreachable("Invalid operand type"); 6657 } 6658 } 6659 } 6660 6661 if (IsDPP8) { 6662 using namespace llvm::AMDGPU::DPP; 6663 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6664 } else { 6665 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6666 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6667 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6668 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6669 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6670 } 6671 } 6672 } 6673 6674 //===----------------------------------------------------------------------===// 6675 // sdwa 6676 //===----------------------------------------------------------------------===// 6677 6678 OperandMatchResultTy 6679 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6680 AMDGPUOperand::ImmTy Type) { 6681 using namespace llvm::AMDGPU::SDWA; 6682 6683 SMLoc S = Parser.getTok().getLoc(); 6684 StringRef Value; 6685 OperandMatchResultTy res; 6686 6687 res = parseStringWithPrefix(Prefix, Value); 6688 if (res != MatchOperand_Success) { 6689 return res; 6690 } 6691 6692 int64_t Int; 6693 Int = StringSwitch<int64_t>(Value) 6694 .Case("BYTE_0", SdwaSel::BYTE_0) 6695 .Case("BYTE_1", SdwaSel::BYTE_1) 6696 .Case("BYTE_2", SdwaSel::BYTE_2) 6697 .Case("BYTE_3", SdwaSel::BYTE_3) 6698 .Case("WORD_0", SdwaSel::WORD_0) 6699 .Case("WORD_1", SdwaSel::WORD_1) 6700 .Case("DWORD", SdwaSel::DWORD) 6701 .Default(0xffffffff); 6702 Parser.Lex(); // eat last token 6703 6704 if (Int == 0xffffffff) { 6705 return MatchOperand_ParseFail; 6706 } 6707 6708 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6709 return MatchOperand_Success; 6710 } 6711 6712 OperandMatchResultTy 6713 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6714 using namespace llvm::AMDGPU::SDWA; 6715 6716 SMLoc S = Parser.getTok().getLoc(); 6717 StringRef Value; 6718 OperandMatchResultTy res; 6719 6720 res = parseStringWithPrefix("dst_unused", Value); 6721 if (res != MatchOperand_Success) { 6722 
return res; 6723 } 6724 6725 int64_t Int; 6726 Int = StringSwitch<int64_t>(Value) 6727 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6728 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6729 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6730 .Default(0xffffffff); 6731 Parser.Lex(); // eat last token 6732 6733 if (Int == 0xffffffff) { 6734 return MatchOperand_ParseFail; 6735 } 6736 6737 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6738 return MatchOperand_Success; 6739 } 6740 6741 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6742 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6743 } 6744 6745 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6746 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6747 } 6748 6749 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6750 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); 6751 } 6752 6753 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6754 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6755 } 6756 6757 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6758 uint64_t BasicInstType, bool skipVcc) { 6759 using namespace llvm::AMDGPU::SDWA; 6760 6761 OptionalImmIndexMap OptionalIdx; 6762 bool skippedVcc = false; 6763 6764 unsigned I = 1; 6765 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6766 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6767 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6768 } 6769 6770 for (unsigned E = Operands.size(); I != E; ++I) { 6771 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6772 if (skipVcc && !skippedVcc && Op.isReg() && 6773 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 6774 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 6775 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 6776 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6777 // Skip VCC only if we didn't skip it on previous iteration. 
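      // Note that the check below is against the number of MC operands built
      // so far, not the asm operand index: 1 means only the dst has been added
      // (this vcc is the carry-out), 5 means both modified sources have been
      // added (this vcc is the carry-in).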
6778 if (BasicInstType == SIInstrFlags::VOP2 && 6779 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { 6780 skippedVcc = true; 6781 continue; 6782 } else if (BasicInstType == SIInstrFlags::VOPC && 6783 Inst.getNumOperands() == 0) { 6784 skippedVcc = true; 6785 continue; 6786 } 6787 } 6788 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6789 Op.addRegOrImmWithInputModsOperands(Inst, 2); 6790 } else if (Op.isImm()) { 6791 // Handle optional arguments 6792 OptionalIdx[Op.getImmTy()] = I; 6793 } else { 6794 llvm_unreachable("Invalid operand type"); 6795 } 6796 skippedVcc = false; 6797 } 6798 6799 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 6800 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 6801 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 6802 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 6803 switch (BasicInstType) { 6804 case SIInstrFlags::VOP1: 6805 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6806 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6807 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6808 } 6809 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6810 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6811 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6812 break; 6813 6814 case SIInstrFlags::VOP2: 6815 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6816 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6817 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6818 } 6819 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6820 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6821 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6822 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6823 break; 6824 6825 case SIInstrFlags::VOPC: 6826 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 6827 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6828 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6829 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6830 break; 6831 6832 default: 6833 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 6834 } 6835 } 6836 6837 // Special case v_mac_{f16, f32}: 6838 // these have a src2 register operand that is tied to the dst operand 6839 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 6840 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 6841 auto it = Inst.begin(); 6842 std::advance( 6843 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 6844 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6845 } 6846 } 6847 6848 //===----------------------------------------------------------------------===// 6849 // mAI 6850 //===----------------------------------------------------------------------===// 6851 6852 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 6853 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 6854 } 6855 6856 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 6857 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 6858 } 6859 6860 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 6861 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 6862 } 6863 6864 /// Force static initialization. 6865 extern "C" void LLVMInitializeAMDGPUAsmParser() { 6866 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 6867 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 6868 } 6869 6870 #define GET_REGISTER_MATCHER 6871 #define GET_MATCHER_IMPLEMENTATION 6872 #define GET_MNEMONIC_SPELL_CHECKER 6873 #include "AMDGPUGenAsmMatcher.inc" 6874 6875 // This function must be defined after the auto-generated include so that the 6876 // MatchClassKind enum is available. 6877 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 6878 unsigned Kind) { 6879 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 6880 // But MatchInstructionImpl() expects a token and fails to validate the 6881 // operand. This method checks whether we were given an immediate operand 6882 // where the corresponding token is expected. 6883 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 6884 switch (Kind) { 6885 case MCK_addr64: 6886 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 6887 case MCK_gds: 6888 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 6889 case MCK_lds: 6890 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 6891 case MCK_glc: 6892 return Operand.isGLC() ? Match_Success : Match_InvalidOperand; 6893 case MCK_idxen: 6894 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 6895 case MCK_offen: 6896 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 6897 case MCK_SSrcB32: 6898 // When operands have expression values, they will return true for isToken, 6899 // because it is not possible to distinguish between a token and an 6900 // expression at parse time. MatchInstructionImpl() always tries to match an 6901 // operand as a token when isToken returns true; if the name of the 6902 // expression is not a valid token, that match fails, 6903 // so we need to handle it here. 6904 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 6905 case MCK_SSrcF32: 6906 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 6907 case MCK_SoppBrTarget: 6908 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 6909 case MCK_VReg32OrOff: 6910 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 6911 case MCK_InterpSlot: 6912 return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 6913 case MCK_Attr: 6914 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 6915 case MCK_AttrChan: 6916 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 6917 default: 6918 return Match_InvalidOperand; 6919 } 6920 } 6921 6922 //===----------------------------------------------------------------------===// 6923 // endpgm 6924 //===----------------------------------------------------------------------===// 6925 6926 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 6927 SMLoc S = Parser.getTok().getLoc(); 6928 int64_t Imm = 0; 6929 6930 if (!parseExpr(Imm)) { 6931 // The operand is optional, if not present default to 0 6932 Imm = 0; 6933 } 6934 6935 if (!isUInt<16>(Imm)) { 6936 Error(S, "expected a 16-bit value"); 6937 return MatchOperand_ParseFail; 6938 } 6939 6940 Operands.push_back( 6941 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 6942 return MatchOperand_Success; 6943 } 6944 6945 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 6946