1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "AMDGPU.h" 11 #include "AMDGPUBaseInfo.h" 12 #include "SIDefines.h" 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/ADT/Triple.h" 15 #include "llvm/CodeGen/MachineMemOperand.h" 16 #include "llvm/IR/Attributes.h" 17 #include "llvm/IR/Constants.h" 18 #include "llvm/IR/Function.h" 19 #include "llvm/IR/GlobalValue.h" 20 #include "llvm/IR/Instruction.h" 21 #include "llvm/IR/LLVMContext.h" 22 #include "llvm/IR/Module.h" 23 #include "llvm/MC/MCContext.h" 24 #include "llvm/MC/MCInstrDesc.h" 25 #include "llvm/MC/MCRegisterInfo.h" 26 #include "llvm/MC/MCSectionELF.h" 27 #include "llvm/MC/MCSubtargetInfo.h" 28 #include "llvm/MC/SubtargetFeature.h" 29 #include "llvm/Support/Casting.h" 30 #include "llvm/Support/ELF.h" 31 #include "llvm/Support/ErrorHandling.h" 32 #include "llvm/Support/MathExtras.h" 33 #include <algorithm> 34 #include <cassert> 35 #include <cstdint> 36 #include <cstring> 37 #include <utility> 38 39 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 40 41 42 #define GET_INSTRINFO_NAMED_OPS 43 #include "AMDGPUGenInstrInfo.inc" 44 #undef GET_INSTRINFO_NAMED_OPS 45 46 namespace { 47 48 /// \returns Bit mask for given bit \p Shift and bit \p Width. 49 unsigned getBitMask(unsigned Shift, unsigned Width) { 50 return ((1 << Width) - 1) << Shift; 51 } 52 53 /// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width. 54 /// 55 /// \returns Packed \p Dst. 56 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) { 57 Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width); 58 Dst |= (Src << Shift) & getBitMask(Shift, Width); 59 return Dst; 60 } 61 62 /// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width. 63 /// 64 /// \returns Unpacked bits. 65 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) { 66 return (Src & getBitMask(Shift, Width)) >> Shift; 67 } 68 69 /// \returns Vmcnt bit shift (lower bits). 70 unsigned getVmcntBitShiftLo() { return 0; } 71 72 /// \returns Vmcnt bit width (lower bits). 73 unsigned getVmcntBitWidthLo() { return 4; } 74 75 /// \returns Expcnt bit shift. 76 unsigned getExpcntBitShift() { return 4; } 77 78 /// \returns Expcnt bit width. 79 unsigned getExpcntBitWidth() { return 3; } 80 81 /// \returns Lgkmcnt bit shift. 82 unsigned getLgkmcntBitShift() { return 8; } 83 84 /// \returns Lgkmcnt bit width. 85 unsigned getLgkmcntBitWidth() { return 4; } 86 87 /// \returns Vmcnt bit shift (higher bits). 88 unsigned getVmcntBitShiftHi() { return 14; } 89 90 /// \returns Vmcnt bit width (higher bits). 91 unsigned getVmcntBitWidthHi() { return 2; } 92 93 } // end namespace anonymous 94 95 namespace llvm { 96 97 static cl::opt<bool> EnablePackedInlinableLiterals( 98 "enable-packed-inlinable-literals", 99 cl::desc("Enable packed inlinable literals (v2f16, v2i16)"), 100 cl::init(false)); 101 102 namespace AMDGPU { 103 104 namespace IsaInfo { 105 106 IsaVersion getIsaVersion(const FeatureBitset &Features) { 107 // CI. 108 if (Features.test(FeatureISAVersion7_0_0)) 109 return {7, 0, 0}; 110 if (Features.test(FeatureISAVersion7_0_1)) 111 return {7, 0, 1}; 112 if (Features.test(FeatureISAVersion7_0_2)) 113 return {7, 0, 2}; 114 115 // VI. 116 if (Features.test(FeatureISAVersion8_0_0)) 117 return {8, 0, 0}; 118 if (Features.test(FeatureISAVersion8_0_1)) 119 return {8, 0, 1}; 120 if (Features.test(FeatureISAVersion8_0_2)) 121 return {8, 0, 2}; 122 if (Features.test(FeatureISAVersion8_0_3)) 123 return {8, 0, 3}; 124 if (Features.test(FeatureISAVersion8_0_4)) 125 return {8, 0, 4}; 126 if (Features.test(FeatureISAVersion8_1_0)) 127 return {8, 1, 0}; 128 129 // GFX9. 130 if (Features.test(FeatureISAVersion9_0_0)) 131 return {9, 0, 0}; 132 if (Features.test(FeatureISAVersion9_0_1)) 133 return {9, 0, 1}; 134 135 if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands)) 136 return {0, 0, 0}; 137 return {7, 0, 0}; 138 } 139 140 unsigned getWavefrontSize(const FeatureBitset &Features) { 141 if (Features.test(FeatureWavefrontSize16)) 142 return 16; 143 if (Features.test(FeatureWavefrontSize32)) 144 return 32; 145 146 return 64; 147 } 148 149 unsigned getLocalMemorySize(const FeatureBitset &Features) { 150 if (Features.test(FeatureLocalMemorySize32768)) 151 return 32768; 152 if (Features.test(FeatureLocalMemorySize65536)) 153 return 65536; 154 155 return 0; 156 } 157 158 unsigned getEUsPerCU(const FeatureBitset &Features) { 159 return 4; 160 } 161 162 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, 163 unsigned FlatWorkGroupSize) { 164 if (!Features.test(FeatureGCN)) 165 return 8; 166 unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize); 167 if (N == 1) 168 return 40; 169 N = 40 / N; 170 return std::min(N, 16u); 171 } 172 173 unsigned getMaxWavesPerCU(const FeatureBitset &Features) { 174 return getMaxWavesPerEU(Features) * getEUsPerCU(Features); 175 } 176 177 unsigned getMaxWavesPerCU(const FeatureBitset &Features, 178 unsigned FlatWorkGroupSize) { 179 return getWavesPerWorkGroup(Features, FlatWorkGroupSize); 180 } 181 182 unsigned getMinWavesPerEU(const FeatureBitset &Features) { 183 return 1; 184 } 185 186 unsigned getMaxWavesPerEU(const FeatureBitset &Features) { 187 if (!Features.test(FeatureGCN)) 188 return 8; 189 // FIXME: Need to take scratch memory into account. 190 return 10; 191 } 192 193 unsigned getMaxWavesPerEU(const FeatureBitset &Features, 194 unsigned FlatWorkGroupSize) { 195 return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize), 196 getEUsPerCU(Features)) / getEUsPerCU(Features); 197 } 198 199 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) { 200 return 1; 201 } 202 203 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) { 204 return 2048; 205 } 206 207 unsigned getWavesPerWorkGroup(const FeatureBitset &Features, 208 unsigned FlatWorkGroupSize) { 209 return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) / 210 getWavefrontSize(Features); 211 } 212 213 unsigned getSGPRAllocGranule(const FeatureBitset &Features) { 214 IsaVersion Version = getIsaVersion(Features); 215 if (Version.Major >= 8) 216 return 16; 217 return 8; 218 } 219 220 unsigned getSGPREncodingGranule(const FeatureBitset &Features) { 221 return 8; 222 } 223 224 unsigned getTotalNumSGPRs(const FeatureBitset &Features) { 225 IsaVersion Version = getIsaVersion(Features); 226 if (Version.Major >= 8) 227 return 800; 228 return 512; 229 } 230 231 unsigned getAddressableNumSGPRs(const FeatureBitset &Features) { 232 if (Features.test(FeatureSGPRInitBug)) 233 return FIXED_NUM_SGPRS_FOR_INIT_BUG; 234 235 IsaVersion Version = getIsaVersion(Features); 236 if (Version.Major >= 8) 237 return 102; 238 return 104; 239 } 240 241 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { 242 assert(WavesPerEU != 0); 243 244 if (WavesPerEU >= getMaxWavesPerEU(Features)) 245 return 0; 246 unsigned MinNumSGPRs = 247 alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1), 248 getSGPRAllocGranule(Features)) + 1; 249 return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features)); 250 } 251 252 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, 253 bool Addressable) { 254 assert(WavesPerEU != 0); 255 256 IsaVersion Version = getIsaVersion(Features); 257 unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU, 258 getSGPRAllocGranule(Features)); 259 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features); 260 if (Version.Major >= 8 && !Addressable) 261 AddressableNumSGPRs = 112; 262 return std::min(MaxNumSGPRs, AddressableNumSGPRs); 263 } 264 265 unsigned getVGPRAllocGranule(const FeatureBitset &Features) { 266 return 4; 267 } 268 269 unsigned getVGPREncodingGranule(const FeatureBitset &Features) { 270 return getVGPRAllocGranule(Features); 271 } 272 273 unsigned getTotalNumVGPRs(const FeatureBitset &Features) { 274 return 256; 275 } 276 277 unsigned getAddressableNumVGPRs(const FeatureBitset &Features) { 278 return getTotalNumVGPRs(Features); 279 } 280 281 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { 282 assert(WavesPerEU != 0); 283 284 if (WavesPerEU >= getMaxWavesPerEU(Features)) 285 return 0; 286 unsigned MinNumVGPRs = 287 alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1), 288 getVGPRAllocGranule(Features)) + 1; 289 return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features)); 290 } 291 292 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { 293 assert(WavesPerEU != 0); 294 295 unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU, 296 getVGPRAllocGranule(Features)); 297 unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features); 298 return std::min(MaxNumVGPRs, AddressableNumVGPRs); 299 } 300 301 } // end namespace IsaInfo 302 303 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, 304 const FeatureBitset &Features) { 305 IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features); 306 307 memset(&Header, 0, sizeof(Header)); 308 309 Header.amd_kernel_code_version_major = 1; 310 Header.amd_kernel_code_version_minor = 1; 311 Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU 312 Header.amd_machine_version_major = ISA.Major; 313 Header.amd_machine_version_minor = ISA.Minor; 314 Header.amd_machine_version_stepping = ISA.Stepping; 315 Header.kernel_code_entry_byte_offset = sizeof(Header); 316 // wavefront_size is specified as a power of 2: 2^6 = 64 threads. 317 Header.wavefront_size = 6; 318 319 // If the code object does not support indirect functions, then the value must 320 // be 0xffffffff. 321 Header.call_convention = -1; 322 323 // These alignment values are specified in powers of two, so alignment = 324 // 2^n. The minimum alignment is 2^4 = 16. 325 Header.kernarg_segment_alignment = 4; 326 Header.group_segment_alignment = 4; 327 Header.private_segment_alignment = 4; 328 } 329 330 MCSection *getHSATextSection(MCContext &Ctx) { 331 return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS, 332 ELF::SHF_ALLOC | ELF::SHF_WRITE | 333 ELF::SHF_EXECINSTR | 334 ELF::SHF_AMDGPU_HSA_AGENT | 335 ELF::SHF_AMDGPU_HSA_CODE); 336 } 337 338 MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) { 339 return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS, 340 ELF::SHF_ALLOC | ELF::SHF_WRITE | 341 ELF::SHF_AMDGPU_HSA_GLOBAL | 342 ELF::SHF_AMDGPU_HSA_AGENT); 343 } 344 345 MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) { 346 return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS, 347 ELF::SHF_ALLOC | ELF::SHF_WRITE | 348 ELF::SHF_AMDGPU_HSA_GLOBAL); 349 } 350 351 MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) { 352 return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS, 353 ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY | 354 ELF::SHF_AMDGPU_HSA_AGENT); 355 } 356 357 bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) { 358 return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS; 359 } 360 361 bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) { 362 return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS; 363 } 364 365 bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) { 366 return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS; 367 } 368 369 bool shouldEmitConstantsToTextSection(const Triple &TT) { 370 return TT.getOS() != Triple::AMDHSA; 371 } 372 373 int getIntegerAttribute(const Function &F, StringRef Name, int Default) { 374 Attribute A = F.getFnAttribute(Name); 375 int Result = Default; 376 377 if (A.isStringAttribute()) { 378 StringRef Str = A.getValueAsString(); 379 if (Str.getAsInteger(0, Result)) { 380 LLVMContext &Ctx = F.getContext(); 381 Ctx.emitError("can't parse integer attribute " + Name); 382 } 383 } 384 385 return Result; 386 } 387 388 std::pair<int, int> getIntegerPairAttribute(const Function &F, 389 StringRef Name, 390 std::pair<int, int> Default, 391 bool OnlyFirstRequired) { 392 Attribute A = F.getFnAttribute(Name); 393 if (!A.isStringAttribute()) 394 return Default; 395 396 LLVMContext &Ctx = F.getContext(); 397 std::pair<int, int> Ints = Default; 398 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(','); 399 if (Strs.first.trim().getAsInteger(0, Ints.first)) { 400 Ctx.emitError("can't parse first integer attribute " + Name); 401 return Default; 402 } 403 if (Strs.second.trim().getAsInteger(0, Ints.second)) { 404 if (!OnlyFirstRequired || !Strs.second.trim().empty()) { 405 Ctx.emitError("can't parse second integer attribute " + Name); 406 return Default; 407 } 408 } 409 410 return Ints; 411 } 412 413 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) { 414 unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1; 415 if (Version.Major < 9) 416 return VmcntLo; 417 418 unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo(); 419 return VmcntLo | VmcntHi; 420 } 421 422 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) { 423 return (1 << getExpcntBitWidth()) - 1; 424 } 425 426 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) { 427 return (1 << getLgkmcntBitWidth()) - 1; 428 } 429 430 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) { 431 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo()); 432 unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth()); 433 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth()); 434 unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt; 435 if (Version.Major < 9) 436 return Waitcnt; 437 438 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi()); 439 return Waitcnt | VmcntHi; 440 } 441 442 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { 443 unsigned VmcntLo = 444 unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); 445 if (Version.Major < 9) 446 return VmcntLo; 447 448 unsigned VmcntHi = 449 unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi()); 450 VmcntHi <<= getVmcntBitWidthLo(); 451 return VmcntLo | VmcntHi; 452 } 453 454 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { 455 return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); 456 } 457 458 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { 459 return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); 460 } 461 462 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, 463 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) { 464 Vmcnt = decodeVmcnt(Version, Waitcnt); 465 Expcnt = decodeExpcnt(Version, Waitcnt); 466 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt); 467 } 468 469 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, 470 unsigned Vmcnt) { 471 Waitcnt = 472 packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); 473 if (Version.Major < 9) 474 return Waitcnt; 475 476 Vmcnt >>= getVmcntBitWidthLo(); 477 return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi()); 478 } 479 480 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, 481 unsigned Expcnt) { 482 return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); 483 } 484 485 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, 486 unsigned Lgkmcnt) { 487 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); 488 } 489 490 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version, 491 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) { 492 unsigned Waitcnt = getWaitcntBitMask(Version); 493 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt); 494 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt); 495 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt); 496 return Waitcnt; 497 } 498 499 unsigned getInitialPSInputAddr(const Function &F) { 500 return getIntegerAttribute(F, "InitialPSInputAddr", 0); 501 } 502 503 bool isShader(CallingConv::ID cc) { 504 switch(cc) { 505 case CallingConv::AMDGPU_VS: 506 case CallingConv::AMDGPU_GS: 507 case CallingConv::AMDGPU_PS: 508 case CallingConv::AMDGPU_CS: 509 return true; 510 default: 511 return false; 512 } 513 } 514 515 bool isCompute(CallingConv::ID cc) { 516 return !isShader(cc) || cc == CallingConv::AMDGPU_CS; 517 } 518 519 bool isEntryFunctionCC(CallingConv::ID CC) { 520 return true; 521 } 522 523 bool isSI(const MCSubtargetInfo &STI) { 524 return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands]; 525 } 526 527 bool isCI(const MCSubtargetInfo &STI) { 528 return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands]; 529 } 530 531 bool isVI(const MCSubtargetInfo &STI) { 532 return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]; 533 } 534 535 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { 536 537 switch(Reg) { 538 default: break; 539 case AMDGPU::FLAT_SCR: 540 assert(!isSI(STI)); 541 return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi; 542 543 case AMDGPU::FLAT_SCR_LO: 544 assert(!isSI(STI)); 545 return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi; 546 547 case AMDGPU::FLAT_SCR_HI: 548 assert(!isSI(STI)); 549 return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi; 550 } 551 return Reg; 552 } 553 554 unsigned mc2PseudoReg(unsigned Reg) { 555 switch (Reg) { 556 case AMDGPU::FLAT_SCR_ci: 557 case AMDGPU::FLAT_SCR_vi: 558 return FLAT_SCR; 559 560 case AMDGPU::FLAT_SCR_LO_ci: 561 case AMDGPU::FLAT_SCR_LO_vi: 562 return AMDGPU::FLAT_SCR_LO; 563 564 case AMDGPU::FLAT_SCR_HI_ci: 565 case AMDGPU::FLAT_SCR_HI_vi: 566 return AMDGPU::FLAT_SCR_HI; 567 568 default: 569 return Reg; 570 } 571 } 572 573 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { 574 assert(OpNo < Desc.NumOperands); 575 unsigned OpType = Desc.OpInfo[OpNo].OperandType; 576 return OpType >= AMDGPU::OPERAND_SRC_FIRST && 577 OpType <= AMDGPU::OPERAND_SRC_LAST; 578 } 579 580 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) { 581 assert(OpNo < Desc.NumOperands); 582 unsigned OpType = Desc.OpInfo[OpNo].OperandType; 583 switch (OpType) { 584 case AMDGPU::OPERAND_REG_IMM_FP32: 585 case AMDGPU::OPERAND_REG_IMM_FP64: 586 case AMDGPU::OPERAND_REG_IMM_FP16: 587 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 588 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 589 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 590 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 591 return true; 592 default: 593 return false; 594 } 595 } 596 597 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) { 598 assert(OpNo < Desc.NumOperands); 599 unsigned OpType = Desc.OpInfo[OpNo].OperandType; 600 return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST && 601 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST; 602 } 603 604 // Avoid using MCRegisterClass::getSize, since that function will go away 605 // (move from MC* level to Target* level). Return size in bits. 606 unsigned getRegBitWidth(unsigned RCID) { 607 switch (RCID) { 608 case AMDGPU::SGPR_32RegClassID: 609 case AMDGPU::VGPR_32RegClassID: 610 case AMDGPU::VS_32RegClassID: 611 case AMDGPU::SReg_32RegClassID: 612 case AMDGPU::SReg_32_XM0RegClassID: 613 return 32; 614 case AMDGPU::SGPR_64RegClassID: 615 case AMDGPU::VS_64RegClassID: 616 case AMDGPU::SReg_64RegClassID: 617 case AMDGPU::VReg_64RegClassID: 618 return 64; 619 case AMDGPU::VReg_96RegClassID: 620 return 96; 621 case AMDGPU::SGPR_128RegClassID: 622 case AMDGPU::SReg_128RegClassID: 623 case AMDGPU::VReg_128RegClassID: 624 return 128; 625 case AMDGPU::SReg_256RegClassID: 626 case AMDGPU::VReg_256RegClassID: 627 return 256; 628 case AMDGPU::SReg_512RegClassID: 629 case AMDGPU::VReg_512RegClassID: 630 return 512; 631 default: 632 llvm_unreachable("Unexpected register class"); 633 } 634 } 635 636 unsigned getRegBitWidth(const MCRegisterClass &RC) { 637 return getRegBitWidth(RC.getID()); 638 } 639 640 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, 641 unsigned OpNo) { 642 assert(OpNo < Desc.NumOperands); 643 unsigned RCID = Desc.OpInfo[OpNo].RegClass; 644 return getRegBitWidth(MRI->getRegClass(RCID)) / 8; 645 } 646 647 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) { 648 if (Literal >= -16 && Literal <= 64) 649 return true; 650 651 uint64_t Val = static_cast<uint64_t>(Literal); 652 return (Val == DoubleToBits(0.0)) || 653 (Val == DoubleToBits(1.0)) || 654 (Val == DoubleToBits(-1.0)) || 655 (Val == DoubleToBits(0.5)) || 656 (Val == DoubleToBits(-0.5)) || 657 (Val == DoubleToBits(2.0)) || 658 (Val == DoubleToBits(-2.0)) || 659 (Val == DoubleToBits(4.0)) || 660 (Val == DoubleToBits(-4.0)) || 661 (Val == 0x3fc45f306dc9c882 && HasInv2Pi); 662 } 663 664 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) { 665 if (Literal >= -16 && Literal <= 64) 666 return true; 667 668 // The actual type of the operand does not seem to matter as long 669 // as the bits match one of the inline immediate values. For example: 670 // 671 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, 672 // so it is a legal inline immediate. 673 // 674 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in 675 // floating-point, so it is a legal inline immediate. 676 677 uint32_t Val = static_cast<uint32_t>(Literal); 678 return (Val == FloatToBits(0.0f)) || 679 (Val == FloatToBits(1.0f)) || 680 (Val == FloatToBits(-1.0f)) || 681 (Val == FloatToBits(0.5f)) || 682 (Val == FloatToBits(-0.5f)) || 683 (Val == FloatToBits(2.0f)) || 684 (Val == FloatToBits(-2.0f)) || 685 (Val == FloatToBits(4.0f)) || 686 (Val == FloatToBits(-4.0f)) || 687 (Val == 0x3e22f983 && HasInv2Pi); 688 } 689 690 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) { 691 if (!HasInv2Pi) 692 return false; 693 694 if (Literal >= -16 && Literal <= 64) 695 return true; 696 697 uint16_t Val = static_cast<uint16_t>(Literal); 698 return Val == 0x3C00 || // 1.0 699 Val == 0xBC00 || // -1.0 700 Val == 0x3800 || // 0.5 701 Val == 0xB800 || // -0.5 702 Val == 0x4000 || // 2.0 703 Val == 0xC000 || // -2.0 704 Val == 0x4400 || // 4.0 705 Val == 0xC400 || // -4.0 706 Val == 0x3118; // 1/2pi 707 } 708 709 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) { 710 assert(HasInv2Pi); 711 712 if (!EnablePackedInlinableLiterals) 713 return false; 714 715 int16_t Lo16 = static_cast<int16_t>(Literal); 716 int16_t Hi16 = static_cast<int16_t>(Literal >> 16); 717 return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi); 718 } 719 720 bool isUniformMMO(const MachineMemOperand *MMO) { 721 const Value *Ptr = MMO->getValue(); 722 // UndefValue means this is a load of a kernel input. These are uniform. 723 // Sometimes LDS instructions have constant pointers. 724 // If Ptr is null, then that means this mem operand contains a 725 // PseudoSourceValue like GOT. 726 if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || 727 isa<Constant>(Ptr) || isa<GlobalValue>(Ptr)) 728 return true; 729 730 const Instruction *I = dyn_cast<Instruction>(Ptr); 731 return I && I->getMetadata("amdgpu.uniform"); 732 } 733 734 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { 735 if (isSI(ST) || isCI(ST)) 736 return ByteOffset >> 2; 737 738 return ByteOffset; 739 } 740 741 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { 742 int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset); 743 return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) : 744 isUInt<20>(EncodedOffset); 745 } 746 } // end namespace AMDGPU 747 748 } // end namespace llvm 749 750 const unsigned AMDGPUAS::MAX_COMMON_ADDRESS; 751 const unsigned AMDGPUAS::GLOBAL_ADDRESS; 752 const unsigned AMDGPUAS::LOCAL_ADDRESS; 753 const unsigned AMDGPUAS::PARAM_D_ADDRESS; 754 const unsigned AMDGPUAS::PARAM_I_ADDRESS; 755 const unsigned AMDGPUAS::CONSTANT_BUFFER_0; 756 const unsigned AMDGPUAS::CONSTANT_BUFFER_1; 757 const unsigned AMDGPUAS::CONSTANT_BUFFER_2; 758 const unsigned AMDGPUAS::CONSTANT_BUFFER_3; 759 const unsigned AMDGPUAS::CONSTANT_BUFFER_4; 760 const unsigned AMDGPUAS::CONSTANT_BUFFER_5; 761 const unsigned AMDGPUAS::CONSTANT_BUFFER_6; 762 const unsigned AMDGPUAS::CONSTANT_BUFFER_7; 763 const unsigned AMDGPUAS::CONSTANT_BUFFER_8; 764 const unsigned AMDGPUAS::CONSTANT_BUFFER_9; 765 const unsigned AMDGPUAS::CONSTANT_BUFFER_10; 766 const unsigned AMDGPUAS::CONSTANT_BUFFER_11; 767 const unsigned AMDGPUAS::CONSTANT_BUFFER_12; 768 const unsigned AMDGPUAS::CONSTANT_BUFFER_13; 769 const unsigned AMDGPUAS::CONSTANT_BUFFER_14; 770 const unsigned AMDGPUAS::CONSTANT_BUFFER_15; 771 const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE; 772 773 namespace llvm { 774 namespace AMDGPU { 775 776 AMDGPUAS getAMDGPUAS(Triple T) { 777 auto Env = T.getEnvironmentName(); 778 AMDGPUAS AS; 779 if (Env == "amdgiz" || Env == "amdgizcl") { 780 AS.FLAT_ADDRESS = 0; 781 AS.PRIVATE_ADDRESS = 5; 782 AS.REGION_ADDRESS = 4; 783 } 784 else { 785 AS.FLAT_ADDRESS = 4; 786 AS.PRIVATE_ADDRESS = 0; 787 AS.REGION_ADDRESS = 5; 788 } 789 return AS; 790 } 791 792 AMDGPUAS getAMDGPUAS(const TargetMachine &M) { 793 return getAMDGPUAS(M.getTargetTriple()); 794 } 795 796 AMDGPUAS getAMDGPUAS(const Module &M) { 797 return getAMDGPUAS(Triple(M.getTargetTriple())); 798 } 799 } // namespace AMDGPU 800 } // namespace llvm 801