//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
#undef GET_SUBTARGETINFO_ENUM

#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
#undef GET_REGINFO_ENUM

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRINFO_ENUM
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS
#undef GET_INSTRINFO_ENUM

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }
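// Taken together, the getters above describe the layout of the s_waitcnt
// simm16 operand: vmcnt occupies bits [3:0] (plus bits [15:14] on gfx9 and
// later), expcnt occupies bits [6:4], and lgkmcnt occupies bits [11:8].
// packBits/unpackBits move a field into or out of that encoding, e.g.
// unpackBits(0x0f73, getLgkmcntBitShift(), getLgkmcntBitWidth()) yields 15.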
} // end anonymous namespace

namespace llvm {
namespace AMDGPU {

namespace IsaInfo {

IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // CI.
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};
  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};
  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};

  // VI.
  if (Features.test(FeatureISAVersion8_0_0))
    return {8, 0, 0};
  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};
  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};
  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};
  if (Features.test(FeatureISAVersion8_0_4))
    return {8, 0, 4};
  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};

  // GFX9.
  if (Features.test(FeatureISAVersion9_0_0))
    return {9, 0, 0};
  if (Features.test(FeatureISAVersion9_0_1))
    return {9, 0, 1};

  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
    return {0, 0, 0};
  return {7, 0, 0};
}

unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
    return 16;
  if (Features.test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
    return 32768;
  if (Features.test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
    return 8;
  // FIXME: Need to take scratch memory into account.
  return 10;
}
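// For the flat-work-group-size overload below: with 64-wide wavefronts, a
// flat work group size of 576 is 9 waves, and spreading those over the 4 EUs
// of a CU gives alignTo(9, 4) / 4 = 3 waves per EU (an illustrative example,
// not a hardware limit).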
unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}

unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
         getWavefrontSize(Features);
}

unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}

unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumSGPRs =
      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
                getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}

unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
                                   getSGPRAllocGranule(Features));
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}

unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}

unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
                                   getVGPRAllocGranule(Features));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

} // end namespace IsaInfo
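// Illustrative example of the defaults set below: with FeatureISAVersion8_0_3
// in Features, the header reports amd_machine_version 8.0.3, a 2^6 = 64 lane
// wavefront, and 2^4 = 16 byte kernarg/group/private segment alignment.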
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 1;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

MCSection *getHSATextSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_EXECINSTR |
                           ELF::SHF_AMDGPU_HSA_AGENT |
                           ELF::SHF_AMDGPU_HSA_CODE);
}

MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_AMDGPU_HSA_GLOBAL |
                           ELF::SHF_AMDGPU_HSA_AGENT);
}

MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_AMDGPU_HSA_GLOBAL);
}

MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
                           ELF::SHF_AMDGPU_HSA_AGENT);
}

bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}
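// The attribute value is expected to be a string of one or two comma-separated
// integers, e.g. "1,4" yields {1, 4}; with OnlyFirstRequired set, "2" yields
// {2, Default.second}.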
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
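// Worked example for encodeWaitcnt below with the pre-gfx9 layout:
// encodeWaitcnt(V, /*Vmcnt=*/3, /*Expcnt=*/7, /*Lgkmcnt=*/15) packs to 0x0f73,
// and decodeWaitcnt(V, 0x0f73, ...) recovers the same three counts.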
unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
    case CallingConv::AMDGPU_VS:
    case CallingConv::AMDGPU_GS:
    case CallingConv::AMDGPU_PS:
    case CallingConv::AMDGPU_CS:
      return true;
    default:
      return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  switch(Reg) {
  default: break;
  case AMDGPU::FLAT_SCR:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;

  case AMDGPU::FLAT_SCR_LO:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;

  case AMDGPU::FLAT_SCR_HI:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
  }
  return Reg;
}

unsigned mc2PseudoReg(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::FLAT_SCR_ci:
  case AMDGPU::FLAT_SCR_vi:
    return FLAT_SCR;

  case AMDGPU::FLAT_SCR_LO_ci:
  case AMDGPU::FLAT_SCR_LO_vi:
    return AMDGPU::FLAT_SCR_LO;

  case AMDGPU::FLAT_SCR_HI_ci:
  case AMDGPU::FLAT_SCR_HI_vi:
    return AMDGPU::FLAT_SCR_HI;

  default:
    return Reg;
  }
}

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}
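// Note that the operand predicates above classify an operand purely by the
// operand-type enum recorded in the MCInstrDesc; they never inspect an actual
// operand value.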
// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.
  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}

bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}
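// SMRD immediate offsets are expressed in dwords on SI/CI and in bytes on
// VI+: e.g. a 1020 byte offset encodes as 255 (dwords) and must fit in 8 bits
// on SI/CI, but stays 1020 and may use up to 20 bits on VI.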
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isSI(ST) || isCI(ST))
    return ByteOffset >> 2;

  return ByteOffset;
}

bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) :
                                isUInt<20>(EncodedOffset);
}

} // end namespace AMDGPU

} // end namespace llvm

const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
const unsigned AMDGPUAS::GLOBAL_ADDRESS;
const unsigned AMDGPUAS::LOCAL_ADDRESS;
const unsigned AMDGPUAS::PARAM_D_ADDRESS;
const unsigned AMDGPUAS::PARAM_I_ADDRESS;
const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;

namespace llvm {
namespace AMDGPU {

AMDGPUAS getAMDGPUAS(Triple T) {
  auto Env = T.getEnvironmentName();
  AMDGPUAS AS;
  if (Env == "amdgiz" || Env == "amdgizcl") {
    AS.FLAT_ADDRESS = 0;
    AS.CONSTANT_ADDRESS = 4;
    AS.PRIVATE_ADDRESS = 5;
    AS.REGION_ADDRESS = 2;
  } else {
    AS.FLAT_ADDRESS = 4;
    AS.CONSTANT_ADDRESS = 2;
    AS.PRIVATE_ADDRESS = 0;
    AS.REGION_ADDRESS = 5;
  }
  return AS;
}

AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
  return getAMDGPUAS(M.getTargetTriple());
}

AMDGPUAS getAMDGPUAS(const Module &M) {
  return getAMDGPUAS(Triple(M.getTargetTriple()));
}

} // namespace AMDGPU
} // namespace llvm