1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "AMDGPU.h" 11 #include "AMDGPUBaseInfo.h" 12 #include "SIDefines.h" 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/ADT/Triple.h" 15 #include "llvm/CodeGen/MachineMemOperand.h" 16 #include "llvm/IR/Attributes.h" 17 #include "llvm/IR/Constants.h" 18 #include "llvm/IR/Function.h" 19 #include "llvm/IR/GlobalValue.h" 20 #include "llvm/IR/Instruction.h" 21 #include "llvm/IR/LLVMContext.h" 22 #include "llvm/IR/Module.h" 23 #include "llvm/MC/MCContext.h" 24 #include "llvm/MC/MCInstrDesc.h" 25 #include "llvm/MC/MCRegisterInfo.h" 26 #include "llvm/MC/MCSectionELF.h" 27 #include "llvm/MC/MCSubtargetInfo.h" 28 #include "llvm/MC/SubtargetFeature.h" 29 #include "llvm/Support/Casting.h" 30 #include "llvm/Support/ELF.h" 31 #include "llvm/Support/ErrorHandling.h" 32 #include "llvm/Support/MathExtras.h" 33 #include <algorithm> 34 #include <cassert> 35 #include <cstdint> 36 #include <cstring> 37 #include <utility> 38 39 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 40 41 42 #define GET_INSTRINFO_NAMED_OPS 43 #include "AMDGPUGenInstrInfo.inc" 44 #undef GET_INSTRINFO_NAMED_OPS 45 46 namespace { 47 48 /// \returns Bit mask for given bit \p Shift and bit \p Width. 49 unsigned getBitMask(unsigned Shift, unsigned Width) { 50 return ((1 << Width) - 1) << Shift; 51 } 52 53 /// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width. 54 /// 55 /// \returns Packed \p Dst. 56 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) { 57 Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width); 58 Dst |= (Src << Shift) & getBitMask(Shift, Width); 59 return Dst; 60 } 61 62 /// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width. 63 /// 64 /// \returns Unpacked bits. 65 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) { 66 return (Src & getBitMask(Shift, Width)) >> Shift; 67 } 68 69 /// \returns Vmcnt bit shift (lower bits). 70 unsigned getVmcntBitShiftLo() { return 0; } 71 72 /// \returns Vmcnt bit width (lower bits). 73 unsigned getVmcntBitWidthLo() { return 4; } 74 75 /// \returns Expcnt bit shift. 76 unsigned getExpcntBitShift() { return 4; } 77 78 /// \returns Expcnt bit width. 79 unsigned getExpcntBitWidth() { return 3; } 80 81 /// \returns Lgkmcnt bit shift. 82 unsigned getLgkmcntBitShift() { return 8; } 83 84 /// \returns Lgkmcnt bit width. 85 unsigned getLgkmcntBitWidth() { return 4; } 86 87 /// \returns Vmcnt bit shift (higher bits). 88 unsigned getVmcntBitShiftHi() { return 14; } 89 90 /// \returns Vmcnt bit width (higher bits). 91 unsigned getVmcntBitWidthHi() { return 2; } 92 93 } // end namespace anonymous 94 95 namespace llvm { 96 namespace AMDGPU { 97 98 namespace IsaInfo { 99 100 IsaVersion getIsaVersion(const FeatureBitset &Features) { 101 // CI. 102 if (Features.test(FeatureISAVersion7_0_0)) 103 return {7, 0, 0}; 104 if (Features.test(FeatureISAVersion7_0_1)) 105 return {7, 0, 1}; 106 if (Features.test(FeatureISAVersion7_0_2)) 107 return {7, 0, 2}; 108 109 // VI. 110 if (Features.test(FeatureISAVersion8_0_0)) 111 return {8, 0, 0}; 112 if (Features.test(FeatureISAVersion8_0_1)) 113 return {8, 0, 1}; 114 if (Features.test(FeatureISAVersion8_0_2)) 115 return {8, 0, 2}; 116 if (Features.test(FeatureISAVersion8_0_3)) 117 return {8, 0, 3}; 118 if (Features.test(FeatureISAVersion8_0_4)) 119 return {8, 0, 4}; 120 if (Features.test(FeatureISAVersion8_1_0)) 121 return {8, 1, 0}; 122 123 // GFX9. 124 if (Features.test(FeatureISAVersion9_0_0)) 125 return {9, 0, 0}; 126 if (Features.test(FeatureISAVersion9_0_1)) 127 return {9, 0, 1}; 128 129 if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands)) 130 return {0, 0, 0}; 131 return {7, 0, 0}; 132 } 133 134 unsigned getWavefrontSize(const FeatureBitset &Features) { 135 if (Features.test(FeatureWavefrontSize16)) 136 return 16; 137 if (Features.test(FeatureWavefrontSize32)) 138 return 32; 139 140 return 64; 141 } 142 143 unsigned getLocalMemorySize(const FeatureBitset &Features) { 144 if (Features.test(FeatureLocalMemorySize32768)) 145 return 32768; 146 if (Features.test(FeatureLocalMemorySize65536)) 147 return 65536; 148 149 return 0; 150 } 151 152 unsigned getEUsPerCU(const FeatureBitset &Features) { 153 return 4; 154 } 155 156 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, 157 unsigned FlatWorkGroupSize) { 158 if (!Features.test(FeatureGCN)) 159 return 8; 160 unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize); 161 if (N == 1) 162 return 40; 163 N = 40 / N; 164 return std::min(N, 16u); 165 } 166 167 unsigned getMaxWavesPerCU(const FeatureBitset &Features) { 168 return getMaxWavesPerEU(Features) * getEUsPerCU(Features); 169 } 170 171 unsigned getMaxWavesPerCU(const FeatureBitset &Features, 172 unsigned FlatWorkGroupSize) { 173 return getWavesPerWorkGroup(Features, FlatWorkGroupSize); 174 } 175 176 unsigned getMinWavesPerEU(const FeatureBitset &Features) { 177 return 1; 178 } 179 180 unsigned getMaxWavesPerEU(const FeatureBitset &Features) { 181 if (!Features.test(FeatureGCN)) 182 return 8; 183 // FIXME: Need to take scratch memory into account. 184 return 10; 185 } 186 187 unsigned getMaxWavesPerEU(const FeatureBitset &Features, 188 unsigned FlatWorkGroupSize) { 189 return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize), 190 getEUsPerCU(Features)) / getEUsPerCU(Features); 191 } 192 193 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) { 194 return 1; 195 } 196 197 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) { 198 return 2048; 199 } 200 201 unsigned getWavesPerWorkGroup(const FeatureBitset &Features, 202 unsigned FlatWorkGroupSize) { 203 return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) / 204 getWavefrontSize(Features); 205 } 206 207 unsigned getSGPRAllocGranule(const FeatureBitset &Features) { 208 IsaVersion Version = getIsaVersion(Features); 209 if (Version.Major >= 8) 210 return 16; 211 return 8; 212 } 213 214 unsigned getSGPREncodingGranule(const FeatureBitset &Features) { 215 return 8; 216 } 217 218 unsigned getTotalNumSGPRs(const FeatureBitset &Features) { 219 IsaVersion Version = getIsaVersion(Features); 220 if (Version.Major >= 8) 221 return 800; 222 return 512; 223 } 224 225 unsigned getAddressableNumSGPRs(const FeatureBitset &Features) { 226 if (Features.test(FeatureSGPRInitBug)) 227 return FIXED_NUM_SGPRS_FOR_INIT_BUG; 228 229 IsaVersion Version = getIsaVersion(Features); 230 if (Version.Major >= 8) 231 return 102; 232 return 104; 233 } 234 235 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { 236 assert(WavesPerEU != 0); 237 238 if (WavesPerEU >= getMaxWavesPerEU(Features)) 239 return 0; 240 unsigned MinNumSGPRs = 241 alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1), 242 getSGPRAllocGranule(Features)) + 1; 243 return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features)); 244 } 245 246 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, 247 bool Addressable) { 248 assert(WavesPerEU != 0); 249 250 IsaVersion Version = getIsaVersion(Features); 251 unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU, 252 getSGPRAllocGranule(Features)); 253 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features); 254 if (Version.Major >= 8 && !Addressable) 255 AddressableNumSGPRs = 112; 256 return std::min(MaxNumSGPRs, AddressableNumSGPRs); 257 } 258 259 unsigned getVGPRAllocGranule(const FeatureBitset &Features) { 260 return 4; 261 } 262 263 unsigned getVGPREncodingGranule(const FeatureBitset &Features) { 264 return getVGPRAllocGranule(Features); 265 } 266 267 unsigned getTotalNumVGPRs(const FeatureBitset &Features) { 268 return 256; 269 } 270 271 unsigned getAddressableNumVGPRs(const FeatureBitset &Features) { 272 return getTotalNumVGPRs(Features); 273 } 274 275 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { 276 assert(WavesPerEU != 0); 277 278 if (WavesPerEU >= getMaxWavesPerEU(Features)) 279 return 0; 280 unsigned MinNumVGPRs = 281 alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1), 282 getVGPRAllocGranule(Features)) + 1; 283 return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features)); 284 } 285 286 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { 287 assert(WavesPerEU != 0); 288 289 unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU, 290 getVGPRAllocGranule(Features)); 291 unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features); 292 return std::min(MaxNumVGPRs, AddressableNumVGPRs); 293 } 294 295 } // end namespace IsaInfo 296 297 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, 298 const FeatureBitset &Features) { 299 IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features); 300 301 memset(&Header, 0, sizeof(Header)); 302 303 Header.amd_kernel_code_version_major = 1; 304 Header.amd_kernel_code_version_minor = 1; 305 Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU 306 Header.amd_machine_version_major = ISA.Major; 307 Header.amd_machine_version_minor = ISA.Minor; 308 Header.amd_machine_version_stepping = ISA.Stepping; 309 Header.kernel_code_entry_byte_offset = sizeof(Header); 310 // wavefront_size is specified as a power of 2: 2^6 = 64 threads. 311 Header.wavefront_size = 6; 312 313 // If the code object does not support indirect functions, then the value must 314 // be 0xffffffff. 315 Header.call_convention = -1; 316 317 // These alignment values are specified in powers of two, so alignment = 318 // 2^n. The minimum alignment is 2^4 = 16. 319 Header.kernarg_segment_alignment = 4; 320 Header.group_segment_alignment = 4; 321 Header.private_segment_alignment = 4; 322 } 323 324 MCSection *getHSATextSection(MCContext &Ctx) { 325 return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS, 326 ELF::SHF_ALLOC | ELF::SHF_WRITE | 327 ELF::SHF_EXECINSTR | 328 ELF::SHF_AMDGPU_HSA_AGENT | 329 ELF::SHF_AMDGPU_HSA_CODE); 330 } 331 332 MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) { 333 return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS, 334 ELF::SHF_ALLOC | ELF::SHF_WRITE | 335 ELF::SHF_AMDGPU_HSA_GLOBAL | 336 ELF::SHF_AMDGPU_HSA_AGENT); 337 } 338 339 MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) { 340 return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS, 341 ELF::SHF_ALLOC | ELF::SHF_WRITE | 342 ELF::SHF_AMDGPU_HSA_GLOBAL); 343 } 344 345 MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) { 346 return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS, 347 ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY | 348 ELF::SHF_AMDGPU_HSA_AGENT); 349 } 350 351 bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) { 352 return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS; 353 } 354 355 bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) { 356 return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS; 357 } 358 359 bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) { 360 return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS; 361 } 362 363 bool shouldEmitConstantsToTextSection(const Triple &TT) { 364 return TT.getOS() != Triple::AMDHSA; 365 } 366 367 int getIntegerAttribute(const Function &F, StringRef Name, int Default) { 368 Attribute A = F.getFnAttribute(Name); 369 int Result = Default; 370 371 if (A.isStringAttribute()) { 372 StringRef Str = A.getValueAsString(); 373 if (Str.getAsInteger(0, Result)) { 374 LLVMContext &Ctx = F.getContext(); 375 Ctx.emitError("can't parse integer attribute " + Name); 376 } 377 } 378 379 return Result; 380 } 381 382 std::pair<int, int> getIntegerPairAttribute(const Function &F, 383 StringRef Name, 384 std::pair<int, int> Default, 385 bool OnlyFirstRequired) { 386 Attribute A = F.getFnAttribute(Name); 387 if (!A.isStringAttribute()) 388 return Default; 389 390 LLVMContext &Ctx = F.getContext(); 391 std::pair<int, int> Ints = Default; 392 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(','); 393 if (Strs.first.trim().getAsInteger(0, Ints.first)) { 394 Ctx.emitError("can't parse first integer attribute " + Name); 395 return Default; 396 } 397 if (Strs.second.trim().getAsInteger(0, Ints.second)) { 398 if (!OnlyFirstRequired || !Strs.second.trim().empty()) { 399 Ctx.emitError("can't parse second integer attribute " + Name); 400 return Default; 401 } 402 } 403 404 return Ints; 405 } 406 407 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) { 408 unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1; 409 if (Version.Major < 9) 410 return VmcntLo; 411 412 unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo(); 413 return VmcntLo | VmcntHi; 414 } 415 416 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) { 417 return (1 << getExpcntBitWidth()) - 1; 418 } 419 420 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) { 421 return (1 << getLgkmcntBitWidth()) - 1; 422 } 423 424 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) { 425 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo()); 426 unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth()); 427 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth()); 428 unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt; 429 if (Version.Major < 9) 430 return Waitcnt; 431 432 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi()); 433 return Waitcnt | VmcntHi; 434 } 435 436 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { 437 unsigned VmcntLo = 438 unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); 439 if (Version.Major < 9) 440 return VmcntLo; 441 442 unsigned VmcntHi = 443 unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi()); 444 VmcntHi <<= getVmcntBitWidthLo(); 445 return VmcntLo | VmcntHi; 446 } 447 448 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { 449 return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); 450 } 451 452 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { 453 return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); 454 } 455 456 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, 457 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) { 458 Vmcnt = decodeVmcnt(Version, Waitcnt); 459 Expcnt = decodeExpcnt(Version, Waitcnt); 460 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt); 461 } 462 463 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, 464 unsigned Vmcnt) { 465 Waitcnt = 466 packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); 467 if (Version.Major < 9) 468 return Waitcnt; 469 470 Vmcnt >>= getVmcntBitWidthLo(); 471 return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi()); 472 } 473 474 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, 475 unsigned Expcnt) { 476 return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); 477 } 478 479 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, 480 unsigned Lgkmcnt) { 481 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); 482 } 483 484 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version, 485 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) { 486 unsigned Waitcnt = getWaitcntBitMask(Version); 487 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt); 488 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt); 489 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt); 490 return Waitcnt; 491 } 492 493 unsigned getInitialPSInputAddr(const Function &F) { 494 return getIntegerAttribute(F, "InitialPSInputAddr", 0); 495 } 496 497 bool isShader(CallingConv::ID cc) { 498 switch(cc) { 499 case CallingConv::AMDGPU_VS: 500 case CallingConv::AMDGPU_GS: 501 case CallingConv::AMDGPU_PS: 502 case CallingConv::AMDGPU_CS: 503 return true; 504 default: 505 return false; 506 } 507 } 508 509 bool isCompute(CallingConv::ID cc) { 510 return !isShader(cc) || cc == CallingConv::AMDGPU_CS; 511 } 512 513 bool isEntryFunctionCC(CallingConv::ID CC) { 514 return true; 515 } 516 517 bool isSI(const MCSubtargetInfo &STI) { 518 return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands]; 519 } 520 521 bool isCI(const MCSubtargetInfo &STI) { 522 return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands]; 523 } 524 525 bool isVI(const MCSubtargetInfo &STI) { 526 return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]; 527 } 528 529 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { 530 531 switch(Reg) { 532 default: break; 533 case AMDGPU::FLAT_SCR: 534 assert(!isSI(STI)); 535 return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi; 536 537 case AMDGPU::FLAT_SCR_LO: 538 assert(!isSI(STI)); 539 return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi; 540 541 case AMDGPU::FLAT_SCR_HI: 542 assert(!isSI(STI)); 543 return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi; 544 } 545 return Reg; 546 } 547 548 unsigned mc2PseudoReg(unsigned Reg) { 549 switch (Reg) { 550 case AMDGPU::FLAT_SCR_ci: 551 case AMDGPU::FLAT_SCR_vi: 552 return FLAT_SCR; 553 554 case AMDGPU::FLAT_SCR_LO_ci: 555 case AMDGPU::FLAT_SCR_LO_vi: 556 return AMDGPU::FLAT_SCR_LO; 557 558 case AMDGPU::FLAT_SCR_HI_ci: 559 case AMDGPU::FLAT_SCR_HI_vi: 560 return AMDGPU::FLAT_SCR_HI; 561 562 default: 563 return Reg; 564 } 565 } 566 567 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { 568 assert(OpNo < Desc.NumOperands); 569 unsigned OpType = Desc.OpInfo[OpNo].OperandType; 570 return OpType >= AMDGPU::OPERAND_SRC_FIRST && 571 OpType <= AMDGPU::OPERAND_SRC_LAST; 572 } 573 574 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) { 575 assert(OpNo < Desc.NumOperands); 576 unsigned OpType = Desc.OpInfo[OpNo].OperandType; 577 switch (OpType) { 578 case AMDGPU::OPERAND_REG_IMM_FP32: 579 case AMDGPU::OPERAND_REG_IMM_FP64: 580 case AMDGPU::OPERAND_REG_IMM_FP16: 581 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 582 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 583 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 584 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 585 return true; 586 default: 587 return false; 588 } 589 } 590 591 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) { 592 assert(OpNo < Desc.NumOperands); 593 unsigned OpType = Desc.OpInfo[OpNo].OperandType; 594 return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST && 595 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST; 596 } 597 598 // Avoid using MCRegisterClass::getSize, since that function will go away 599 // (move from MC* level to Target* level). Return size in bits. 600 unsigned getRegBitWidth(unsigned RCID) { 601 switch (RCID) { 602 case AMDGPU::SGPR_32RegClassID: 603 case AMDGPU::VGPR_32RegClassID: 604 case AMDGPU::VS_32RegClassID: 605 case AMDGPU::SReg_32RegClassID: 606 case AMDGPU::SReg_32_XM0RegClassID: 607 return 32; 608 case AMDGPU::SGPR_64RegClassID: 609 case AMDGPU::VS_64RegClassID: 610 case AMDGPU::SReg_64RegClassID: 611 case AMDGPU::VReg_64RegClassID: 612 return 64; 613 case AMDGPU::VReg_96RegClassID: 614 return 96; 615 case AMDGPU::SGPR_128RegClassID: 616 case AMDGPU::SReg_128RegClassID: 617 case AMDGPU::VReg_128RegClassID: 618 return 128; 619 case AMDGPU::SReg_256RegClassID: 620 case AMDGPU::VReg_256RegClassID: 621 return 256; 622 case AMDGPU::SReg_512RegClassID: 623 case AMDGPU::VReg_512RegClassID: 624 return 512; 625 default: 626 llvm_unreachable("Unexpected register class"); 627 } 628 } 629 630 unsigned getRegBitWidth(const MCRegisterClass &RC) { 631 return getRegBitWidth(RC.getID()); 632 } 633 634 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, 635 unsigned OpNo) { 636 assert(OpNo < Desc.NumOperands); 637 unsigned RCID = Desc.OpInfo[OpNo].RegClass; 638 return getRegBitWidth(MRI->getRegClass(RCID)) / 8; 639 } 640 641 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) { 642 if (Literal >= -16 && Literal <= 64) 643 return true; 644 645 uint64_t Val = static_cast<uint64_t>(Literal); 646 return (Val == DoubleToBits(0.0)) || 647 (Val == DoubleToBits(1.0)) || 648 (Val == DoubleToBits(-1.0)) || 649 (Val == DoubleToBits(0.5)) || 650 (Val == DoubleToBits(-0.5)) || 651 (Val == DoubleToBits(2.0)) || 652 (Val == DoubleToBits(-2.0)) || 653 (Val == DoubleToBits(4.0)) || 654 (Val == DoubleToBits(-4.0)) || 655 (Val == 0x3fc45f306dc9c882 && HasInv2Pi); 656 } 657 658 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) { 659 if (Literal >= -16 && Literal <= 64) 660 return true; 661 662 // The actual type of the operand does not seem to matter as long 663 // as the bits match one of the inline immediate values. For example: 664 // 665 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, 666 // so it is a legal inline immediate. 667 // 668 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in 669 // floating-point, so it is a legal inline immediate. 670 671 uint32_t Val = static_cast<uint32_t>(Literal); 672 return (Val == FloatToBits(0.0f)) || 673 (Val == FloatToBits(1.0f)) || 674 (Val == FloatToBits(-1.0f)) || 675 (Val == FloatToBits(0.5f)) || 676 (Val == FloatToBits(-0.5f)) || 677 (Val == FloatToBits(2.0f)) || 678 (Val == FloatToBits(-2.0f)) || 679 (Val == FloatToBits(4.0f)) || 680 (Val == FloatToBits(-4.0f)) || 681 (Val == 0x3e22f983 && HasInv2Pi); 682 } 683 684 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) { 685 if (!HasInv2Pi) 686 return false; 687 688 if (Literal >= -16 && Literal <= 64) 689 return true; 690 691 uint16_t Val = static_cast<uint16_t>(Literal); 692 return Val == 0x3C00 || // 1.0 693 Val == 0xBC00 || // -1.0 694 Val == 0x3800 || // 0.5 695 Val == 0xB800 || // -0.5 696 Val == 0x4000 || // 2.0 697 Val == 0xC000 || // -2.0 698 Val == 0x4400 || // 4.0 699 Val == 0xC400 || // -4.0 700 Val == 0x3118; // 1/2pi 701 } 702 703 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) { 704 assert(HasInv2Pi); 705 706 int16_t Lo16 = static_cast<int16_t>(Literal); 707 int16_t Hi16 = static_cast<int16_t>(Literal >> 16); 708 return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi); 709 } 710 711 bool isUniformMMO(const MachineMemOperand *MMO) { 712 const Value *Ptr = MMO->getValue(); 713 // UndefValue means this is a load of a kernel input. These are uniform. 714 // Sometimes LDS instructions have constant pointers. 715 // If Ptr is null, then that means this mem operand contains a 716 // PseudoSourceValue like GOT. 717 if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || 718 isa<Constant>(Ptr) || isa<GlobalValue>(Ptr)) 719 return true; 720 721 const Instruction *I = dyn_cast<Instruction>(Ptr); 722 return I && I->getMetadata("amdgpu.uniform"); 723 } 724 725 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { 726 if (isSI(ST) || isCI(ST)) 727 return ByteOffset >> 2; 728 729 return ByteOffset; 730 } 731 732 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { 733 int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset); 734 return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) : 735 isUInt<20>(EncodedOffset); 736 } 737 } // end namespace AMDGPU 738 739 } // end namespace llvm 740 741 const unsigned AMDGPUAS::MAX_COMMON_ADDRESS; 742 const unsigned AMDGPUAS::GLOBAL_ADDRESS; 743 const unsigned AMDGPUAS::LOCAL_ADDRESS; 744 const unsigned AMDGPUAS::PARAM_D_ADDRESS; 745 const unsigned AMDGPUAS::PARAM_I_ADDRESS; 746 const unsigned AMDGPUAS::CONSTANT_BUFFER_0; 747 const unsigned AMDGPUAS::CONSTANT_BUFFER_1; 748 const unsigned AMDGPUAS::CONSTANT_BUFFER_2; 749 const unsigned AMDGPUAS::CONSTANT_BUFFER_3; 750 const unsigned AMDGPUAS::CONSTANT_BUFFER_4; 751 const unsigned AMDGPUAS::CONSTANT_BUFFER_5; 752 const unsigned AMDGPUAS::CONSTANT_BUFFER_6; 753 const unsigned AMDGPUAS::CONSTANT_BUFFER_7; 754 const unsigned AMDGPUAS::CONSTANT_BUFFER_8; 755 const unsigned AMDGPUAS::CONSTANT_BUFFER_9; 756 const unsigned AMDGPUAS::CONSTANT_BUFFER_10; 757 const unsigned AMDGPUAS::CONSTANT_BUFFER_11; 758 const unsigned AMDGPUAS::CONSTANT_BUFFER_12; 759 const unsigned AMDGPUAS::CONSTANT_BUFFER_13; 760 const unsigned AMDGPUAS::CONSTANT_BUFFER_14; 761 const unsigned AMDGPUAS::CONSTANT_BUFFER_15; 762 const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE; 763 764 namespace llvm { 765 namespace AMDGPU { 766 767 AMDGPUAS getAMDGPUAS(Triple T) { 768 auto Env = T.getEnvironmentName(); 769 AMDGPUAS AS; 770 if (Env == "amdgiz" || Env == "amdgizcl") { 771 AS.FLAT_ADDRESS = 0; 772 AS.PRIVATE_ADDRESS = 5; 773 AS.REGION_ADDRESS = 4; 774 } 775 else { 776 AS.FLAT_ADDRESS = 4; 777 AS.PRIVATE_ADDRESS = 0; 778 AS.REGION_ADDRESS = 5; 779 } 780 return AS; 781 } 782 783 AMDGPUAS getAMDGPUAS(const TargetMachine &M) { 784 return getAMDGPUAS(M.getTargetTriple()); 785 } 786 787 AMDGPUAS getAMDGPUAS(const Module &M) { 788 return getAMDGPUAS(Triple(M.getTargetTriple())); 789 } 790 } // namespace AMDGPU 791 } // namespace llvm 792