1 //===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information--------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include "AMDGPUBaseInfo.h"
10 #include "AMDGPU.h"
11 #include "SIDefines.h"
12 #include "llvm/CodeGen/MachineMemOperand.h"
13 #include "llvm/IR/Constants.h"
14 #include "llvm/IR/Function.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/LLVMContext.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCInstrInfo.h"
19 #include "llvm/MC/MCRegisterInfo.h"
20 #include "llvm/MC/MCSectionELF.h"
21 #include "llvm/MC/MCSubtargetInfo.h"
22 #include "llvm/MC/SubtargetFeature.h"
23 
24 #define GET_SUBTARGETINFO_ENUM
25 #include "AMDGPUGenSubtargetInfo.inc"
26 #undef GET_SUBTARGETINFO_ENUM
27 
28 #define GET_REGINFO_ENUM
29 #include "AMDGPUGenRegisterInfo.inc"
30 #undef GET_REGINFO_ENUM
31 
32 #define GET_INSTRINFO_NAMED_OPS
33 #define GET_INSTRINFO_ENUM
34 #include "AMDGPUGenInstrInfo.inc"
35 #undef GET_INSTRINFO_NAMED_OPS
36 #undef GET_INSTRINFO_ENUM
37 
38 namespace {
39 
/// \returns A mask with \p Width consecutive one bits starting at bit
/// position \p Shift.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  const unsigned LowBits = (1u << Width) - 1;
  return LowBits << Shift;
}
44 
45 /// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
46 ///
47 /// \returns Packed \p Dst.
48 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
49   Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
50   Dst |= (Src << Shift) & getBitMask(Shift, Width);
51   return Dst;
52 }
53 
/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns The \p Width bits of \p Src starting at \p Shift, moved down to
/// bit position 0.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src >> Shift) & ((1u << Width) - 1);
}
60 
// Field layout of the encoded waitcnt value, as defined by the shifts and
// widths below: vmcnt = bits [3:0], expcnt = bits [6:4],
// lgkmcnt = bits [11:8].

/// \returns Vmcnt bit shift.
unsigned getVmcntBitShift() { return 0; }

/// \returns Vmcnt bit width.
unsigned getVmcntBitWidth() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }
78 
79 } // namespace anonymous
80 
81 namespace llvm {
82 namespace AMDGPU {
83 
84 namespace IsaInfo {
85 
86 IsaVersion getIsaVersion(const FeatureBitset &Features) {
87   // CI.
88   if (Features.test(FeatureISAVersion7_0_0))
89     return {7, 0, 0};
90   if (Features.test(FeatureISAVersion7_0_1))
91     return {7, 0, 1};
92   if (Features.test(FeatureISAVersion7_0_2))
93     return {7, 0, 2};
94 
95   // VI.
96   if (Features.test(FeatureISAVersion8_0_0))
97     return {8, 0, 0};
98   if (Features.test(FeatureISAVersion8_0_1))
99     return {8, 0, 1};
100   if (Features.test(FeatureISAVersion8_0_2))
101     return {8, 0, 2};
102   if (Features.test(FeatureISAVersion8_0_3))
103     return {8, 0, 3};
104   if (Features.test(FeatureISAVersion8_0_4))
105     return {8, 0, 4};
106   if (Features.test(FeatureISAVersion8_1_0))
107     return {8, 1, 0};
108 
109   if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
110     return {0, 0, 0};
111   return {7, 0, 0};
112 }
113 
114 unsigned getWavefrontSize(const FeatureBitset &Features) {
115   if (Features.test(FeatureWavefrontSize16))
116     return 16;
117   if (Features.test(FeatureWavefrontSize32))
118     return 32;
119 
120   return 64;
121 }
122 
123 unsigned getLocalMemorySize(const FeatureBitset &Features) {
124   if (Features.test(FeatureLocalMemorySize32768))
125     return 32768;
126   if (Features.test(FeatureLocalMemorySize65536))
127     return 65536;
128 
129   return 0;
130 }
131 
/// \returns Number of execution units per compute unit (a constant 4 for
/// every supported subtarget; \p Features is currently unused).
unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}
135 
136 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
137                                unsigned FlatWorkGroupSize) {
138   if (!Features.test(FeatureGCN))
139     return 8;
140   return getWavesPerWorkGroup(Features, FlatWorkGroupSize) == 1 ? 40 : 16;
141 }
142 
/// \returns Maximum number of waves per compute unit: the per-EU maximum
/// scaled by the number of EUs per CU.
unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}

/// \returns Maximum number of waves per compute unit for the given
/// \p FlatWorkGroupSize; equal to the number of waves one such work group
/// occupies.
unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

/// \returns Minimum number of waves per execution unit (always 1).
unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}
155 
156 unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
157   if (!Features.test(FeatureGCN))
158     return 8;
159   // FIXME: Need to take scratch memory into account.
160   return 10;
161 }
162 
/// \returns Maximum number of waves per execution unit for the given
/// \p FlatWorkGroupSize: the per-CU wave count divided across the EUs,
/// rounded up.
unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}

/// \returns Minimum flat work group size (always 1).
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

/// \returns Maximum flat work group size (2048 for all supported
/// subtargets).
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}
176 
177 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
178                               unsigned FlatWorkGroupSize) {
179   return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
180                  getWavefrontSize(Features);
181 }
182 
183 unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
184   IsaVersion Version = getIsaVersion(Features);
185   if (Version.Major >= 8)
186     return 16;
187   return 8;
188 }
189 
/// \returns SGPR encoding granule (8 on all supported subtargets).
unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}
193 
194 unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
195   IsaVersion Version = getIsaVersion(Features);
196   if (Version.Major >= 8)
197     return 800;
198   return 512;
199 }
200 
201 unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
202   if (Features.test(FeatureSGPRInitBug))
203     return FIXED_NUM_SGPRS_FOR_INIT_BUG;
204 
205   IsaVersion Version = getIsaVersion(Features);
206   if (Version.Major >= 8)
207     return 102;
208   return 104;
209 }
210 
211 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
212   IsaVersion Version = getIsaVersion(Features);
213   if (Version.Major >= 8) {
214     switch (WavesPerEU) {
215       case 0:  return 0;
216       case 10: return 0;
217       case 9:  return 0;
218       case 8:  return 81;
219       default: return 97;
220     }
221   } else {
222     switch (WavesPerEU) {
223       case 0:  return 0;
224       case 10: return 0;
225       case 9:  return 49;
226       case 8:  return 57;
227       case 7:  return 65;
228       case 6:  return 73;
229       case 5:  return 81;
230       default: return 97;
231     }
232   }
233 }
234 
235 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
236                         bool Addressable) {
237   IsaVersion Version = getIsaVersion(Features);
238   if (Version.Major >= 8) {
239     switch (WavesPerEU) {
240       case 0:  return 80;
241       case 10: return 80;
242       case 9:  return 80;
243       case 8:  return 96;
244       default: return Addressable ? getAddressableNumSGPRs(Features) : 112;
245     }
246   } else {
247     switch (WavesPerEU) {
248       case 0:  return 48;
249       case 10: return 48;
250       case 9:  return 56;
251       case 8:  return 64;
252       case 7:  return 72;
253       case 6:  return 80;
254       case 5:  return 96;
255       default: return getAddressableNumSGPRs(Features);
256     }
257   }
258 }
259 
/// \returns VGPR allocation granule (4 on all supported subtargets).
unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

/// \returns VGPR encoding granule; identical to the allocation granule.
unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

/// \returns Total number of VGPRs (256 on all supported subtargets).
unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

/// \returns Number of addressable VGPRs; every VGPR is addressable.
unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}
275 
276 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
277   switch (WavesPerEU) {
278     case 0:  return 0;
279     case 10: return 0;
280     case 9:  return 25;
281     case 8:  return 29;
282     case 7:  return 33;
283     case 6:  return 37;
284     case 5:  return 41;
285     case 4:  return 49;
286     case 3:  return 65;
287     case 2:  return 85;
288     default: return 129;
289   }
290 }
291 
292 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
293   switch (WavesPerEU) {
294     case 0:  return 24;
295     case 10: return 24;
296     case 9:  return 28;
297     case 8:  return 32;
298     case 7:  return 36;
299     case 6:  return 40;
300     case 5:  return 48;
301     case 4:  return 64;
302     case 3:  return 84;
303     case 2:  return 128;
304     default: return getTotalNumVGPRs(Features);
305   }
306 }
307 
308 } // namespace IsaInfo
309 
/// \brief Fills \p Header with the default amd_kernel_code_t values for the
/// subtarget described by \p Features.
///
/// All fields are zeroed first; only the fields with non-zero defaults are
/// then assigned.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 0;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  // Kernel code directly follows this header in the code object.
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value must
  // be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n.  The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}
336 
/// \returns The .hsatext ELF section: allocated, writable, executable code
/// tagged with the HSA agent and HSA code flags.
MCSection *getHSATextSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_EXECINSTR |
                           ELF::SHF_AMDGPU_HSA_AGENT |
                           ELF::SHF_AMDGPU_HSA_CODE);
}

/// \returns The .hsadata_global_agent ELF section: allocated, writable HSA
/// global data with agent visibility.
MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_AMDGPU_HSA_GLOBAL |
                           ELF::SHF_AMDGPU_HSA_AGENT);
}
351 
352 MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
353   return  Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
354                             ELF::SHF_ALLOC | ELF::SHF_WRITE |
355                             ELF::SHF_AMDGPU_HSA_GLOBAL);
356 }
357 
/// \returns The .hsarodata_readonly_agent ELF section: allocated, read-only
/// data with HSA agent visibility.
MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
                           ELF::SHF_AMDGPU_HSA_AGENT);
}
363 
/// \returns True if \p GV lives in the local (group/LDS) address space.
bool isGroupSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

/// \returns True if \p GV lives in the global address space.
bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

/// \returns True if \p GV lives in the constant address space.
bool isReadOnlySegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}

/// \returns True if constants should be emitted into the text section;
/// only the AMDHSA OS keeps them out of .text.
bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}
379 
380 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
381   Attribute A = F.getFnAttribute(Name);
382   int Result = Default;
383 
384   if (A.isStringAttribute()) {
385     StringRef Str = A.getValueAsString();
386     if (Str.getAsInteger(0, Result)) {
387       LLVMContext &Ctx = F.getContext();
388       Ctx.emitError("can't parse integer attribute " + Name);
389     }
390   }
391 
392   return Result;
393 }
394 
/// \brief Reads function attribute \p Name as a comma-separated pair of
/// integers.
///
/// \returns \p Default if the attribute is absent or fails to parse. When
/// \p OnlyFirstRequired is set, a missing second value is tolerated (the
/// default second value is kept); a second value that is present but
/// malformed is always an error.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    // An empty second element merely fails the parse; only report an error
    // when the second value is required or non-empty garbage was supplied.
    if (!OnlyFirstRequired || Strs.second.trim().size()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
419 
/// \returns Mask of all valid vmcnt values (unshifted).
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getVmcntBitWidth()) - 1;
}

/// \returns Mask of all valid expcnt values (unshifted).
unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

/// \returns Mask of all valid lgkmcnt values (unshifted).
unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}
431 
432 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
433   unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
434   unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
435   unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
436   return Vmcnt | Expcnt | Lgkmcnt;
437 }
438 
/// \returns The vmcnt field extracted from the encoded \p Waitcnt value.
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
}

/// \returns The expcnt field extracted from the encoded \p Waitcnt value.
unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

/// \returns The lgkmcnt field extracted from the encoded \p Waitcnt value.
unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
450 
/// \brief Splits the encoded \p Waitcnt value into its \p Vmcnt, \p Expcnt
/// and \p Lgkmcnt fields.
void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}
457 
/// \returns \p Waitcnt with its vmcnt field replaced by \p Vmcnt.
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
}

/// \returns \p Waitcnt with its expcnt field replaced by \p Expcnt.
unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

/// \returns \p Waitcnt with its lgkmcnt field replaced by \p Lgkmcnt.
unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
472 
/// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into a single waitcnt
/// value.
///
/// The encoding starts from getWaitcntBitMask(Version), so bits outside the
/// three counter fields remain set in the result.
///
/// \returns The encoded waitcnt value.
unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}
481 
/// \returns The value of \p F's "InitialPSInputAddr" attribute, or 0 when
/// it is not set.
unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}
485 
486 bool isShader(CallingConv::ID cc) {
487   switch(cc) {
488     case CallingConv::AMDGPU_VS:
489     case CallingConv::AMDGPU_GS:
490     case CallingConv::AMDGPU_PS:
491     case CallingConv::AMDGPU_CS:
492       return true;
493     default:
494       return false;
495   }
496 }
497 
/// \returns True for any non-shader calling convention, and also for
/// AMDGPU_CS (compute shaders count as compute).
bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

/// \returns True if the subtarget is Southern Islands.
bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

/// \returns True if the subtarget is Sea Islands.
bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

/// \returns True if the subtarget is Volcanic Islands.
bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}
513 
514 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
515 
516   switch(Reg) {
517   default: break;
518   case AMDGPU::FLAT_SCR:
519     assert(!isSI(STI));
520     return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;
521 
522   case AMDGPU::FLAT_SCR_LO:
523     assert(!isSI(STI));
524     return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;
525 
526   case AMDGPU::FLAT_SCR_HI:
527     assert(!isSI(STI));
528     return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
529   }
530   return Reg;
531 }
532 
/// \returns True if operand \p OpNo of \p Desc has an operand type in the
/// [OPERAND_SRC_FIRST, OPERAND_SRC_LAST] range.
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

/// \returns True if operand \p OpNo of \p Desc is one of the 16-, 32- or
/// 64-bit floating-point source operand types (immediate or inline
/// constant).
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    return true;
  default:
    return false;
  }
}

/// \returns True if operand \p OpNo of \p Desc has an operand type in the
/// [OPERAND_REG_INLINE_C_FIRST, OPERAND_REG_INLINE_C_LAST] range.
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}
562 
// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
/// \returns Width in bits of the register class identified by \p RCID.
/// Unreachable (aborts in asserts builds) for an unknown class ID.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

/// \returns Width in bits of register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}
598 
/// \returns Size in bytes of register operand \p OpNo of \p Desc, derived
/// from its register class width.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}
605 
606 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
607   if (Literal >= -16 && Literal <= 64)
608     return true;
609 
610   uint64_t Val = static_cast<uint64_t>(Literal);
611   return (Val == DoubleToBits(0.0)) ||
612          (Val == DoubleToBits(1.0)) ||
613          (Val == DoubleToBits(-1.0)) ||
614          (Val == DoubleToBits(0.5)) ||
615          (Val == DoubleToBits(-0.5)) ||
616          (Val == DoubleToBits(2.0)) ||
617          (Val == DoubleToBits(-2.0)) ||
618          (Val == DoubleToBits(4.0)) ||
619          (Val == DoubleToBits(-4.0)) ||
620          (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
621 }
622 
623 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
624   if (Literal >= -16 && Literal <= 64)
625     return true;
626 
627   // The actual type of the operand does not seem to matter as long
628   // as the bits match one of the inline immediate values.  For example:
629   //
630   // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
631   // so it is a legal inline immediate.
632   //
633   // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
634   // floating-point, so it is a legal inline immediate.
635 
636   uint32_t Val = static_cast<uint32_t>(Literal);
637   return (Val == FloatToBits(0.0f)) ||
638          (Val == FloatToBits(1.0f)) ||
639          (Val == FloatToBits(-1.0f)) ||
640          (Val == FloatToBits(0.5f)) ||
641          (Val == FloatToBits(-0.5f)) ||
642          (Val == FloatToBits(2.0f)) ||
643          (Val == FloatToBits(-2.0f)) ||
644          (Val == FloatToBits(4.0f)) ||
645          (Val == FloatToBits(-4.0f)) ||
646          (Val == 0x3e22f983 && HasInv2Pi);
647 }
648 
/// \returns True if the 16-bit value \p Literal can be encoded as an inline
/// immediate. Requires \p HasInv2Pi; accepts small integers in [-16, 64]
/// and the fixed half-precision FP constants.
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  switch (static_cast<uint16_t>(Literal)) {
  case 0x3C00: // 1.0
  case 0xBC00: // -1.0
  case 0x3800: // 0.5
  case 0xB800: // -0.5
  case 0x4000: // 2.0
  case 0xC000: // -2.0
  case 0x4400: // 4.0
  case 0xC400: // -4.0
  case 0x3118: // 1/2pi
    return true;
  default:
    return false;
  }
}
667 
/// \returns True if \p MMO's pointer is known to be uniform: kernel
/// arguments, constants, globals, pseudo source values, or instructions
/// tagged with "amdgpu.uniform" metadata.
bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  // Otherwise, only loads explicitly marked uniform are treated as such.
  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}
681 
/// \brief Converts \p ByteOffset to the offset encoding used by SMRD
/// instructions.
///
/// SI and CI encode the offset in dwords (divide by 4); later subtargets
/// encode it directly in bytes.
/// NOTE(review): the >> 2 assumes \p ByteOffset is dword-aligned on SI/CI —
/// confirm at callers.
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isSI(ST) || isCI(ST))
    return ByteOffset >> 2;

  return ByteOffset;
}
688 
689 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
690   int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
691   return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) :
692                                 isUInt<20>(EncodedOffset);
693 }
694 
695 } // End namespace AMDGPU
696 } // End namespace llvm
697