1 //===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information--------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include "AMDGPUBaseInfo.h"
10 #include "AMDGPU.h"
11 #include "SIDefines.h"
12 #include "llvm/CodeGen/MachineMemOperand.h"
13 #include "llvm/IR/Constants.h"
14 #include "llvm/IR/Function.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/LLVMContext.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCInstrInfo.h"
19 #include "llvm/MC/MCRegisterInfo.h"
20 #include "llvm/MC/MCSectionELF.h"
21 #include "llvm/MC/MCSubtargetInfo.h"
22 #include "llvm/MC/SubtargetFeature.h"
23 
24 #define GET_SUBTARGETINFO_ENUM
25 #include "AMDGPUGenSubtargetInfo.inc"
26 #undef GET_SUBTARGETINFO_ENUM
27 
28 #define GET_REGINFO_ENUM
29 #include "AMDGPUGenRegisterInfo.inc"
30 #undef GET_REGINFO_ENUM
31 
32 #define GET_INSTRINFO_NAMED_OPS
33 #define GET_INSTRINFO_ENUM
34 #include "AMDGPUGenInstrInfo.inc"
35 #undef GET_INSTRINFO_NAMED_OPS
36 #undef GET_INSTRINFO_ENUM
37 
namespace {

/// \returns Bit mask with \p Width contiguous one bits starting at bit
/// \p Shift.  Uses an unsigned literal so the left shift is well defined
/// for widths up to 31 (a signed 1 << 31 overflows).
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1u << Width) - 1) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
/// Bits of \p Src outside \p Width are discarded.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  // Clear the destination field, then merge in the masked source.  The mask
  // already covers bit Shift, so the previous extra ~(1 << Shift) term was
  // redundant.
  unsigned Mask = getBitMask(Shift, Width);
  Dst &= ~Mask;
  Dst |= (Src << Shift) & Mask;
  return Dst;
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (vmcnt occupies bits [3:0] of the s_waitcnt
/// immediate, per the shift/width constants below).
unsigned getVmcntBitShift() { return 0; }

/// \returns Vmcnt bit width.
unsigned getVmcntBitWidth() { return 4; }

/// \returns Expcnt bit shift (expcnt occupies bits [6:4]).
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift (lgkmcnt occupies bits [11:8]).
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

} // namespace anonymous
80 
81 namespace llvm {
82 namespace AMDGPU {
83 
84 namespace IsaInfo {
85 
86 IsaVersion getIsaVersion(const FeatureBitset &Features) {
87   // CI.
88   if (Features.test(FeatureISAVersion7_0_0))
89     return {7, 0, 0};
90   if (Features.test(FeatureISAVersion7_0_1))
91     return {7, 0, 1};
92   if (Features.test(FeatureISAVersion7_0_2))
93     return {7, 0, 2};
94 
95   // VI.
96   if (Features.test(FeatureISAVersion8_0_0))
97     return {8, 0, 0};
98   if (Features.test(FeatureISAVersion8_0_1))
99     return {8, 0, 1};
100   if (Features.test(FeatureISAVersion8_0_2))
101     return {8, 0, 2};
102   if (Features.test(FeatureISAVersion8_0_3))
103     return {8, 0, 3};
104   if (Features.test(FeatureISAVersion8_0_4))
105     return {8, 0, 4};
106   if (Features.test(FeatureISAVersion8_1_0))
107     return {8, 1, 0};
108 
109   if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
110     return {0, 0, 0};
111   return {7, 0, 0};
112 }
113 
114 unsigned getWavefrontSize(const FeatureBitset &Features) {
115   if (Features.test(FeatureWavefrontSize16))
116     return 16;
117   if (Features.test(FeatureWavefrontSize32))
118     return 32;
119 
120   return 64;
121 }
122 
123 unsigned getLocalMemorySize(const FeatureBitset &Features) {
124   if (Features.test(FeatureLocalMemorySize32768))
125     return 32768;
126   if (Features.test(FeatureLocalMemorySize65536))
127     return 65536;
128 
129   return 0;
130 }
131 
132 unsigned getEUsPerCU(const FeatureBitset &Features) {
133   return 4;
134 }
135 
136 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
137                                unsigned FlatWorkGroupSize) {
138   if (!Features.test(FeatureGCN))
139     return 8;
140   return getWavesPerWorkGroup(Features, FlatWorkGroupSize) == 1 ? 40 : 16;
141 }
142 
143 unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
144   return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
145 }
146 
147 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
148                           unsigned FlatWorkGroupSize) {
149   return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
150 }
151 
152 unsigned getMinWavesPerEU(const FeatureBitset &Features) {
153   return 1;
154 }
155 
156 unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
157   if (!Features.test(FeatureGCN))
158     return 8;
159   // FIXME: Need to take scratch memory into account.
160   return 10;
161 }
162 
163 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
164                           unsigned FlatWorkGroupSize) {
165   return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
166                  getEUsPerCU(Features)) / getEUsPerCU(Features);
167 }
168 
169 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
170   return 1;
171 }
172 
173 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
174   return 2048;
175 }
176 
177 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
178                               unsigned FlatWorkGroupSize) {
179   return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
180                  getWavefrontSize(Features);
181 }
182 
183 unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
184   IsaVersion Version = getIsaVersion(Features);
185   if (Version.Major >= 8)
186     return 16;
187   return 8;
188 }
189 
190 unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
191   return 8;
192 }
193 
194 unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
195   IsaVersion Version = getIsaVersion(Features);
196   if (Version.Major >= 8)
197     return 800;
198   return 512;
199 }
200 
201 unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
202   if (Features.test(FeatureSGPRInitBug))
203     return FIXED_NUM_SGPRS_FOR_INIT_BUG;
204 
205   IsaVersion Version = getIsaVersion(Features);
206   if (Version.Major >= 8)
207     return 102;
208   return 104;
209 }
210 
211 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
212   assert(WavesPerEU != 0);
213 
214   if (WavesPerEU >= getMaxWavesPerEU(Features))
215     return 0;
216   unsigned MinNumSGPRs =
217       alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
218                 getSGPRAllocGranule(Features)) + 1;
219   return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
220 }
221 
222 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
223                         bool Addressable) {
224   assert(WavesPerEU != 0);
225 
226   IsaVersion Version = getIsaVersion(Features);
227   unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
228                                    getSGPRAllocGranule(Features));
229   unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
230   if (Version.Major >= 8 && !Addressable)
231     AddressableNumSGPRs = 112;
232   return std::min(MaxNumSGPRs, AddressableNumSGPRs);
233 }
234 
235 unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
236   return 4;
237 }
238 
239 unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
240   return getVGPRAllocGranule(Features);
241 }
242 
243 unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
244   return 256;
245 }
246 
247 unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
248   return getTotalNumVGPRs(Features);
249 }
250 
251 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
252   assert(WavesPerEU != 0);
253 
254   if (WavesPerEU >= getMaxWavesPerEU(Features))
255     return 0;
256   unsigned MinNumVGPRs =
257       alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
258                 getVGPRAllocGranule(Features)) + 1;
259   return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
260 }
261 
262 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
263   assert(WavesPerEU != 0);
264 
265   unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
266                                    getVGPRAllocGranule(Features));
267   unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
268   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
269 }
270 
271 } // namespace IsaInfo
272 
/// \brief Fills \p Header with default amd_kernel_code_t values for the
/// subtarget described by \p Features.
///
/// All fields not explicitly assigned below are zeroed by the memset.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 0;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  // The kernel machine code immediately follows this header.
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value must
  // be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n.  The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}
299 
300 MCSection *getHSATextSection(MCContext &Ctx) {
301   return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
302                            ELF::SHF_ALLOC | ELF::SHF_WRITE |
303                            ELF::SHF_EXECINSTR |
304                            ELF::SHF_AMDGPU_HSA_AGENT |
305                            ELF::SHF_AMDGPU_HSA_CODE);
306 }
307 
308 MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
309   return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
310                            ELF::SHF_ALLOC | ELF::SHF_WRITE |
311                            ELF::SHF_AMDGPU_HSA_GLOBAL |
312                            ELF::SHF_AMDGPU_HSA_AGENT);
313 }
314 
315 MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
316   return  Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
317                             ELF::SHF_ALLOC | ELF::SHF_WRITE |
318                             ELF::SHF_AMDGPU_HSA_GLOBAL);
319 }
320 
321 MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
322   return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
323                            ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
324                            ELF::SHF_AMDGPU_HSA_AGENT);
325 }
326 
327 bool isGroupSegment(const GlobalValue *GV) {
328   return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
329 }
330 
331 bool isGlobalSegment(const GlobalValue *GV) {
332   return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
333 }
334 
335 bool isReadOnlySegment(const GlobalValue *GV) {
336   return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
337 }
338 
339 bool shouldEmitConstantsToTextSection(const Triple &TT) {
340   return TT.getOS() != Triple::AMDHSA;
341 }
342 
343 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
344   Attribute A = F.getFnAttribute(Name);
345   int Result = Default;
346 
347   if (A.isStringAttribute()) {
348     StringRef Str = A.getValueAsString();
349     if (Str.getAsInteger(0, Result)) {
350       LLVMContext &Ctx = F.getContext();
351       Ctx.emitError("can't parse integer attribute " + Name);
352     }
353   }
354 
355   return Result;
356 }
357 
/// \brief Reads function attribute \p Name as a pair of comma-separated
/// integers, e.g. "128,2".
///
/// \returns \p Default if the attribute is not a string attribute or if the
/// first integer fails to parse.  When \p OnlyFirstRequired is true the
/// second value may be omitted, but a second value that is present and
/// malformed is still an error.  Parse failures are reported through
/// LLVMContext::emitError.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  // Split "a,b" around the first comma; Strs.second is empty if no comma.
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    // Suppress the error only when the second value is optional AND absent.
    if (!OnlyFirstRequired || Strs.second.trim().size()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
382 
383 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
384   return (1 << getVmcntBitWidth()) - 1;
385 }
386 
387 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
388   return (1 << getExpcntBitWidth()) - 1;
389 }
390 
391 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
392   return (1 << getLgkmcntBitWidth()) - 1;
393 }
394 
395 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
396   unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
397   unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
398   unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
399   return Vmcnt | Expcnt | Lgkmcnt;
400 }
401 
402 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
403   return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
404 }
405 
406 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
407   return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
408 }
409 
410 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
411   return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
412 }
413 
414 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
415                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
416   Vmcnt = decodeVmcnt(Version, Waitcnt);
417   Expcnt = decodeExpcnt(Version, Waitcnt);
418   Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
419 }
420 
421 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
422                      unsigned Vmcnt) {
423   return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
424 }
425 
426 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
427                       unsigned Expcnt) {
428   return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
429 }
430 
431 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
432                        unsigned Lgkmcnt) {
433   return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
434 }
435 
436 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
437                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
438   unsigned Waitcnt = getWaitcntBitMask(Version);
439   Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
440   Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
441   Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
442   return Waitcnt;
443 }
444 
445 unsigned getInitialPSInputAddr(const Function &F) {
446   return getIntegerAttribute(F, "InitialPSInputAddr", 0);
447 }
448 
449 bool isShader(CallingConv::ID cc) {
450   switch(cc) {
451     case CallingConv::AMDGPU_VS:
452     case CallingConv::AMDGPU_GS:
453     case CallingConv::AMDGPU_PS:
454     case CallingConv::AMDGPU_CS:
455       return true;
456     default:
457       return false;
458   }
459 }
460 
461 bool isCompute(CallingConv::ID cc) {
462   return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
463 }
464 
465 bool isSI(const MCSubtargetInfo &STI) {
466   return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
467 }
468 
469 bool isCI(const MCSubtargetInfo &STI) {
470   return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
471 }
472 
473 bool isVI(const MCSubtargetInfo &STI) {
474   return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
475 }
476 
477 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
478 
479   switch(Reg) {
480   default: break;
481   case AMDGPU::FLAT_SCR:
482     assert(!isSI(STI));
483     return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;
484 
485   case AMDGPU::FLAT_SCR_LO:
486     assert(!isSI(STI));
487     return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;
488 
489   case AMDGPU::FLAT_SCR_HI:
490     assert(!isSI(STI));
491     return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
492   }
493   return Reg;
494 }
495 
496 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
497   assert(OpNo < Desc.NumOperands);
498   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
499   return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
500          OpType <= AMDGPU::OPERAND_SRC_LAST;
501 }
502 
503 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
504   assert(OpNo < Desc.NumOperands);
505   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
506   switch (OpType) {
507   case AMDGPU::OPERAND_REG_IMM_FP32:
508   case AMDGPU::OPERAND_REG_IMM_FP64:
509   case AMDGPU::OPERAND_REG_IMM_FP16:
510   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
511   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
512   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
513     return true;
514   default:
515     return false;
516   }
517 }
518 
519 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
520   assert(OpNo < Desc.NumOperands);
521   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
522   return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
523          OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
524 }
525 
526 // Avoid using MCRegisterClass::getSize, since that function will go away
527 // (move from MC* level to Target* level). Return size in bits.
528 unsigned getRegBitWidth(unsigned RCID) {
529   switch (RCID) {
530   case AMDGPU::SGPR_32RegClassID:
531   case AMDGPU::VGPR_32RegClassID:
532   case AMDGPU::VS_32RegClassID:
533   case AMDGPU::SReg_32RegClassID:
534   case AMDGPU::SReg_32_XM0RegClassID:
535     return 32;
536   case AMDGPU::SGPR_64RegClassID:
537   case AMDGPU::VS_64RegClassID:
538   case AMDGPU::SReg_64RegClassID:
539   case AMDGPU::VReg_64RegClassID:
540     return 64;
541   case AMDGPU::VReg_96RegClassID:
542     return 96;
543   case AMDGPU::SGPR_128RegClassID:
544   case AMDGPU::SReg_128RegClassID:
545   case AMDGPU::VReg_128RegClassID:
546     return 128;
547   case AMDGPU::SReg_256RegClassID:
548   case AMDGPU::VReg_256RegClassID:
549     return 256;
550   case AMDGPU::SReg_512RegClassID:
551   case AMDGPU::VReg_512RegClassID:
552     return 512;
553   default:
554     llvm_unreachable("Unexpected register class");
555   }
556 }
557 
558 unsigned getRegBitWidth(const MCRegisterClass &RC) {
559   return getRegBitWidth(RC.getID());
560 }
561 
562 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
563                            unsigned OpNo) {
564   assert(OpNo < Desc.NumOperands);
565   unsigned RCID = Desc.OpInfo[OpNo].RegClass;
566   return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
567 }
568 
569 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
570   if (Literal >= -16 && Literal <= 64)
571     return true;
572 
573   uint64_t Val = static_cast<uint64_t>(Literal);
574   return (Val == DoubleToBits(0.0)) ||
575          (Val == DoubleToBits(1.0)) ||
576          (Val == DoubleToBits(-1.0)) ||
577          (Val == DoubleToBits(0.5)) ||
578          (Val == DoubleToBits(-0.5)) ||
579          (Val == DoubleToBits(2.0)) ||
580          (Val == DoubleToBits(-2.0)) ||
581          (Val == DoubleToBits(4.0)) ||
582          (Val == DoubleToBits(-4.0)) ||
583          (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
584 }
585 
586 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
587   if (Literal >= -16 && Literal <= 64)
588     return true;
589 
590   // The actual type of the operand does not seem to matter as long
591   // as the bits match one of the inline immediate values.  For example:
592   //
593   // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
594   // so it is a legal inline immediate.
595   //
596   // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
597   // floating-point, so it is a legal inline immediate.
598 
599   uint32_t Val = static_cast<uint32_t>(Literal);
600   return (Val == FloatToBits(0.0f)) ||
601          (Val == FloatToBits(1.0f)) ||
602          (Val == FloatToBits(-1.0f)) ||
603          (Val == FloatToBits(0.5f)) ||
604          (Val == FloatToBits(-0.5f)) ||
605          (Val == FloatToBits(2.0f)) ||
606          (Val == FloatToBits(-2.0f)) ||
607          (Val == FloatToBits(4.0f)) ||
608          (Val == FloatToBits(-4.0f)) ||
609          (Val == 0x3e22f983 && HasInv2Pi);
610 }
611 
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  // 16-bit inline literals are only available with the inv-2pi feature set.
  if (!HasInv2Pi)
    return false;

  // Small integers in [-16, 64] are always inline.
  if (Literal >= -16 && Literal <= 64)
    return true;

  // Otherwise match one of the fixed half-precision bit patterns.
  switch (static_cast<uint16_t>(Literal)) {
  case 0x3C00: // 1.0
  case 0xBC00: // -1.0
  case 0x3800: // 0.5
  case 0xB800: // -0.5
  case 0x4000: // 2.0
  case 0xC000: // -2.0
  case 0x4400: // 4.0
  case 0xC400: // -4.0
  case 0x3118: // 1/2pi
    return true;
  default:
    return false;
  }
}
630 
631 bool isUniformMMO(const MachineMemOperand *MMO) {
632   const Value *Ptr = MMO->getValue();
633   // UndefValue means this is a load of a kernel input.  These are uniform.
634   // Sometimes LDS instructions have constant pointers.
635   // If Ptr is null, then that means this mem operand contains a
636   // PseudoSourceValue like GOT.
637   if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
638       isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
639     return true;
640 
641   const Instruction *I = dyn_cast<Instruction>(Ptr);
642   return I && I->getMetadata("amdgpu.uniform");
643 }
644 
645 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
646   if (isSI(ST) || isCI(ST))
647     return ByteOffset >> 2;
648 
649   return ByteOffset;
650 }
651 
652 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
653   int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
654   return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) :
655                                 isUInt<20>(EncodedOffset);
656 }
657 
658 } // End namespace AMDGPU
659 } // End namespace llvm
660