//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
#undef GET_SUBTARGETINFO_ENUM

#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
#undef GET_REGINFO_ENUM

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRINFO_ENUM
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS
#undef GET_INSTRINFO_ENUM

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }

} // end anonymous namespace

namespace llvm {
namespace AMDGPU {

namespace IsaInfo {

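// Map the ISA-version subtarget features to a {Major, Minor, Stepping} triple.
// GCN targets without an explicit version feature default to 7.0.0; SI and
// non-GCN targets report 0.0.0.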
IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // CI.
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};
  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};
  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};

  // VI.
  if (Features.test(FeatureISAVersion8_0_0))
    return {8, 0, 0};
  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};
  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};
  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};
  if (Features.test(FeatureISAVersion8_0_4))
    return {8, 0, 4};
  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};

  // GFX9.
  if (Features.test(FeatureISAVersion9_0_0))
    return {9, 0, 0};
  if (Features.test(FeatureISAVersion9_0_1))
    return {9, 0, 1};

  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
    return {0, 0, 0};
  return {7, 0, 0};
}

unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
    return 16;
  if (Features.test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
    return 32768;
  if (Features.test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}

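// A CU can hold at most 40 waves in flight (getMaxWavesPerEU * getEUsPerCU),
// so the number of work groups per CU is limited by how many waves each work
// group needs, and additionally capped at 16 concurrent work groups.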
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
    return 8;
  // FIXME: Need to take scratch memory into account.
  return 10;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}

unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
                 getWavefrontSize(Features);
}

unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}

unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 102;
  return 104;
}

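// Smallest SGPR count that limits occupancy to at most WavesPerEU waves per
// EU: one register past the largest allocation that still fits WavesPerEU + 1
// waves.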
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumSGPRs =
      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
                getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}

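// Largest SGPR budget that still sustains WavesPerEU waves per EU, clamped to
// the addressable SGPR count (or to 112 on GFX8+ when Addressable is false).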
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
                                   getSGPRAllocGranule(Features));
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}

unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}

unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
                                   getVGPRAllocGranule(Features));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

} // end namespace IsaInfo

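// Reset an amd_kernel_code_t header to the default values for this ISA
// version; resource-usage fields are left zeroed.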
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 1;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n.  The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

MCSection *getHSATextSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_EXECINSTR |
                           ELF::SHF_AMDGPU_HSA_AGENT |
                           ELF::SHF_AMDGPU_HSA_CODE);
}

MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_AMDGPU_HSA_GLOBAL |
                           ELF::SHF_AMDGPU_HSA_AGENT);
}

MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_AMDGPU_HSA_GLOBAL);
}

MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
                           ELF::SHF_AMDGPU_HSA_AGENT);
}

bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

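// On GFX9 the vmcnt field of the waitcnt immediate grows to 6 bits, split
// between bits [3:0] and bits [15:14]; earlier targets only use the low 4
// bits. The helpers below account for both layouts.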
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

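// Extract vmcnt from an encoded waitcnt value, recombining the split low and
// high parts on GFX9 and later.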
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

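// Pack a vmcnt value into an encoded waitcnt value, spilling bits above the
// low 4-bit field into the high part on GFX9 and later.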
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch (cc) {
    case CallingConv::AMDGPU_VS:
    case CallingConv::AMDGPU_GS:
    case CallingConv::AMDGPU_PS:
    case CallingConv::AMDGPU_CS:
      return true;
    default:
      return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

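// Translate a pseudo register used by the assembler into the subtarget's
// actual MC register; the flat scratch registers differ between CI and VI.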
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  switch (Reg) {
  default: break;
  case AMDGPU::FLAT_SCR:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;

  case AMDGPU::FLAT_SCR_LO:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;

  case AMDGPU::FLAT_SCR_HI:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
  }
  return Reg;
}

unsigned mc2PseudoReg(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::FLAT_SCR_ci:
  case AMDGPU::FLAT_SCR_vi:
    return AMDGPU::FLAT_SCR;

  case AMDGPU::FLAT_SCR_LO_ci:
  case AMDGPU::FLAT_SCR_LO_vi:
    return AMDGPU::FLAT_SCR_LO;

  case AMDGPU::FLAT_SCR_HI_ci:
  case AMDGPU::FLAT_SCR_HI_vi:
    return AMDGPU::FLAT_SCR_HI;

  default:
    return Reg;
  }
}

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

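// A 64-bit literal can be encoded as an inline constant if it is a small
// integer in [-16, 64] or one of the hardware floating-point constants
// (0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0, and 1/(2*pi) when supported).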
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values.  For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}

bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

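// SMRD immediate offsets are expressed in dwords on SI and CI, so the byte
// offset is scaled down by 4; VI and later encode the byte offset directly.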
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isSI(ST) || isCI(ST))
    return ByteOffset >> 2;

  return ByteOffset;
}

bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) :
                                isUInt<20>(EncodedOffset);
}
} // end namespace AMDGPU

} // end namespace llvm

const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
const unsigned AMDGPUAS::GLOBAL_ADDRESS;
const unsigned AMDGPUAS::LOCAL_ADDRESS;
const unsigned AMDGPUAS::PARAM_D_ADDRESS;
const unsigned AMDGPUAS::PARAM_I_ADDRESS;
const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;

namespace llvm {
namespace AMDGPU {

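// The "amdgiz" environments select the address space mapping where flat is 0
// and private is 5; all other triples use the legacy mapping where private is
// 0 and flat is 4.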
AMDGPUAS getAMDGPUAS(Triple T) {
  auto Env = T.getEnvironmentName();
  AMDGPUAS AS;
  if (Env == "amdgiz" || Env == "amdgizcl") {
    AS.FLAT_ADDRESS     = 0;
    AS.CONSTANT_ADDRESS = 4;
    AS.PRIVATE_ADDRESS  = 5;
    AS.REGION_ADDRESS   = 2;
  } else {
    AS.FLAT_ADDRESS     = 4;
    AS.CONSTANT_ADDRESS = 2;
    AS.PRIVATE_ADDRESS  = 0;
    AS.REGION_ADDRESS   = 5;
  }
  return AS;
}

AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
  return getAMDGPUAS(M.getTargetTriple());
}

AMDGPUAS getAMDGPUAS(const Module &M) {
  return getAMDGPUAS(Triple(M.getTargetTriple()));
}

} // end namespace AMDGPU
} // end namespace llvm