//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRMAP_INFO
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}
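// Example: getBitMask(4, 3) yields 0x70, i.e. a mask covering bits [6:4].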

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}
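// Example: packBits(0x3, 0x0, 4, 3) yields 0x30: the low three bits of Src
// are written into bits [6:4] of Dst.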

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }
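// Taken together, these helpers describe the simm16 layout used by s_waitcnt:
//   vmcnt:   bits [3:0], plus bits [15:14] on gfx9 and later
//   expcnt:  bits [6:4]
//   lgkmcnt: bits [11:8]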

} // end anonymous namespace

namespace llvm {

static cl::opt<bool> EnablePackedInlinableLiterals(
    "enable-packed-inlinable-literals",
    cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
    cl::init(false));

namespace AMDGPU {

LLVM_READNONE
static inline Channels indexToChannel(unsigned Channel) {
  switch (Channel) {
  case 1:
    return AMDGPU::Channels_1;
  case 2:
    return AMDGPU::Channels_2;
  case 3:
    return AMDGPU::Channels_3;
  case 4:
    return AMDGPU::Channels_4;
  default:
    llvm_unreachable("invalid MIMG channel");
  }
}

// FIXME: Need to handle d16 images correctly.
static unsigned rcToChannels(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_32RegClassID:
    return 1;
  case AMDGPU::VReg_64RegClassID:
    return 2;
  case AMDGPU::VReg_96RegClassID:
    return 3;
  case AMDGPU::VReg_128RegClassID:
    return 4;
  default:
    llvm_unreachable("invalid MIMG register class");
  }
}

int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc,
                    unsigned NewChannels) {
  AMDGPU::Channels Channel = AMDGPU::indexToChannel(NewChannels);
  unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass);
  if (NewChannels == OrigChannels)
    return Opc;

  switch (OrigChannels) {
  case 1:
    return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
  case 2:
    return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
  case 3:
    return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
  case 4:
    return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
  default:
    llvm_unreachable("invalid MIMG channel");
  }
}

int getMaskedMIMGAtomicOp(const MCInstrInfo &MII, unsigned Opc,
                          unsigned NewChannels) {
  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst) != -1);
  assert(NewChannels == 1 || NewChannels == 2 || NewChannels == 4);

  unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass);
  assert(OrigChannels == 1 || OrigChannels == 2 || OrigChannels == 4);

  if (NewChannels == OrigChannels)
    return Opc;

  if (OrigChannels <= 2 && NewChannels <= 2) {
    // This is an ordinary atomic (not an atomic_cmpswap).
    return (OrigChannels == 1) ?
      AMDGPU::getMIMGAtomicOp1(Opc) : AMDGPU::getMIMGAtomicOp2(Opc);
  } else if (OrigChannels >= 2 && NewChannels >= 2) {
    // This is an atomic_cmpswap.
    return (OrigChannels == 2) ?
      AMDGPU::getMIMGAtomicOp1(Opc) : AMDGPU::getMIMGAtomicOp2(Opc);
  } else { // Invalid OrigChannels/NewChannels combination.
    return -1;
  }
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

namespace IsaInfo {

IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // GCN GFX6 (Southern Islands (SI)).
  if (Features.test(FeatureISAVersion6_0_0))
    return {6, 0, 0};
  if (Features.test(FeatureISAVersion6_0_1))
    return {6, 0, 1};

  // GCN GFX7 (Sea Islands (CI)).
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};
  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};
  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};
  if (Features.test(FeatureISAVersion7_0_3))
    return {7, 0, 3};
  if (Features.test(FeatureISAVersion7_0_4))
    return {7, 0, 4};
  if (Features.test(FeatureSeaIslands))
    return {7, 0, 0};

  // GCN GFX8 (Volcanic Islands (VI)).
  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};
  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};
  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};
  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};
  if (Features.test(FeatureVolcanicIslands))
    return {8, 0, 0};

  // GCN GFX9.
  if (Features.test(FeatureISAVersion9_0_0))
    return {9, 0, 0};
  if (Features.test(FeatureISAVersion9_0_2))
    return {9, 0, 2};
  if (Features.test(FeatureGFX9))
    return {9, 0, 0};

  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
    return {0, 0, 0};
  return {7, 0, 0};
}

void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
  auto TargetTriple = STI->getTargetTriple();
  auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());

  Stream << TargetTriple.getArchName() << '-'
         << TargetTriple.getVendorName() << '-'
         << TargetTriple.getOSName() << '-'
         << TargetTriple.getEnvironmentName() << '-'
         << "gfx"
         << ISAVersion.Major
         << ISAVersion.Minor
         << ISAVersion.Stepping;
  Stream.flush();
}
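// The streamed name has the form <arch>-<vendor>-<os>-<environment>-gfx<MMS>,
// e.g. "amdgcn-amd-amdhsa--gfx803" for an HSA target whose triple has no
// environment component.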

bool hasCodeObjectV3(const FeatureBitset &Features) {
  return Features.test(FeatureCodeObjectV3);
}

unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
    return 16;
  if (Features.test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
    return 32768;
  if (Features.test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
    return 8;
  // FIXME: Need to take scratch memory into account.
  return 10;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}

unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
                 getWavefrontSize(Features);
}
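// Example: a flat work group size of 129 on a wavefront-size-64 target needs
// alignTo(129, 64) / 64 = 3 waves.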

unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}

unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumSGPRs =
      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
                getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}

unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
                                   getSGPRAllocGranule(Features));
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}

unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}

unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
                                   getVGPRAllocGranule(Features));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
         GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
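// For example, an attribute string of "128,256" parses to the pair (128, 256).
// With OnlyFirstRequired set, "128" alone is also accepted and the second
// value keeps its default.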

unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}
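// For example, before gfx9 getWaitcntBitMask() is 0xf7f (vmcnt in bits [3:0],
// expcnt in [6:4], lgkmcnt in [11:8]); on gfx9 and later the extra vmcnt bits
// [15:14] are included as well, giving 0xcf7f.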

unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}
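// Example: on gfx9 and later a 6-bit vmcnt of 0x23 is stored with its low
// nibble (0x3) in bits [3:0] and its high two bits (0x2) in bits [15:14];
// decodeVmcnt() reassembles those pieces into 0x23.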

unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}
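// The encoding starts from the all-ones mask, so a caller that does not care
// about a particular counter can simply pass that counter's bit mask. An
// illustrative call that waits only for outstanding vector memory operations:
//   unsigned W = encodeWaitcnt(Version, /*Vmcnt=*/0,
//                              getExpcntBitMask(Version),
//                              getLgkmcntBitMask(Version));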

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
    case CallingConv::AMDGPU_VS:
    case CallingConv::AMDGPU_LS:
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_ES:
    case CallingConv::AMDGPU_GS:
    case CallingConv::AMDGPU_PS:
    case CallingConv::AMDGPU_CS:
      return true;
    default:
      return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
}

bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
    Reg == AMDGPU::SCC;
}

bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1) return true;
  }
  return false;
}

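// MAP_REG2REG expands to a switch that translates between subtarget-agnostic
// (pseudo) registers and their encoding-specific MC counterparts: CASE_CI_VI
// entries differ between the CI and VI encodings, CASE_VI_GFX9 entries between
// VI and GFX9. getMCReg() maps pseudo to real registers, and mc2PseudoReg()
// maps back.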
#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9(TTMP0) \
  CASE_VI_GFX9(TTMP1) \
  CASE_VI_GFX9(TTMP2) \
  CASE_VI_GFX9(TTMP3) \
  CASE_VI_GFX9(TTMP4) \
  CASE_VI_GFX9(TTMP5) \
  CASE_VI_GFX9(TTMP6) \
  CASE_VI_GFX9(TTMP7) \
  CASE_VI_GFX9(TTMP8) \
  CASE_VI_GFX9(TTMP9) \
  CASE_VI_GFX9(TTMP10) \
  CASE_VI_GFX9(TTMP11) \
  CASE_VI_GFX9(TTMP12) \
  CASE_VI_GFX9(TTMP13) \
  CASE_VI_GFX9(TTMP14) \
  CASE_VI_GFX9(TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1) \
  CASE_VI_GFX9(TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5) \
  CASE_VI_GFX9(TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9) \
  CASE_VI_GFX9(TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13) \
  CASE_VI_GFX9(TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9(node) \
  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9

#define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;

unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9
#undef MAP_REG2REG

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values.  For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (!EnablePackedInlinableLiterals)
    return false;

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}
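// For example, 0x3c003c00 (1.0 replicated in both f16 halves) is inlinable,
// whereas 0x3c000000 is not, because its two halves differ.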

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(),
                                                Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(),
                                                Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isGCN3Encoding(ST))
    return ByteOffset;
  return ByteOffset >> 2;
}
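// Example: a byte offset of 16 is encoded as 16 on targets with the GCN3
// encoding, which take a byte offset, and as 4 elsewhere, where the field is
// in dwords.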

bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isGCN3Encoding(ST) ?
    isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}

} // end namespace AMDGPU

} // end namespace llvm

namespace llvm {
namespace AMDGPU {

AMDGPUAS getAMDGPUAS(Triple T) {
  AMDGPUAS AS;
  AS.FLAT_ADDRESS = 0;
  AS.PRIVATE_ADDRESS = 5;
  AS.REGION_ADDRESS = 2;
  return AS;
}

AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
  return getAMDGPUAS(M.getTargetTriple());
}

AMDGPUAS getAMDGPUAS(const Module &M) {
  return getAMDGPUAS(Triple(M.getTargetTriple()));
}

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergenceByIntr(unsigned Intr);

#define GET_SOURCEOFDIVERGENCE_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergenceByIntr(IntrID) != nullptr;
}

} // end namespace AMDGPU
} // end namespace llvm