1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPUBaseInfo.h"
10 #include "AMDGPU.h"
11 #include "AMDGPUAsmUtils.h"
12 #include "AMDGPUTargetTransformInfo.h"
13 #include "SIDefines.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/ADT/Triple.h"
16 #include "llvm/BinaryFormat/ELF.h"
17 #include "llvm/CodeGen/MachineMemOperand.h"
18 #include "llvm/IR/Attributes.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/GlobalValue.h"
22 #include "llvm/IR/Instruction.h"
23 #include "llvm/IR/IntrinsicsAMDGPU.h"
24 #include "llvm/IR/IntrinsicsR600.h"
25 #include "llvm/IR/LLVMContext.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/MC/MCContext.h"
28 #include "llvm/MC/MCInstrDesc.h"
29 #include "llvm/MC/MCInstrInfo.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSectionELF.h"
32 #include "llvm/MC/MCSubtargetInfo.h"
33 #include "llvm/MC/SubtargetFeature.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/MathExtras.h"
37 #include <algorithm>
38 #include <cassert>
39 #include <cstdint>
40 #include <cstring>
41 #include <utility>
42 
43 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
44 
45 #define GET_INSTRINFO_NAMED_OPS
46 #define GET_INSTRMAP_INFO
47 #include "AMDGPUGenInstrInfo.inc"
48 #undef GET_INSTRMAP_INFO
49 #undef GET_INSTRINFO_NAMED_OPS
50 
namespace {

/// \returns A mask with \p Width consecutive bits set, starting at bit
/// position \p Shift.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Insert \p Src into the \p Width-bit field of \p Dst located at bit
/// \p Shift.
///
/// \returns The updated \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  const unsigned Mask = getBitMask(Shift, Width);
  Dst &= ~(1 << Shift) & ~Mask;
  return Dst | ((Src << Shift) & Mask);
}

/// Extract the \p Width-bit field of \p Src located at bit \p Shift.
///
/// \returns The extracted field value.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src >> Shift) & ((1 << Width) - 1);
}

/// \returns Bit shift of the low vmcnt field inside a s_waitcnt immediate.
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Bit width of the low vmcnt field.
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Bit shift of the expcnt field.
unsigned getExpcntBitShift() { return 4; }

/// \returns Bit width of the expcnt field.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Bit shift of the lgkmcnt field.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Bit width of the lgkmcnt field; widened from 4 to 6 bits on
/// GFX10.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Bit shift of the high vmcnt field.
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Bit width of the high vmcnt field.
unsigned getVmcntBitWidthHi() { return 2; }

} // end anonymous namespace
101 
102 namespace llvm {
103 
104 namespace AMDGPU {
105 
106 #define GET_MIMGBaseOpcodesTable_IMPL
107 #define GET_MIMGDimInfoTable_IMPL
108 #define GET_MIMGInfoTable_IMPL
109 #define GET_MIMGLZMappingTable_IMPL
110 #define GET_MIMGMIPMappingTable_IMPL
111 #include "AMDGPUGenSearchableTables.inc"
112 
113 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
114                   unsigned VDataDwords, unsigned VAddrDwords) {
115   const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
116                                              VDataDwords, VAddrDwords);
117   return Info ? Info->Opcode : -1;
118 }
119 
120 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
121   const MIMGInfo *Info = getMIMGInfo(Opc);
122   return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
123 }
124 
125 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
126   const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
127   const MIMGInfo *NewInfo =
128       getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
129                           NewChannels, OrigInfo->VAddrDwords);
130   return NewInfo ? NewInfo->Opcode : -1;
131 }
132 
// Row of the tablegen-generated MUBUF instruction table (see the
// AMDGPUGenSearchableTables.inc include below). NOTE(review): field order is
// presumably fixed by the generated initializers — add comments only.
struct MUBUFInfo {
  uint16_t Opcode;     // Instruction opcode.
  uint16_t BaseOpcode; // Base opcode shared by variants (getMUBUFBaseOpcode).
  uint8_t elements;    // Element count reported by getMUBUFElements.
  bool has_vaddr;      // Instruction has a vaddr operand.
  bool has_srsrc;      // Instruction has an srsrc operand.
  bool has_soffset;    // Instruction has an soffset operand.
};

// Row of the tablegen-generated MTBUF instruction table; same layout and
// meaning as MUBUFInfo above, queried by the getMTBUF* accessors.
struct MTBUFInfo {
  uint16_t Opcode;     // Instruction opcode.
  uint16_t BaseOpcode; // Base opcode shared by variants (getMTBUFBaseOpcode).
  uint8_t elements;    // Element count reported by getMTBUFElements.
  bool has_vaddr;      // Instruction has a vaddr operand.
  bool has_srsrc;      // Instruction has an srsrc operand.
  bool has_soffset;    // Instruction has an soffset operand.
};

// Row of the tablegen-generated SMEM table.
struct SMInfo {
  uint16_t Opcode; // Instruction opcode.
  bool IsBuffer;   // True for buffer forms (see getSMEMIsBuffer).
};
155 
156 #define GET_MTBUFInfoTable_DECL
157 #define GET_MTBUFInfoTable_IMPL
158 #define GET_MUBUFInfoTable_DECL
159 #define GET_MUBUFInfoTable_IMPL
160 #define GET_SMInfoTable_DECL
161 #define GET_SMInfoTable_IMPL
162 #include "AMDGPUGenSearchableTables.inc"
163 
164 int getMTBUFBaseOpcode(unsigned Opc) {
165   const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
166   return Info ? Info->BaseOpcode : -1;
167 }
168 
169 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
170   const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
171   return Info ? Info->Opcode : -1;
172 }
173 
174 int getMTBUFElements(unsigned Opc) {
175   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
176   return Info ? Info->elements : 0;
177 }
178 
179 bool getMTBUFHasVAddr(unsigned Opc) {
180   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
181   return Info ? Info->has_vaddr : false;
182 }
183 
184 bool getMTBUFHasSrsrc(unsigned Opc) {
185   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
186   return Info ? Info->has_srsrc : false;
187 }
188 
189 bool getMTBUFHasSoffset(unsigned Opc) {
190   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
191   return Info ? Info->has_soffset : false;
192 }
193 
194 int getMUBUFBaseOpcode(unsigned Opc) {
195   const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
196   return Info ? Info->BaseOpcode : -1;
197 }
198 
199 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
200   const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
201   return Info ? Info->Opcode : -1;
202 }
203 
204 int getMUBUFElements(unsigned Opc) {
205   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
206   return Info ? Info->elements : 0;
207 }
208 
209 bool getMUBUFHasVAddr(unsigned Opc) {
210   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
211   return Info ? Info->has_vaddr : false;
212 }
213 
214 bool getMUBUFHasSrsrc(unsigned Opc) {
215   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
216   return Info ? Info->has_srsrc : false;
217 }
218 
219 bool getMUBUFHasSoffset(unsigned Opc) {
220   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
221   return Info ? Info->has_soffset : false;
222 }
223 
224 bool getSMEMIsBuffer(unsigned Opc) {
225   const SMInfo *Info = getSMEMOpcodeHelper(Opc);
226   return Info ? Info->IsBuffer : false;
227 }
228 
229 // Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
230 // header files, so we need to wrap it in a function that takes unsigned
231 // instead.
232 int getMCOpcode(uint16_t Opcode, unsigned Gen) {
233   return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
234 }
235 
236 namespace IsaInfo {
237 
238 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
239   auto TargetTriple = STI->getTargetTriple();
240   auto Version = getIsaVersion(STI->getCPU());
241 
242   Stream << TargetTriple.getArchName() << '-'
243          << TargetTriple.getVendorName() << '-'
244          << TargetTriple.getOSName() << '-'
245          << TargetTriple.getEnvironmentName() << '-'
246          << "gfx"
247          << Version.Major
248          << Version.Minor
249          << Version.Stepping;
250 
251   if (hasXNACK(*STI))
252     Stream << "+xnack";
253   if (hasSRAMECC(*STI))
254     Stream << "+sram-ecc";
255 
256   Stream.flush();
257 }
258 
259 bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
260   return STI->getTargetTriple().getOS() == Triple::AMDHSA &&
261              STI->getFeatureBits().test(FeatureCodeObjectV3);
262 }
263 
264 unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
265   if (STI->getFeatureBits().test(FeatureWavefrontSize16))
266     return 16;
267   if (STI->getFeatureBits().test(FeatureWavefrontSize32))
268     return 32;
269 
270   return 64;
271 }
272 
273 unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
274   if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
275     return 32768;
276   if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
277     return 65536;
278 
279   return 0;
280 }
281 
282 unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
283   // "Per CU" really means "per whatever functional block the waves of a
284   // workgroup must share". For gfx10 in CU mode this is the CU, which contains
285   // two SIMDs.
286   if (isGFX10(*STI) && STI->getFeatureBits().test(FeatureCuMode))
287     return 2;
288   // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
289   // two CUs, so a total of four SIMDs.
290   return 4;
291 }
292 
293 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
294                                unsigned FlatWorkGroupSize) {
295   assert(FlatWorkGroupSize != 0);
296   if (STI->getTargetTriple().getArch() != Triple::amdgcn)
297     return 8;
298   unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
299   if (N == 1)
300     return 40;
301   N = 40 / N;
302   return std::min(N, 16u);
303 }
304 
305 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
306   return 1;
307 }
308 
309 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
310   // FIXME: Need to take scratch memory into account.
311   if (!isGFX10(*STI))
312     return 10;
313   return 20;
314 }
315 
316 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
317                                    unsigned FlatWorkGroupSize) {
318   return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
319                     getEUsPerCU(STI));
320 }
321 
322 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
323   return 1;
324 }
325 
326 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
327   // Some subtargets allow encoding 2048, but this isn't tested or supported.
328   return 1024;
329 }
330 
331 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
332                               unsigned FlatWorkGroupSize) {
333   return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
334 }
335 
336 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
337   IsaVersion Version = getIsaVersion(STI->getCPU());
338   if (Version.Major >= 10)
339     return getAddressableNumSGPRs(STI);
340   if (Version.Major >= 8)
341     return 16;
342   return 8;
343 }
344 
345 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
346   return 8;
347 }
348 
349 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
350   IsaVersion Version = getIsaVersion(STI->getCPU());
351   if (Version.Major >= 8)
352     return 800;
353   return 512;
354 }
355 
356 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
357   if (STI->getFeatureBits().test(FeatureSGPRInitBug))
358     return FIXED_NUM_SGPRS_FOR_INIT_BUG;
359 
360   IsaVersion Version = getIsaVersion(STI->getCPU());
361   if (Version.Major >= 10)
362     return 106;
363   if (Version.Major >= 8)
364     return 102;
365   return 104;
366 }
367 
368 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
369   assert(WavesPerEU != 0);
370 
371   IsaVersion Version = getIsaVersion(STI->getCPU());
372   if (Version.Major >= 10)
373     return 0;
374 
375   if (WavesPerEU >= getMaxWavesPerEU(STI))
376     return 0;
377 
378   unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
379   if (STI->getFeatureBits().test(FeatureTrapHandler))
380     MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
381   MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
382   return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
383 }
384 
385 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
386                         bool Addressable) {
387   assert(WavesPerEU != 0);
388 
389   unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
390   IsaVersion Version = getIsaVersion(STI->getCPU());
391   if (Version.Major >= 10)
392     return Addressable ? AddressableNumSGPRs : 108;
393   if (Version.Major >= 8 && !Addressable)
394     AddressableNumSGPRs = 112;
395   unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
396   if (STI->getFeatureBits().test(FeatureTrapHandler))
397     MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
398   MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
399   return std::min(MaxNumSGPRs, AddressableNumSGPRs);
400 }
401 
402 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
403                           bool FlatScrUsed, bool XNACKUsed) {
404   unsigned ExtraSGPRs = 0;
405   if (VCCUsed)
406     ExtraSGPRs = 2;
407 
408   IsaVersion Version = getIsaVersion(STI->getCPU());
409   if (Version.Major >= 10)
410     return ExtraSGPRs;
411 
412   if (Version.Major < 8) {
413     if (FlatScrUsed)
414       ExtraSGPRs = 4;
415   } else {
416     if (XNACKUsed)
417       ExtraSGPRs = 4;
418 
419     if (FlatScrUsed)
420       ExtraSGPRs = 6;
421   }
422 
423   return ExtraSGPRs;
424 }
425 
426 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
427                           bool FlatScrUsed) {
428   return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
429                           STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
430 }
431 
432 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
433   NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
434   // SGPRBlocks is actual number of SGPR blocks minus 1.
435   return NumSGPRs / getSGPREncodingGranule(STI) - 1;
436 }
437 
438 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
439                              Optional<bool> EnableWavefrontSize32) {
440   bool IsWave32 = EnableWavefrontSize32 ?
441       *EnableWavefrontSize32 :
442       STI->getFeatureBits().test(FeatureWavefrontSize32);
443   return IsWave32 ? 8 : 4;
444 }
445 
446 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
447                                 Optional<bool> EnableWavefrontSize32) {
448   return getVGPRAllocGranule(STI, EnableWavefrontSize32);
449 }
450 
451 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
452   if (!isGFX10(*STI))
453     return 256;
454   return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512;
455 }
456 
457 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
458   return 256;
459 }
460 
461 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
462   assert(WavesPerEU != 0);
463 
464   if (WavesPerEU >= getMaxWavesPerEU(STI))
465     return 0;
466   unsigned MinNumVGPRs =
467       alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
468                 getVGPRAllocGranule(STI)) + 1;
469   return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
470 }
471 
472 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
473   assert(WavesPerEU != 0);
474 
475   unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
476                                    getVGPRAllocGranule(STI));
477   unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
478   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
479 }
480 
481 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
482                           Optional<bool> EnableWavefrontSize32) {
483   NumVGPRs = alignTo(std::max(1u, NumVGPRs),
484                      getVGPREncodingGranule(STI, EnableWavefrontSize32));
485   // VGPRBlocks is actual number of VGPR blocks minus 1.
486   return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
487 }
488 
489 } // end namespace IsaInfo
490 
/// Initialize \p Header with the default amd_kernel_code_t values for the
/// subtarget \p STI.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  // Start from all-zeros and fill in only the non-default fields.
  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  // Code begins immediately after the header.
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // Apparently log2-encoded: 6 here (wave64) vs. 5 in the wave32 override
  // below — TODO confirm against the amd_kernel_code_t spec.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value must
  // be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n.  The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    // Wave32 and the WGP/CU mode bits only exist on GFX10+.
    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
      Header.wavefront_size = 5;
      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
    }
    // WGP_MODE is set unless the subtarget runs in CU mode; MEM_ORDERED is
    // always set for GFX10.
    Header.compute_pgm_resource_registers |=
      S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
      S_00B848_MEM_ORDERED(1);
  }
}
526 
/// Build an amdhsa kernel descriptor populated with the default field values
/// for the subtarget \p STI; callers override fields as needed.
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  // Start from all-zeros; only non-zero defaults are set below.
  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));

  // Default f16/f64 denormal mode: no flushing.
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
  // The workgroup id in X is always made available to the kernel.
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  if (Version.Major >= 10) {
    // Wave32, WGP-vs-CU mode, and MEM_ORDERED bits only exist on GFX10+.
    AMDHSA_BITS_SET(KD.kernel_code_properties,
                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
  }
  return KD;
}
555 
556 bool isGroupSegment(const GlobalValue *GV) {
557   return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
558 }
559 
560 bool isGlobalSegment(const GlobalValue *GV) {
561   return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
562 }
563 
564 bool isReadOnlySegment(const GlobalValue *GV) {
565   unsigned AS = GV->getAddressSpace();
566   return AS == AMDGPUAS::CONSTANT_ADDRESS ||
567          AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
568 }
569 
570 bool shouldEmitConstantsToTextSection(const Triple &TT) {
571   return TT.getOS() == Triple::AMDPAL || TT.getArch() == Triple::r600;
572 }
573 
574 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
575   Attribute A = F.getFnAttribute(Name);
576   int Result = Default;
577 
578   if (A.isStringAttribute()) {
579     StringRef Str = A.getValueAsString();
580     if (Str.getAsInteger(0, Result)) {
581       LLVMContext &Ctx = F.getContext();
582       Ctx.emitError("can't parse integer attribute " + Name);
583     }
584   }
585 
586   return Result;
587 }
588 
/// Parse the string function attribute \p Name as a pair of comma-separated
/// integers ("<first>[,<second>]").
///
/// \returns \p Default when the attribute is absent or malformed. When
/// \p OnlyFirstRequired is set, a missing second value is accepted and the
/// default second value kept. Malformed values emit an error on the
/// function's LLVMContext.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    // An empty second element is tolerated when only the first is required;
    // any other unparsable value is an error.
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
613 
614 unsigned getVmcntBitMask(const IsaVersion &Version) {
615   unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
616   if (Version.Major < 9)
617     return VmcntLo;
618 
619   unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
620   return VmcntLo | VmcntHi;
621 }
622 
623 unsigned getExpcntBitMask(const IsaVersion &Version) {
624   return (1 << getExpcntBitWidth()) - 1;
625 }
626 
627 unsigned getLgkmcntBitMask(const IsaVersion &Version) {
628   return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
629 }
630 
631 unsigned getWaitcntBitMask(const IsaVersion &Version) {
632   unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
633   unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
634   unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
635                                 getLgkmcntBitWidth(Version.Major));
636   unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
637   if (Version.Major < 9)
638     return Waitcnt;
639 
640   unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
641   return Waitcnt | VmcntHi;
642 }
643 
644 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
645   unsigned VmcntLo =
646       unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
647   if (Version.Major < 9)
648     return VmcntLo;
649 
650   unsigned VmcntHi =
651       unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
652   VmcntHi <<= getVmcntBitWidthLo();
653   return VmcntLo | VmcntHi;
654 }
655 
656 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
657   return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
658 }
659 
660 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
661   return unpackBits(Waitcnt, getLgkmcntBitShift(),
662                     getLgkmcntBitWidth(Version.Major));
663 }
664 
665 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
666                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
667   Vmcnt = decodeVmcnt(Version, Waitcnt);
668   Expcnt = decodeExpcnt(Version, Waitcnt);
669   Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
670 }
671 
672 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
673   Waitcnt Decoded;
674   Decoded.VmCnt = decodeVmcnt(Version, Encoded);
675   Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
676   Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
677   return Decoded;
678 }
679 
680 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
681                      unsigned Vmcnt) {
682   Waitcnt =
683       packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
684   if (Version.Major < 9)
685     return Waitcnt;
686 
687   Vmcnt >>= getVmcntBitWidthLo();
688   return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
689 }
690 
691 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
692                       unsigned Expcnt) {
693   return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
694 }
695 
696 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
697                        unsigned Lgkmcnt) {
698   return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
699                                     getLgkmcntBitWidth(Version.Major));
700 }
701 
702 unsigned encodeWaitcnt(const IsaVersion &Version,
703                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
704   unsigned Waitcnt = getWaitcntBitMask(Version);
705   Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
706   Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
707   Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
708   return Waitcnt;
709 }
710 
711 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
712   return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
713 }
714 
715 //===----------------------------------------------------------------------===//
716 // hwreg
717 //===----------------------------------------------------------------------===//
718 
719 namespace Hwreg {
720 
721 int64_t getHwregId(const StringRef Name) {
722   for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; ++Id) {
723     if (IdSymbolic[Id] && Name == IdSymbolic[Id])
724       return Id;
725   }
726   return ID_UNKNOWN_;
727 }
728 
729 static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
730   if (isSI(STI) || isCI(STI) || isVI(STI))
731     return ID_SYMBOLIC_FIRST_GFX9_;
732   else if (isGFX9(STI))
733     return ID_SYMBOLIC_FIRST_GFX10_;
734   else
735     return ID_SYMBOLIC_LAST_;
736 }
737 
738 bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
739   return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
740          IdSymbolic[Id];
741 }
742 
743 bool isValidHwreg(int64_t Id) {
744   return 0 <= Id && isUInt<ID_WIDTH_>(Id);
745 }
746 
747 bool isValidHwregOffset(int64_t Offset) {
748   return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
749 }
750 
751 bool isValidHwregWidth(int64_t Width) {
752   return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
753 }
754 
755 uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
756   return (Id << ID_SHIFT_) |
757          (Offset << OFFSET_SHIFT_) |
758          ((Width - 1) << WIDTH_M1_SHIFT_);
759 }
760 
761 StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
762   return isValidHwreg(Id, STI) ? IdSymbolic[Id] : "";
763 }
764 
765 void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
766   Id = (Val & ID_MASK_) >> ID_SHIFT_;
767   Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
768   Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
769 }
770 
771 } // namespace Hwreg
772 
773 //===----------------------------------------------------------------------===//
774 // SendMsg
775 //===----------------------------------------------------------------------===//
776 
namespace SendMsg {

/// Map the symbolic message name \p Name to its numeric id.
/// \returns ID_UNKNOWN_ if the name is not recognized.
int64_t getMsgId(const StringRef Name) {
  // The id range contains gaps; unused slots in IdSymbolic are null.
  for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
    if (IdSymbolic[i] && Name == IdSymbolic[i])
      return i;
  }
  return ID_UNKNOWN_;
}

/// \returns True if \p MsgId is an id with a symbolic name (gaps excluded).
static bool isValidMsgId(int64_t MsgId) {
  return (ID_GAPS_FIRST_ <= MsgId && MsgId < ID_GAPS_LAST_) && IdSymbolic[MsgId];
}

/// Validate \p MsgId for the subtarget \p STI. In non-strict mode any value
/// that fits the encoding field is accepted.
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
  if (Strict) {
    // These two messages are only accepted on GFX9 and GFX10.
    if (MsgId == ID_GS_ALLOC_REQ || MsgId == ID_GET_DOORBELL)
      return isGFX9(STI) || isGFX10(STI);
    else
      return isValidMsgId(MsgId);
  } else {
    return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId);
  }
}

/// \returns The symbolic name of message \p MsgId, or "" if invalid.
StringRef getMsgName(int64_t MsgId) {
  return isValidMsgId(MsgId)? IdSymbolic[MsgId] : "";
}

/// Map the symbolic operation name \p Name (for message \p MsgId) to its
/// numeric operation id, or OP_UNKNOWN_.
int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
  // SYSMSG has its own operation table; all other messages use the GS table.
  const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
  const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
  const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
  for (int i = F; i < L; ++i) {
    if (Name == S[i]) {
      return i;
    }
  }
  return OP_UNKNOWN_;
}

/// Validate the operation id \p OpId for message \p MsgId. In non-strict
/// mode any value that fits the encoding field is accepted.
bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict) {

  if (!Strict)
    return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);

  switch(MsgId)
  {
  case ID_GS:
    // GS_NOP is only valid with GS_DONE.
    return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
  case ID_GS_DONE:
    return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
  case ID_SYSMSG:
    return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
  default:
    // Messages without operations must encode OP_NONE_.
    return OpId == OP_NONE_;
  }
}

/// \returns The symbolic name of operation \p OpId of message \p MsgId.
/// Only valid for messages that require an operation.
StringRef getMsgOpName(int64_t MsgId, int64_t OpId) {
  assert(msgRequiresOp(MsgId));
  return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
}

/// Validate the stream id \p StreamId for message \p MsgId with operation
/// \p OpId. In non-strict mode any value that fits the encoding is accepted.
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict) {

  if (!Strict)
    return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);

  switch(MsgId)
  {
  case ID_GS:
    return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
  case ID_GS_DONE:
    // GS_DONE(NOP) carries no stream id; other GS_DONE ops require one.
    return (OpId == OP_GS_NOP)?
           (StreamId == STREAM_ID_NONE_) :
           (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
  default:
    return StreamId == STREAM_ID_NONE_;
  }
}

/// \returns True if message \p MsgId must be encoded with an operation id.
bool msgRequiresOp(int64_t MsgId) {
  return MsgId == ID_GS || MsgId == ID_GS_DONE || MsgId == ID_SYSMSG;
}

/// \returns True if message \p MsgId with operation \p OpId also carries a
/// stream id.
bool msgSupportsStream(int64_t MsgId, int64_t OpId) {
  return (MsgId == ID_GS || MsgId == ID_GS_DONE) && OpId != OP_GS_NOP;
}

/// Split the encoded s_sendmsg immediate \p Val into message, operation, and
/// stream ids.
void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId) {
  MsgId = Val & ID_MASK_;
  OpId = (Val & OP_MASK_) >> OP_SHIFT_;
  StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
}

/// Pack message, operation, and stream ids into a s_sendmsg immediate.
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId) {
  return (MsgId << ID_SHIFT_) |
         (OpId << OP_SHIFT_) |
         (StreamId << STREAM_ID_SHIFT_);
}

} // namespace SendMsg
885 
886 //===----------------------------------------------------------------------===//
887 //
888 //===----------------------------------------------------------------------===//
889 
890 unsigned getInitialPSInputAddr(const Function &F) {
891   return getIntegerAttribute(F, "InitialPSInputAddr", 0);
892 }
893 
894 bool isShader(CallingConv::ID cc) {
895   switch(cc) {
896     case CallingConv::AMDGPU_VS:
897     case CallingConv::AMDGPU_LS:
898     case CallingConv::AMDGPU_HS:
899     case CallingConv::AMDGPU_ES:
900     case CallingConv::AMDGPU_GS:
901     case CallingConv::AMDGPU_PS:
902     case CallingConv::AMDGPU_CS:
903       return true;
904     default:
905       return false;
906   }
907 }
908 
909 bool isCompute(CallingConv::ID cc) {
910   return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
911 }
912 
913 bool isEntryFunctionCC(CallingConv::ID CC) {
914   switch (CC) {
915   case CallingConv::AMDGPU_KERNEL:
916   case CallingConv::SPIR_KERNEL:
917   case CallingConv::AMDGPU_VS:
918   case CallingConv::AMDGPU_GS:
919   case CallingConv::AMDGPU_PS:
920   case CallingConv::AMDGPU_CS:
921   case CallingConv::AMDGPU_ES:
922   case CallingConv::AMDGPU_HS:
923   case CallingConv::AMDGPU_LS:
924     return true;
925   default:
926     return false;
927   }
928 }
929 
/// \returns true if the subtarget reports the XNACK feature.
bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

/// \returns true if the subtarget reports the SRAM-ECC feature.
bool hasSRAMECC(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
}

/// \returns true if MIMG R128 addressing is usable: the MIMG_R128 feature
/// must be present and the R128-A16 feature absent.
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] && !STI.getFeatureBits()[AMDGPU::FeatureR128A16];
}

/// \returns true if the subtarget reports the GFX10 A16 feature.
bool hasGFX10A16(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10A16];
}

/// \returns true for targets with packed D16 VMEM operations, i.e. when
/// the UnpackedD16VMem feature is absent.
bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
}

// Generation predicates, each keyed off the corresponding subtarget
// feature bit.
bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGFX10(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
}

/// \returns true if the subtarget uses the GCN3 instruction encoding.
bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}
973 
/// \returns true if \p Reg is an SGPR (including SGPR tuples) or SCC.
/// Tuples are classified by their first 32-bit sub-register; a register
/// with no sub0 sub-register is tested directly.
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
    Reg == AMDGPU::SCC;
}
980 
981 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
982   for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
983     if (*R == Reg1) return true;
984   }
985   return false;
986 }
987 
// Expands to a switch over \p Reg that remaps FLAT_SCR and trap-temp
// (TTMP) registers between their subtarget-specific MC variants and the
// generic pseudo registers.  The CASE_CI_VI and CASE_VI_GFX9_GFX10 macros
// select the mapping direction and must be defined before each expansion.
#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9_GFX10(TTMP0) \
  CASE_VI_GFX9_GFX10(TTMP1) \
  CASE_VI_GFX9_GFX10(TTMP2) \
  CASE_VI_GFX9_GFX10(TTMP3) \
  CASE_VI_GFX9_GFX10(TTMP4) \
  CASE_VI_GFX9_GFX10(TTMP5) \
  CASE_VI_GFX9_GFX10(TTMP6) \
  CASE_VI_GFX9_GFX10(TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP8) \
  CASE_VI_GFX9_GFX10(TTMP9) \
  CASE_VI_GFX9_GFX10(TTMP10) \
  CASE_VI_GFX9_GFX10(TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP12) \
  CASE_VI_GFX9_GFX10(TTMP13) \
  CASE_VI_GFX9_GFX10(TTMP14) \
  CASE_VI_GFX9_GFX10(TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \
  CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \
  CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \
  CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \
  CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \
  CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

// Pseudo -> MC direction.  NOTE(review): the assert expands to a plain
// statement placed before the case label; because every preceding case in
// MAP_REG2REG returns, the assert appears unreachable -- confirm whether it
// still guards what was intended.
#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9_GFX10(node) \
  case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi;

/// Map a pseudo register \p Reg to the real register for subtarget \p STI.
/// R600 targets have no remapping and return \p Reg unchanged.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9_GFX10

// MC -> pseudo direction: collapse any subtarget-specific variant back to
// the generic pseudo register.
#define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
#define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node;

/// Map a subtarget-specific MC register back to its generic pseudo form.
unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9_GFX10
#undef MAP_REG2REG
1055 
1056 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1057   assert(OpNo < Desc.NumOperands);
1058   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
1059   return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
1060          OpType <= AMDGPU::OPERAND_SRC_LAST;
1061 }
1062 
/// \returns true if operand \p OpNo of \p Desc is a floating-point source
/// operand (immediate FP, inline-constant FP, inline-AC FP, or the packed
/// v2f16/v2i16 variants listed below).
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    return true;
  default:
    return false;
  }
}
1086 
1087 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1088   assert(OpNo < Desc.NumOperands);
1089   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
1090   return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
1091          OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
1092 }
1093 
// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
/// \returns the width in bits of register class \p RCID; each case group
/// below lists all classes of one width.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_LO16RegClassID:
  case AMDGPU::VGPR_HI16RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
  case AMDGPU::AGPR_LO16RegClassID:
    return 16;
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
  case AMDGPU::AReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
  case AMDGPU::AReg_160RegClassID:
    return 160;
  case AMDGPU::SGPR_192RegClassID:
  case AMDGPU::SReg_192RegClassID:
  case AMDGPU::VReg_192RegClassID:
  case AMDGPU::AReg_192RegClassID:
    return 192;
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
  case AMDGPU::AReg_256RegClassID:
    return 256;
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
    return 512;
  case AMDGPU::SGPR_1024RegClassID:
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
    return 1024;
  default:
    llvm_unreachable("Unexpected register class");
  }
}
1160 
1161 unsigned getRegBitWidth(const MCRegisterClass &RC) {
1162   return getRegBitWidth(RC.getID());
1163 }
1164 
1165 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
1166                            unsigned OpNo) {
1167   assert(OpNo < Desc.NumOperands);
1168   unsigned RCID = Desc.OpInfo[OpNo].RegClass;
1169   return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
1170 }
1171 
1172 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
1173   if (Literal >= -16 && Literal <= 64)
1174     return true;
1175 
1176   uint64_t Val = static_cast<uint64_t>(Literal);
1177   return (Val == DoubleToBits(0.0)) ||
1178          (Val == DoubleToBits(1.0)) ||
1179          (Val == DoubleToBits(-1.0)) ||
1180          (Val == DoubleToBits(0.5)) ||
1181          (Val == DoubleToBits(-0.5)) ||
1182          (Val == DoubleToBits(2.0)) ||
1183          (Val == DoubleToBits(-2.0)) ||
1184          (Val == DoubleToBits(4.0)) ||
1185          (Val == DoubleToBits(-4.0)) ||
1186          (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
1187 }
1188 
1189 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
1190   if (Literal >= -16 && Literal <= 64)
1191     return true;
1192 
1193   // The actual type of the operand does not seem to matter as long
1194   // as the bits match one of the inline immediate values.  For example:
1195   //
1196   // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
1197   // so it is a legal inline immediate.
1198   //
1199   // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
1200   // floating-point, so it is a legal inline immediate.
1201 
1202   uint32_t Val = static_cast<uint32_t>(Literal);
1203   return (Val == FloatToBits(0.0f)) ||
1204          (Val == FloatToBits(1.0f)) ||
1205          (Val == FloatToBits(-1.0f)) ||
1206          (Val == FloatToBits(0.5f)) ||
1207          (Val == FloatToBits(-0.5f)) ||
1208          (Val == FloatToBits(2.0f)) ||
1209          (Val == FloatToBits(-2.0f)) ||
1210          (Val == FloatToBits(4.0f)) ||
1211          (Val == FloatToBits(-4.0f)) ||
1212          (Val == 0x3e22f983 && HasInv2Pi);
1213 }
1214 
/// \returns true if \p Literal is a 16-bit inline constant: a small
/// integer in [-16, 64] or one of the inlinable fp16 bit patterns.
/// Requires \p HasInv2Pi (no 16-bit inline constants without it).
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  switch (static_cast<uint16_t>(Literal)) {
  case 0x3C00: // 1.0
  case 0xBC00: // -1.0
  case 0x3800: // 0.5
  case 0xB800: // -0.5
  case 0x4000: // 2.0
  case 0xC000: // -2.0
  case 0x4400: // 4.0
  case 0xC400: // -4.0
  case 0x3118: // 1/(2*pi)
    return true;
  default:
    return false;
  }
}
1233 
1234 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
1235   assert(HasInv2Pi);
1236 
1237   if (isInt<16>(Literal) || isUInt<16>(Literal)) {
1238     int16_t Trunc = static_cast<int16_t>(Literal);
1239     return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
1240   }
1241   if (!(Literal & 0xffff))
1242     return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);
1243 
1244   int16_t Lo16 = static_cast<int16_t>(Literal);
1245   int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
1246   return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
1247 }
1248 
/// \returns true if argument \p A is known to be passed in an SGPR for its
/// function's calling convention, and is therefore uniform.
bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    // Kernel arguments are always uniform.
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
    // Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}
1274 
// SMEM immediate offsets are expressed in bytes (rather than dwords) on
// GCN3-encoding and GFX10 targets.
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
  return isGCN3Encoding(ST) || isGFX10(ST);
}

// GFX9 and GFX10 accept a signed immediate offset on SMRD instructions.
static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
  return isGFX9(ST) || isGFX10(ST);
}
1282 
1283 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
1284                                       int64_t EncodedOffset) {
1285   return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
1286                                : isUInt<8>(EncodedOffset);
1287 }
1288 
1289 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
1290                                     int64_t EncodedOffset,
1291                                     bool IsBuffer) {
1292   return !IsBuffer &&
1293          hasSMRDSignedImmOffset(ST) &&
1294          isInt<21>(EncodedOffset);
1295 }
1296 
/// \returns true if \p ByteOffset is a multiple of 4 (dword-aligned).
static bool isDwordAligned(uint64_t ByteOffset) {
  return ByteOffset % 4 == 0;
}
1300 
1301 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
1302                                 uint64_t ByteOffset) {
1303   if (hasSMEMByteOffset(ST))
1304     return ByteOffset;
1305 
1306   assert(isDwordAligned(ByteOffset));
1307   return ByteOffset >> 2;
1308 }
1309 
/// Try to encode \p ByteOffset as an SMRD immediate offset for \p ST;
/// returns None when the value cannot be represented.
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer) {
  // The signed version is always a byte offset.
  // NOTE(review): this uses isInt<20> while isLegalSMRDEncodedSignedOffset
  // accepts isInt<21> -- confirm the narrower bound here is intentional.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
    assert(hasSMEMByteOffset(ST));
    return isInt<20>(ByteOffset) ? Optional<int64_t>(ByteOffset) : None;
  }

  // Dword-unit targets can only encode 4-byte-aligned offsets.
  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
    return None;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
             ? Optional<int64_t>(EncodedOffset)
             : None;
}
1326 
1327 Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
1328                                                 int64_t ByteOffset) {
1329   if (!isCI(ST) || !isDwordAligned(ByteOffset))
1330     return None;
1331 
1332   int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
1333   return isUInt<32>(EncodedOffset) ? Optional<int64_t>(EncodedOffset) : None;
1334 }
1335 
// Given Imm, split it into the values to put into the SOffset and ImmOffset
// fields in an MUBUF instruction. Return false if it is not possible (due to a
// hardware bug needing a workaround).
//
// The required alignment ensures that individual address components remain
// aligned if they are aligned to begin with. It also ensures that additional
// offsets within the given alignment can be added to the resulting ImmOffset.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget, uint32_t Align) {
  // Largest immediate that keeps ImmOffset aligned to Align (the MUBUF
  // immediate field is 12 bits, max 4095).
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = Imm;
  SOffset = Overflow;
  return true;
}
1381 
/// Initialize the mode-register defaults for \p F: start from the
/// calling-convention defaults, then apply per-function attribute
/// overrides.
SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
  *this = getDefaultForCallingConv(F.getCallingConv());

  // "amdgpu-ieee": explicit override of IEEE mode ("true"/"false").
  StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
  if (!IEEEAttr.empty())
    IEEE = IEEEAttr == "true";

  // "amdgpu-dx10-clamp": explicit override of DX10 clamping.
  StringRef DX10ClampAttr
    = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
  if (!DX10ClampAttr.empty())
    DX10Clamp = DX10ClampAttr == "true";

  // "denormal-fp-math-f32": f32-specific denormal mode.
  StringRef DenormF32Attr = F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
  if (!DenormF32Attr.empty()) {
    DenormalMode DenormMode = parseDenormalFPAttribute(DenormF32Attr);
    FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
    FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
  }

  // "denormal-fp-math": always applies to f64/f16; applies to f32 only
  // when no f32-specific attribute was given.
  StringRef DenormAttr = F.getFnAttribute("denormal-fp-math").getValueAsString();
  if (!DenormAttr.empty()) {
    DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);

    if (DenormF32Attr.empty()) {
      FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
      FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
    }

    FP64FP16InputDenormals = DenormMode.Input == DenormalMode::IEEE;
    FP64FP16OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
  }
}
1414 
namespace {

// Key type for the generated SourcesOfDivergence table; holds the
// intrinsic ID looked up by lookupSourceOfDivergence below.
struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

// Pull the tablegen'd search tables (divergence sources, GFX9 and GFX10+
// buffer formats) into this anonymous namespace.
#define GET_SourcesOfDivergence_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10PlusBufferFormat_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace
1428 
1429 bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
1430   return lookupSourceOfDivergence(IntrID);
1431 }
1432 
1433 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
1434                                                   uint8_t NumComponents,
1435                                                   uint8_t NumFormat,
1436                                                   const MCSubtargetInfo &STI) {
1437   return isGFX10(STI)
1438              ? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents,
1439                                             NumFormat)
1440              : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
1441 }
1442 
1443 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
1444                                                   const MCSubtargetInfo &STI) {
1445   return isGFX10(STI) ? getGfx10PlusBufferFormatInfo(Format)
1446                       : getGfx9BufferFormatInfo(Format);
1447 }
1448 
1449 } // namespace AMDGPU
1450 } // namespace llvm
1451