//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}

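// The call frame pseudo opcodes (ADJCALLSTACKUP/ADJCALLSTACKDOWN) are passed
// to the TableGen'd base class so generic code can identify them.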
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
  : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
    ST(ST),
    AMDGPUASI(ST.getAMDGPUAS()) {}

// FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
// be clustered as expected. It should really split them into two batches of 16.
//
// Loads are clustered until this returns false, rather than trying to schedule
// groups of stores. This also means we have to decide whether loads from
// different address spaces should be clustered, and whether loads that might
// cause bank conflicts should be.
//
// This hook might be deprecated, so it may not be worth much effort to fix.
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                              int64_t Offset0, int64_t Offset1,
                                              unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have at most 16 loads in a row and the offsets are within 64 bytes,
  // then schedule them together.

  // A cacheline is 64 bytes (for global memory).
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

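// Map a channel count (1-4) to the corresponding AMDGPU::Channels enum value
// used by the MIMG opcode mapping tables.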
static AMDGPU::Channels indexToChannel(unsigned Channel) {
  switch (Channel) {
  case 1:
    return AMDGPU::Channels_1;
  case 2:
    return AMDGPU::Channels_2;
  case 3:
    return AMDGPU::Channels_3;
  case 4:
    return AMDGPU::Channels_4;
  default:
    llvm_unreachable("invalid MIMG channel");
  }
}

// FIXME: Need to handle d16 images correctly.
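// Return the number of channels implied by the MIMG data operand's register
// class (one 32-bit VGPR per channel).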
static unsigned rcToChannels(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_32RegClassID:
    return 1;
  case AMDGPU::VReg_64RegClassID:
    return 2;
  case AMDGPU::VReg_96RegClassID:
    return 3;
  case AMDGPU::VReg_128RegClassID:
    return 4;
  default:
    llvm_unreachable("invalid MIMG register class");
  }
}

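// Return the variant of the MIMG opcode Opc that writes NewChannels channels,
// or Opc itself if the channel count is unchanged.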
int AMDGPUInstrInfo::getMaskedMIMGOp(unsigned Opc,
                                     unsigned NewChannels) const {
  AMDGPU::Channels Channel = indexToChannel(NewChannels);
  unsigned OrigChannels = rcToChannels(get(Opc).OpInfo[0].RegClass);
  if (NewChannels == OrigChannels)
    return Opc;

  switch (OrigChannels) {
  case 1:
    return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
  case 2:
    return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
  case 3:
    return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
  case 4:
    return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
  default:
    llvm_unreachable("invalid MIMG channel");
  }
}

// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
  SI = 0,
  VI = 1,
  SDWA = 2,
  SDWA9 = 3,
  GFX9 = 4
};

// Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
namespace llvm {
namespace AMDGPU {
static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}
} // end namespace AMDGPU
} // end namespace llvm

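// Select the encoding family for the subtarget's generation. SDWA and the
// GFX9 opcode renames are handled separately in pseudoToMCOpcode.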
static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
  switch (ST.getGeneration()) {
  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
  case AMDGPUSubtarget::SEA_ISLANDS:
    return SIEncodingFamily::SI;
  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
  case AMDGPUSubtarget::GFX9:
    return SIEncodingFamily::VI;

  // FIXME: This should never be called for r600 GPUs.
  case AMDGPUSubtarget::R600:
  case AMDGPUSubtarget::R700:
  case AMDGPUSubtarget::EVERGREEN:
  case AMDGPUSubtarget::NORTHERN_ISLANDS:
    return SIEncodingFamily::SI;
  }

  llvm_unreachable("Unknown subtarget generation!");
}

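// Map a pseudo instruction opcode to the MC opcode for the subtarget's
// encoding family. Returns Opcode unchanged if it is already a native
// instruction, or -1 if it has no encoding on this subtarget.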
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
  SIEncodingFamily Gen = subtargetEncodingFamily(ST);

  if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
      ST.getGeneration() >= AMDGPUSubtarget::GFX9)
    Gen = SIEncodingFamily::GFX9;

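  // SDWA instructions use their own encoding families; GFX9 SDWA (SDWA9) is
  // encoded differently from VI SDWA.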
  if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
    Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
                                                      : SIEncodingFamily::SDWA;

  int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);

  // -1 means that Opcode is already a native instruction.
  if (MCOp == -1)
    return Opcode;

  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
  // no encoding in the given subtarget generation.
  if (MCOp == (uint16_t)-1)
    return -1;

  return MCOp;
}