//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>

using namespace llvm;

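// Expand the constructor/destructor definitions, the named-operand helpers,
// and the instruction mapping tables from the TableGen-generated include.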
#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}

AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
  : AMDGPUGenInstrInfo(-1, -1), ST(ST) {}

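// Allow the generic machine scheduler to cluster adjacent loads for this
// target.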
bool AMDGPUInstrInfo::enableClusterLoads() const {
  return true;
}

// FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
// the first 16 loads will be interleaved with the stores, and the next 16
// will be clustered as expected. It should really split them into two
// batches of 16 stores each.
//
// Loads are clustered until this returns false, rather than trying to
// schedule groups of stores. This also means we have to decide whether loads
// from different address spaces should be clustered, and whether loads that
// might cause bank conflicts should be.
//
// This hook might be deprecated, so it may not be worth much effort to fix.
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                              int64_t Offset0, int64_t Offset1,
                                              unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have 16 or fewer loads in a row and the offsets are within 64
  // bytes, schedule them together.

  // A cacheline is 64 bytes (for global memory).
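  //
  // For example (hypothetical values): two loads at offsets 0 and 48 fall in
  // the same 64-byte cacheline and are scheduled together; loads at offsets
  // 0 and 80 are not.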
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

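// Find the first index in the indirect addressing register class that is not
// occupied by a register live into the function. Returns -1 when the
// function has no stack objects (so indirect addressing is not needed), and
// 0 when there are no live-ins to avoid.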
int AMDGPUInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  int Offset = -1;

  if (MFI->getNumObjects() == 0) {
    return -1;
  }

  if (MRI.livein_empty()) {
    return 0;
  }

  const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
                                            LE = MRI.livein_end();
                                            LI != LE; ++LI) {
    unsigned Reg = LI->first;
    if (TargetRegisterInfo::isVirtualRegister(Reg) ||
        !IndirectRC->contains(Reg))
      continue;

    unsigned RegIndex;
    unsigned RegEnd;
    for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
                                                          ++RegIndex) {
      if (IndirectRC->getRegister(RegIndex) == Reg)
        break;
    }
    Offset = std::max(Offset, (int)RegIndex);
  }

  return Offset + 1;
}

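// Compute one past the last register index available for indirect
// addressing: the first free index (see getIndirectIndexBegin) plus the
// frame size that the target frame lowering reports for the sentinel frame
// index -1. Returns -1 for functions with variable-sized objects or no
// stack objects at all.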
int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
  int Offset = 0;
  const MachineFrameInfo *MFI = MF.getFrameInfo();

  // Variable-sized objects are not supported.
  if (MFI->hasVarSizedObjects()) {
    return -1;
  }

  if (MFI->getNumObjects() == 0) {
    return -1;
  }

  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
  const AMDGPUFrameLowering *TFL = ST.getFrameLowering();

  unsigned IgnoredFrameReg;
  Offset = TFL->getFrameIndexReference(MF, -1, IgnoredFrameReg);

  return getIndirectIndexBegin(MF) + Offset;
}

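// Rewrite a MIMG opcode to the variant that writes only the requested number
// of channels. Channel counts other than 1-3 (including the default of 4)
// return the opcode unchanged.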
int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
  switch (Channels) {
  default: return Opcode;
  case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
  case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
  case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
  }
}

// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td.
enum SIEncodingFamily {
  SI = 0,
  VI = 1
};

// Wrapper for the TableGen'd function. enum Subtarget is not defined in any
// header file, so we need to wrap it in a function that takes unsigned
// instead.
namespace llvm {
namespace AMDGPU {
static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}
} // end namespace AMDGPU
} // end namespace llvm

static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
  switch (ST.getGeneration()) {
  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
  case AMDGPUSubtarget::SEA_ISLANDS:
    return SIEncodingFamily::SI;
  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
    return SIEncodingFamily::VI;

  // FIXME: This should never be called for r600 GPUs.
  case AMDGPUSubtarget::R600:
  case AMDGPUSubtarget::R700:
  case AMDGPUSubtarget::EVERGREEN:
  case AMDGPUSubtarget::NORTHERN_ISLANDS:
    return SIEncodingFamily::SI;
  }

  llvm_unreachable("Unknown subtarget generation!");
}

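// Map a pseudo instruction opcode to the MC opcode for this subtarget's
// encoding family. Returns the opcode unchanged if it is already a native
// instruction, and -1 if the pseudo has no encoding in this generation.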
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
  int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST));

  // -1 means that Opcode is already a native instruction.
  if (MCOp == -1)
    return Opcode;

  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
  // no encoding in the given subtarget generation.
  if (MCOp == (uint16_t)-1)
    return -1;

  return MCOp;
}