//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}

AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st)
    : AMDGPUGenInstrInfo(-1, -1), ST(st) {}

const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
  return RI;
}

bool AMDGPUInstrInfo::enableClusterLoads() const {
  return true;
}

// FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
// be clustered as expected. It should really split into two batches of 16
// stores.
//
// Loads are clustered until this returns false, rather than trying to schedule
// groups of stores. This also means this hook has to decide whether loads from
// different address spaces should be clustered, and whether loads which might
// cause bank conflicts should be.
//
// This might be deprecated so it might not be worth that much effort to fix.
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                              int64_t Offset0, int64_t Offset1,
                                              unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have fewer than 16 loads in a row, and the offsets are within 64
  // bytes, then schedule together.

  // A cacheline is 64 bytes (for global memory).
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

int AMDGPUInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  int Offset = -1;

  if (MFI->getNumObjects() == 0) {
    return -1;
  }

  if (MRI.livein_empty()) {
    return 0;
  }

  const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
                                            LE = MRI.livein_end();
       LI != LE; ++LI) {
    unsigned Reg = LI->first;
    if (TargetRegisterInfo::isVirtualRegister(Reg) ||
        !IndirectRC->contains(Reg))
      continue;

    unsigned RegIndex;
    unsigned RegEnd;
    for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
         ++RegIndex) {
      if (IndirectRC->getRegister(RegIndex) == Reg)
        break;
    }
    Offset = std::max(Offset, (int)RegIndex);
  }

  return Offset + 1;
}

int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
  int Offset = 0;
  const MachineFrameInfo *MFI = MF.getFrameInfo();

  // Variable sized objects are not supported.
  assert(!MFI->hasVarSizedObjects());

  if (MFI->getNumObjects() == 0) {
    return -1;
  }

  unsigned IgnoredFrameReg;
  Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexReference(
      MF, -1, IgnoredFrameReg);

  return getIndirectIndexBegin(MF) + Offset;
}

int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
  switch (Channels) {
  default: return Opcode;
  case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
  case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
  case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
  }
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
namespace llvm {
namespace AMDGPU {
static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, (enum Subtarget)Gen);
}
}
}

// This must be kept in sync with the SISubtarget class in SIInstrInfo.td
enum SISubtarget {
  SI = 0,
  VI = 1
};

static enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
  switch (Gen) {
  default:
    return SI;
  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
    return VI;
  }
}

int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
  int MCOp = AMDGPU::getMCOpcode(
      Opcode, AMDGPUSubtargetToSISubtarget(ST.getGeneration()));

  // -1 means that Opcode is already a native instruction.
  if (MCOp == -1)
    return Opcode;

  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
  // no encoding in the given subtarget generation.
  if (MCOp == (uint16_t)-1)
    return -1;

  return MCOp;
}