//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}

AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
  : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
    ST(ST),
    AMDGPUASI(ST.getAMDGPUAS()) {}

// FIXME: This behaves strangely. If, for example, you have 32 loads and
// stores, the first 16 loads will be interleaved with the stores, and the
// next 16 will be clustered as expected. It should really split them into
// two batches of 16 stores.
//
// Loads are clustered until this returns false, rather than trying to schedule
// groups of stores. This also means we have to deal with deciding whether
// loads from different address spaces should be clustered, and with loads
// which might cause bank conflicts.
//
// This hook might be deprecated, so it might not be worth much effort to fix.
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                              int64_t Offset0, int64_t Offset1,
                                              unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have fewer than 16 loads in a row, and the offsets are within 64
  // bytes, then schedule together.

  // A cacheline is 64 bytes (for global memory).
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

// Map a channel count (1-4) to the corresponding AMDGPU::Channels enumerator.
static AMDGPU::Channels indexToChannel(unsigned Channel) {
  switch (Channel) {
  case 1:
    return AMDGPU::Channels_1;
  case 2:
    return AMDGPU::Channels_2;
  case 3:
    return AMDGPU::Channels_3;
  case 4:
    return AMDGPU::Channels_4;
  default:
    llvm_unreachable("invalid MIMG channel");
  }
}

// FIXME: Need to handle d16 images correctly.
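// Map a MIMG data register class ID to the number of 32-bit channels it holds.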
static unsigned rcToChannels(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_32RegClassID:
    return 1;
  case AMDGPU::VReg_64RegClassID:
    return 2;
  case AMDGPU::VReg_96RegClassID:
    return 3;
  case AMDGPU::VReg_128RegClassID:
    return 4;
  default:
    llvm_unreachable("invalid MIMG register class");
  }
}

// Return the opcode of the MIMG instruction equivalent to \p Opc that writes
// \p NewChannels channels, or \p Opc itself if it already writes that many.
int AMDGPUInstrInfo::getMaskedMIMGOp(unsigned Opc,
                                     unsigned NewChannels) const {
  AMDGPU::Channels Channel = indexToChannel(NewChannels);
  unsigned OrigChannels = rcToChannels(get(Opc).OpInfo[0].RegClass);
  if (NewChannels == OrigChannels)
    return Opc;

  switch (OrigChannels) {
  case 1:
    return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
  case 2:
    return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
  case 3:
    return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
  case 4:
    return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
  default:
    llvm_unreachable("invalid MIMG channel");
  }
}

// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td.
enum SIEncodingFamily {
  SI = 0,
  VI = 1,
  SDWA = 2,
  SDWA9 = 3,
  GFX9 = 4
};

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
namespace llvm {
namespace AMDGPU {
static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}
} // namespace AMDGPU
} // namespace llvm

static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
  switch (ST.getGeneration()) {
  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
  case AMDGPUSubtarget::SEA_ISLANDS:
    return SIEncodingFamily::SI;
  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
  case AMDGPUSubtarget::GFX9:
    return SIEncodingFamily::VI;

  // FIXME: This should never be called for r600 GPUs.
  case AMDGPUSubtarget::R600:
  case AMDGPUSubtarget::R700:
  case AMDGPUSubtarget::EVERGREEN:
  case AMDGPUSubtarget::NORTHERN_ISLANDS:
    return SIEncodingFamily::SI;
  }

  llvm_unreachable("Unknown subtarget generation!");
}

int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
  SIEncodingFamily Gen = subtargetEncodingFamily(ST);

  if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
      ST.getGeneration() >= AMDGPUSubtarget::GFX9)
    Gen = SIEncodingFamily::GFX9;

  if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
    Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
                                                      : SIEncodingFamily::SDWA;

  int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);

  // -1 means that Opcode is already a native instruction.
  if (MCOp == -1)
    return Opcode;

  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
  // no encoding in the given subtarget generation.
  if (MCOp == (uint16_t)-1)
    return -1;

  return MCOp;
}