1 //===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // \file 10 // This file implements a TargetTransformInfo analysis pass specific to the 11 // R600 target machine. It uses the target's detailed information to provide 12 // more precise answers to certain TTI queries, while letting the target 13 // independent and default TTI implementations handle the rest. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "R600TargetTransformInfo.h" 18 #include "AMDGPUTargetMachine.h" 19 #include "R600Subtarget.h" 20 21 using namespace llvm; 22 23 #define DEBUG_TYPE "R600tti" 24 25 R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F) 26 : BaseT(TM, F.getParent()->getDataLayout()), 27 ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))), 28 TLI(ST->getTargetLowering()), CommonTTI(TM, F) {} 29 30 unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const { 31 return 4 * 128; // XXX - 4 channels. Should these count as vector instead? 32 } 33 34 unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const { 35 return getHardwareNumberOfRegisters(Vec); 36 } 37 38 TypeSize 39 R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { 40 return TypeSize::getFixed(32); 41 } 42 43 unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; } 44 45 unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { 46 if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS || 47 AddrSpace == AMDGPUAS::CONSTANT_ADDRESS) 48 return 128; 49 if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || 50 AddrSpace == AMDGPUAS::REGION_ADDRESS) 51 return 64; 52 if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) 53 return 32; 54 55 if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS || 56 AddrSpace == AMDGPUAS::PARAM_I_ADDRESS || 57 (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 && 58 AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15))) 59 return 128; 60 llvm_unreachable("unhandled address space"); 61 } 62 63 bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, 64 Align Alignment, 65 unsigned AddrSpace) const { 66 // We allow vectorization of flat stores, even though we may need to decompose 67 // them later if they may access private memory. We don't have enough context 68 // here, and legalization can handle it. 69 return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS); 70 } 71 72 bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, 73 Align Alignment, 74 unsigned AddrSpace) const { 75 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace); 76 } 77 78 bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, 79 Align Alignment, 80 unsigned AddrSpace) const { 81 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace); 82 } 83 84 unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) { 85 // Disable unrolling if the loop is not vectorized. 86 // TODO: Enable this again. 87 if (VF == 1) 88 return 1; 89 90 return 8; 91 } 92 93 InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode, 94 TTI::TargetCostKind CostKind, 95 const Instruction *I) { 96 if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) 97 return Opcode == Instruction::PHI ? 0 : 1; 98 99 // XXX - For some reason this isn't called for switch. 100 switch (Opcode) { 101 case Instruction::Br: 102 case Instruction::Ret: 103 return 10; 104 default: 105 return BaseT::getCFInstrCost(Opcode, CostKind, I); 106 } 107 } 108 109 InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, 110 unsigned Index) { 111 switch (Opcode) { 112 case Instruction::ExtractElement: 113 case Instruction::InsertElement: { 114 unsigned EltSize = 115 DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType()); 116 if (EltSize < 32) { 117 return BaseT::getVectorInstrCost(Opcode, ValTy, Index); 118 } 119 120 // Extracts are just reads of a subregister, so are free. Inserts are 121 // considered free because we don't want to have any cost for scalarizing 122 // operations, and we don't have to copy into a different register class. 123 124 // Dynamic indexing isn't free and is best avoided. 125 return Index == ~0u ? 2 : 0; 126 } 127 default: 128 return BaseT::getVectorInstrCost(Opcode, ValTy, Index); 129 } 130 } 131 132 void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, 133 TTI::UnrollingPreferences &UP, 134 OptimizationRemarkEmitter *ORE) { 135 CommonTTI.getUnrollingPreferences(L, SE, UP, ORE); 136 } 137 138 void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, 139 TTI::PeelingPreferences &PP) { 140 CommonTTI.getPeelingPreferences(L, SE, PP); 141 } 142