1 //===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // This file implements a TargetTransformInfo analysis pass specific to the
11 // R600 target machine. It uses the target's detailed information to provide
12 // more precise answers to certain TTI queries, while letting the target
13 // independent and default TTI implementations handle the rest.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "R600TargetTransformInfo.h"
18 #include "AMDGPUTargetMachine.h"
19 #include "R600Subtarget.h"
20 
21 using namespace llvm;
22 
23 #define DEBUG_TYPE "R600tti"
24 
// Construct the R600 TTI implementation for function \p F: bind the base TTI
// to the module's DataLayout, resolve the function-specific R600 subtarget
// from the target machine, cache its lowering info, and build the shared
// AMDGPU TTI helper used for unrolling/peeling preferences.
R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}
29 
30 unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
31   return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
32 }
33 
34 unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
35   return getHardwareNumberOfRegisters(Vec);
36 }
37 
38 TypeSize
39 R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
40   return TypeSize::getFixed(32);
41 }
42 
43 unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; }
44 
45 unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
46   if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
47       AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
48     return 128;
49   if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
50       AddrSpace == AMDGPUAS::REGION_ADDRESS)
51     return 64;
52   if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
53     return 32;
54 
55   if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
56        AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
57        (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
58         AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
59     return 128;
60   llvm_unreachable("unhandled address space");
61 }
62 
63 bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
64                                              Align Alignment,
65                                              unsigned AddrSpace) const {
66   // We allow vectorization of flat stores, even though we may need to decompose
67   // them later if they may access private memory. We don't have enough context
68   // here, and legalization can handle it.
69   return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
70 }
71 
72 bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
73                                               Align Alignment,
74                                               unsigned AddrSpace) const {
75   return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
76 }
77 
78 bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
79                                                Align Alignment,
80                                                unsigned AddrSpace) const {
81   return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
82 }
83 
84 unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
85   // Disable unrolling if the loop is not vectorized.
86   // TODO: Enable this again.
87   if (VF == 1)
88     return 1;
89 
90   return 8;
91 }
92 
93 InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
94                                             TTI::TargetCostKind CostKind,
95                                             const Instruction *I) {
96   if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
97     return Opcode == Instruction::PHI ? 0 : 1;
98 
99   // XXX - For some reason this isn't called for switch.
100   switch (Opcode) {
101   case Instruction::Br:
102   case Instruction::Ret:
103     return 10;
104   default:
105     return BaseT::getCFInstrCost(Opcode, CostKind, I);
106   }
107 }
108 
109 InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
110                                                 unsigned Index) {
111   switch (Opcode) {
112   case Instruction::ExtractElement:
113   case Instruction::InsertElement: {
114     unsigned EltSize =
115         DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
116     if (EltSize < 32) {
117       return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
118     }
119 
120     // Extracts are just reads of a subregister, so are free. Inserts are
121     // considered free because we don't want to have any cost for scalarizing
122     // operations, and we don't have to copy into a different register class.
123 
124     // Dynamic indexing isn't free and is best avoided.
125     return Index == ~0u ? 2 : 0;
126   }
127   default:
128     return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
129   }
130 }
131 
// Delegate unrolling preferences to the shared AMDGPU TTI implementation,
// which holds the heuristics common to all AMDGPU subtargets.
void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::UnrollingPreferences &UP,
                                          OptimizationRemarkEmitter *ORE) {
  CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
}
137 
// Delegate peeling preferences to the shared AMDGPU TTI implementation.
void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                        TTI::PeelingPreferences &PP) {
  CommonTTI.getPeelingPreferences(L, SE, PP);
}
142