//===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements a TargetTransformInfo analysis pass specific to the
// R600 target machine. It uses the target's detailed information to provide
// more precise answers to certain TTI queries, while letting the
// target-independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#include "R600TargetTransformInfo.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "R600Subtarget.h"

using namespace llvm;

#define DEBUG_TYPE "R600tti"

R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}

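// R600 has 128 vector GPRs, each with four 32-bit channels, so report each
// channel as a separate hardware register.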
unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
  return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
}

unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
  return getHardwareNumberOfRegisters(Vec);
}

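// Registers are operated on as individual 32-bit channels, independent of the
// requested register kind.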
TypeSize
R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
  return TypeSize::getFixed(32);
}

unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; }

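// Report the widest load/store, in bits, that it is legal to vectorize for
// each address space.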
unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
  if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
      AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
    return 128;
  if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
      AddrSpace == AMDGPUAS::REGION_ADDRESS)
    return 64;
  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
    return 32;

  if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
       AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
       (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
        AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
    return 128;
  llvm_unreachable("unhandled address space");
}

bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                             Align Alignment,
                                             unsigned AddrSpace) const {
  // We allow vectorization of flat stores, even though we may need to
  // decompose them later if they may access private memory. We don't have
  // enough context here, and legalization can handle it.
  return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
}

bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                              Align Alignment,
                                              unsigned AddrSpace) const {
  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}

bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                               Align Alignment,
                                               unsigned AddrSpace) const {
  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}

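// Maximum interleave (runtime unroll) factor the loop vectorizer may use.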
unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
  // Disable unrolling if the loop is not vectorized.
  // TODO: Enable this again.
  if (VF == 1)
    return 1;

  return 8;
}

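// Control flow is comparatively expensive on R600, which is reflected in the
// high throughput cost assigned to branches and returns below.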
InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
                                            TTI::TargetCostKind CostKind,
                                            const Instruction *I) {
  if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
    return Opcode == Instruction::PHI ? 0 : 1;

  // XXX - For some reason this isn't called for switch.
  switch (Opcode) {
  case Instruction::Br:
  case Instruction::Ret:
    return 10;
  default:
    return BaseT::getCFInstrCost(Opcode, CostKind, I);
  }
}

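// Cost of insertelement/extractelement: elements of at least 32 bits occupy
// whole register channels, so statically indexed accesses are free; narrower
// elements fall back to the generic scalarization cost.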
InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                                unsigned Index) {
  switch (Opcode) {
  case Instruction::ExtractElement:
  case Instruction::InsertElement: {
    unsigned EltSize =
        DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
    if (EltSize < 32)
      return BaseT::getVectorInstrCost(Opcode, ValTy, Index);

    // Extracts are just reads of a subregister, so are free. Inserts are
    // considered free because we don't want to have any cost for scalarizing
    // operations, and we don't have to copy into a different register class.

    // Dynamic indexing isn't free and is best avoided.
    return Index == ~0u ? 2 : 0;
  }
  default:
    return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
  }
}

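// Unrolling heuristics are shared with the common AMDGPU implementation.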
void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::UnrollingPreferences &UP,
                                          OptimizationRemarkEmitter *ORE) {
  CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
}

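// Peeling preferences likewise defer to the common AMDGPU implementation.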
void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                        TTI::PeelingPreferences &PP) {
  CommonTTI.getPeelingPreferences(L, SE, PP);
}