148958d02SDaniil Fukalov //===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
248958d02SDaniil Fukalov //
348958d02SDaniil Fukalov // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
448958d02SDaniil Fukalov // See https://llvm.org/LICENSE.txt for license information.
548958d02SDaniil Fukalov // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
648958d02SDaniil Fukalov //
748958d02SDaniil Fukalov //===----------------------------------------------------------------------===//
848958d02SDaniil Fukalov //
948958d02SDaniil Fukalov // \file
1048958d02SDaniil Fukalov // This file implements a TargetTransformInfo analysis pass specific to the
1148958d02SDaniil Fukalov // R600 target machine. It uses the target's detailed information to provide
1248958d02SDaniil Fukalov // more precise answers to certain TTI queries, while letting the target
1348958d02SDaniil Fukalov // independent and default TTI implementations handle the rest.
1448958d02SDaniil Fukalov //
1548958d02SDaniil Fukalov //===----------------------------------------------------------------------===//
1648958d02SDaniil Fukalov
1748958d02SDaniil Fukalov #include "R600TargetTransformInfo.h"
18*47d6274dSDaniil Fukalov #include "AMDGPU.h"
1948958d02SDaniil Fukalov #include "AMDGPUTargetMachine.h"
2048958d02SDaniil Fukalov #include "R600Subtarget.h"
2148958d02SDaniil Fukalov
2248958d02SDaniil Fukalov using namespace llvm;
2348958d02SDaniil Fukalov
2448958d02SDaniil Fukalov #define DEBUG_TYPE "R600tti"
2548958d02SDaniil Fukalov
R600TTIImpl(const AMDGPUTargetMachine * TM,const Function & F)2648958d02SDaniil Fukalov R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
2748958d02SDaniil Fukalov : BaseT(TM, F.getParent()->getDataLayout()),
2848958d02SDaniil Fukalov ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
2948958d02SDaniil Fukalov TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}
3048958d02SDaniil Fukalov
getHardwareNumberOfRegisters(bool Vec) const3148958d02SDaniil Fukalov unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
3248958d02SDaniil Fukalov return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
3348958d02SDaniil Fukalov }
3448958d02SDaniil Fukalov
getNumberOfRegisters(bool Vec) const3548958d02SDaniil Fukalov unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
3648958d02SDaniil Fukalov return getHardwareNumberOfRegisters(Vec);
3748958d02SDaniil Fukalov }
3848958d02SDaniil Fukalov
3948958d02SDaniil Fukalov TypeSize
getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const4048958d02SDaniil Fukalov R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
4148958d02SDaniil Fukalov return TypeSize::getFixed(32);
4248958d02SDaniil Fukalov }
4348958d02SDaniil Fukalov
getMinVectorRegisterBitWidth() const4448958d02SDaniil Fukalov unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; }
4548958d02SDaniil Fukalov
getLoadStoreVecRegBitWidth(unsigned AddrSpace) const4648958d02SDaniil Fukalov unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
4748958d02SDaniil Fukalov if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
4848958d02SDaniil Fukalov AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
4948958d02SDaniil Fukalov return 128;
5048958d02SDaniil Fukalov if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
5148958d02SDaniil Fukalov AddrSpace == AMDGPUAS::REGION_ADDRESS)
5248958d02SDaniil Fukalov return 64;
5348958d02SDaniil Fukalov if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
5448958d02SDaniil Fukalov return 32;
5548958d02SDaniil Fukalov
5648958d02SDaniil Fukalov if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
5748958d02SDaniil Fukalov AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
5848958d02SDaniil Fukalov (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
5948958d02SDaniil Fukalov AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
6048958d02SDaniil Fukalov return 128;
6148958d02SDaniil Fukalov llvm_unreachable("unhandled address space");
6248958d02SDaniil Fukalov }
6348958d02SDaniil Fukalov
isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,Align Alignment,unsigned AddrSpace) const6448958d02SDaniil Fukalov bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
6548958d02SDaniil Fukalov Align Alignment,
6648958d02SDaniil Fukalov unsigned AddrSpace) const {
6748958d02SDaniil Fukalov // We allow vectorization of flat stores, even though we may need to decompose
6848958d02SDaniil Fukalov // them later if they may access private memory. We don't have enough context
6948958d02SDaniil Fukalov // here, and legalization can handle it.
7048958d02SDaniil Fukalov return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
7148958d02SDaniil Fukalov }
7248958d02SDaniil Fukalov
isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,Align Alignment,unsigned AddrSpace) const7348958d02SDaniil Fukalov bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
7448958d02SDaniil Fukalov Align Alignment,
7548958d02SDaniil Fukalov unsigned AddrSpace) const {
7648958d02SDaniil Fukalov return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
7748958d02SDaniil Fukalov }
7848958d02SDaniil Fukalov
isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,Align Alignment,unsigned AddrSpace) const7948958d02SDaniil Fukalov bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
8048958d02SDaniil Fukalov Align Alignment,
8148958d02SDaniil Fukalov unsigned AddrSpace) const {
8248958d02SDaniil Fukalov return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
8348958d02SDaniil Fukalov }
8448958d02SDaniil Fukalov
getMaxInterleaveFactor(unsigned VF)8548958d02SDaniil Fukalov unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
8648958d02SDaniil Fukalov // Disable unrolling if the loop is not vectorized.
8748958d02SDaniil Fukalov // TODO: Enable this again.
8848958d02SDaniil Fukalov if (VF == 1)
8948958d02SDaniil Fukalov return 1;
9048958d02SDaniil Fukalov
9148958d02SDaniil Fukalov return 8;
9248958d02SDaniil Fukalov }
9348958d02SDaniil Fukalov
getCFInstrCost(unsigned Opcode,TTI::TargetCostKind CostKind,const Instruction * I)9448958d02SDaniil Fukalov InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
9548958d02SDaniil Fukalov TTI::TargetCostKind CostKind,
9648958d02SDaniil Fukalov const Instruction *I) {
9748958d02SDaniil Fukalov if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
9848958d02SDaniil Fukalov return Opcode == Instruction::PHI ? 0 : 1;
9948958d02SDaniil Fukalov
10048958d02SDaniil Fukalov // XXX - For some reason this isn't called for switch.
10148958d02SDaniil Fukalov switch (Opcode) {
10248958d02SDaniil Fukalov case Instruction::Br:
10348958d02SDaniil Fukalov case Instruction::Ret:
10448958d02SDaniil Fukalov return 10;
10548958d02SDaniil Fukalov default:
10648958d02SDaniil Fukalov return BaseT::getCFInstrCost(Opcode, CostKind, I);
10748958d02SDaniil Fukalov }
10848958d02SDaniil Fukalov }
10948958d02SDaniil Fukalov
getVectorInstrCost(unsigned Opcode,Type * ValTy,unsigned Index)11048958d02SDaniil Fukalov InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
11148958d02SDaniil Fukalov unsigned Index) {
11248958d02SDaniil Fukalov switch (Opcode) {
11348958d02SDaniil Fukalov case Instruction::ExtractElement:
11448958d02SDaniil Fukalov case Instruction::InsertElement: {
11548958d02SDaniil Fukalov unsigned EltSize =
11648958d02SDaniil Fukalov DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
11748958d02SDaniil Fukalov if (EltSize < 32) {
11848958d02SDaniil Fukalov return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
11948958d02SDaniil Fukalov }
12048958d02SDaniil Fukalov
12148958d02SDaniil Fukalov // Extracts are just reads of a subregister, so are free. Inserts are
12248958d02SDaniil Fukalov // considered free because we don't want to have any cost for scalarizing
12348958d02SDaniil Fukalov // operations, and we don't have to copy into a different register class.
12448958d02SDaniil Fukalov
12548958d02SDaniil Fukalov // Dynamic indexing isn't free and is best avoided.
12648958d02SDaniil Fukalov return Index == ~0u ? 2 : 0;
12748958d02SDaniil Fukalov }
12848958d02SDaniil Fukalov default:
12948958d02SDaniil Fukalov return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
13048958d02SDaniil Fukalov }
13148958d02SDaniil Fukalov }
13248958d02SDaniil Fukalov
getUnrollingPreferences(Loop * L,ScalarEvolution & SE,TTI::UnrollingPreferences & UP,OptimizationRemarkEmitter * ORE)13348958d02SDaniil Fukalov void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
13448958d02SDaniil Fukalov TTI::UnrollingPreferences &UP,
13548958d02SDaniil Fukalov OptimizationRemarkEmitter *ORE) {
13648958d02SDaniil Fukalov CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
13748958d02SDaniil Fukalov }
13848958d02SDaniil Fukalov
getPeelingPreferences(Loop * L,ScalarEvolution & SE,TTI::PeelingPreferences & PP)13948958d02SDaniil Fukalov void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
14048958d02SDaniil Fukalov TTI::PeelingPreferences &PP) {
14148958d02SDaniil Fukalov CommonTTI.getPeelingPreferences(L, SE, PP);
14248958d02SDaniil Fukalov }
143