//===------- VectorCombine.cpp - Optimize partial vector operations -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass optimizes scalar/vector interactions using target cost models. The
// transforms implemented here may not fit in traditional loop-based or SLP
// vectorization passes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Vectorize/VectorCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "vector-combine"

STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");

static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

/// Compare the relative costs of 2 extracts followed by scalar operation vs.
/// vector operation(s) followed by extract. Return true if the existing
/// instructions are cheaper than a vector alternative. Otherwise, return false
/// and if one of the extracts should be transformed to a shufflevector, set
/// \p ConvertToShuffle to that extract instruction.
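/// For example (illustrative only; the actual decision is target-dependent),
/// the pattern being costed is:
///   %e0 = extractelement <4 x i32> %x, i32 0
///   %e1 = extractelement <4 x i32> %y, i32 0
///   %r  = add i32 %e0, %e1
/// -->
///   %v = add <4 x i32> %x, %y
///   %r = extractelement <4 x i32> %v, i32 0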
static bool isExtractExtractCheap(Instruction *Ext0, Instruction *Ext1,
                                  unsigned Opcode,
                                  const TargetTransformInfo &TTI,
                                  Instruction *&ConvertToShuffle,
                                  unsigned PreferredExtractIndex) {
  assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
         isa<ConstantInt>(Ext1->getOperand(1)) &&
         "Expected constant extract indexes");
  Type *ScalarTy = Ext0->getType();
  Type *VecTy = Ext0->getOperand(0)->getType();
  int ScalarOpCost, VectorOpCost;

  // Get cost estimates for scalar and vector versions of the operation.
  bool IsBinOp = Instruction::isBinaryOp(Opcode);
  if (IsBinOp) {
    ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
  } else {
    assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
           "Expected a compare");
    ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy,
                                          CmpInst::makeCmpResultType(ScalarTy));
    VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy,
                                          CmpInst::makeCmpResultType(VecTy));
  }

  // Get cost estimates for the extract elements. These costs will factor into
  // both sequences.
  unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
  unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();

  int Extract0Cost = TTI.getVectorInstrCost(Instruction::ExtractElement,
                                            VecTy, Ext0Index);
  int Extract1Cost = TTI.getVectorInstrCost(Instruction::ExtractElement,
                                            VecTy, Ext1Index);

  // A more expensive extract will always be replaced by a splat shuffle.
  // For example, if Ext0 is more expensive:
  // opcode (extelt V0, Ext0), (ext V1, Ext1) -->
  // extelt (opcode (splat V0, Ext0), V1), Ext1
  // TODO: Evaluate whether that always results in lowest cost. Alternatively,
  // check the cost of creating a broadcast shuffle and shuffling both
  // operands to element 0.
  int CheapExtractCost = std::min(Extract0Cost, Extract1Cost);

  // Extra uses of the extracts mean that we include those costs in the
  // vector total because those instructions will not be eliminated.
  int OldCost, NewCost;
  if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
    // Handle a special case. If the 2 extracts are identical, adjust the
    // formulas to account for that. The extra use charge allows for either the
    // CSE'd pattern or an unoptimized form with identical values:
    // opcode (extelt V, C), (extelt V, C) --> extelt (opcode V, V), C
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
                                  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
  } else {
    // Handle the general case. Each extract is actually a different value:
    // opcode (extelt V0, C0), (extelt V1, C1) --> extelt (opcode V0, V1), C
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
              !Ext0->hasOneUse() * Extract0Cost +
              !Ext1->hasOneUse() * Extract1Cost;
  }

  if (Ext0Index == Ext1Index) {
    // If the extract indexes are identical, no shuffle is needed.
    ConvertToShuffle = nullptr;
  } else {
    if (IsBinOp && DisableBinopExtractShuffle)
      return true;

    // If we are extracting from 2 different indexes, then one operand must be
    // shuffled before performing the vector operation. The shuffle mask is
    // undefined except for 1 lane that is being translated to the remaining
    // extraction lane. Therefore, it is a splat shuffle. Ex:
    // ShufMask = { undef, undef, 0, undef }
    // TODO: The cost model has an option for a "broadcast" shuffle
    // (splat-from-element-0), but no option for a more general splat.
    NewCost +=
        TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);

    // The more expensive extract will be replaced by a shuffle. If the costs
    // are equal and there is a preferred extract index, shuffle the opposite
    // operand. Otherwise, replace the extract with the higher index.
    if (Extract0Cost > Extract1Cost)
      ConvertToShuffle = Ext0;
    else if (Extract1Cost > Extract0Cost)
      ConvertToShuffle = Ext1;
    else if (PreferredExtractIndex == Ext0Index)
      ConvertToShuffle = Ext1;
    else if (PreferredExtractIndex == Ext1Index)
      ConvertToShuffle = Ext0;
    else
      ConvertToShuffle = Ext0Index > Ext1Index ? Ext0 : Ext1;
  }

  // Aggressively form a vector op if the cost is equal because the transform
  // may enable further optimization.
  // Codegen can reverse this transform (scalarize) if it was not profitable.
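  // As an illustration with hypothetical unit costs (extracts and ops all
  // cost 1, single-use extracts, same index): OldCost = 1 + 1 + 1 = 3 and
  // NewCost = 1 + 1 = 2, so the cheaper vector sequence is formed.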
  return OldCost < NewCost;
}

/// Try to reduce extract element costs by converting scalar compares to vector
/// compares followed by extract.
/// cmp (ext0 V0, C), (ext1 V1, C)
static void foldExtExtCmp(Instruction *Ext0, Instruction *Ext1,
                          Instruction &I, const TargetTransformInfo &TTI) {
  assert(isa<CmpInst>(&I) && "Expected a compare");

  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
  ++NumVecCmp;
  IRBuilder<> Builder(&I);
  CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
  Value *V0 = Ext0->getOperand(0), *V1 = Ext1->getOperand(0);
  Value *VecCmp =
      Ext0->getType()->isFloatingPointTy() ? Builder.CreateFCmp(Pred, V0, V1)
                                           : Builder.CreateICmp(Pred, V0, V1);
  Value *Extract = Builder.CreateExtractElement(VecCmp, Ext0->getOperand(1));
  I.replaceAllUsesWith(Extract);
}

/// Try to reduce extract element costs by converting scalar binops to vector
/// binops followed by extract.
/// bo (ext0 V0, C), (ext1 V1, C)
static void foldExtExtBinop(Instruction *Ext0, Instruction *Ext1,
                            Instruction &I, const TargetTransformInfo &TTI) {
  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");

  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
  ++NumVecBO;
  IRBuilder<> Builder(&I);
  Value *V0 = Ext0->getOperand(0), *V1 = Ext1->getOperand(0);
  Value *VecBO =
      Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);

  // All IR flags are safe to back-propagate because any potential poison
  // created in unused vector elements is discarded by the extract.
  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
    VecBOInst->copyIRFlags(&I);

  Value *Extract = Builder.CreateExtractElement(VecBO, Ext0->getOperand(1));
  I.replaceAllUsesWith(Extract);
}

/// Match an instruction with extracted vector operands.
static bool foldExtractExtract(Instruction &I, const TargetTransformInfo &TTI) {
  // It is not safe to transform things like div, urem, etc. because we may
  // create undefined behavior when executing those on unknown vector elements.
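  // For example, 'udiv (extelt V0, C), (extelt V1, C)' cannot become
  // 'extelt (udiv V0, V1), C' because another element of V1 may be zero.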
  if (!isSafeToSpeculativelyExecute(&I))
    return false;

  Instruction *Ext0, *Ext1;
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  if (!match(&I, m_Cmp(Pred, m_Instruction(Ext0), m_Instruction(Ext1))) &&
      !match(&I, m_BinOp(m_Instruction(Ext0), m_Instruction(Ext1))))
    return false;

  Value *V0, *V1;
  uint64_t C0, C1;
  if (!match(Ext0, m_ExtractElement(m_Value(V0), m_ConstantInt(C0))) ||
      !match(Ext1, m_ExtractElement(m_Value(V1), m_ConstantInt(C1))) ||
      V0->getType() != V1->getType())
    return false;

  // If the scalar value 'I' is going to be re-inserted into a vector, then try
  // to create an extract to that same element. The extract/insert can be
  // reduced to a "select shuffle".
  // TODO: If we add a larger pattern match that starts from an insert, this
  // probably becomes unnecessary.
  uint64_t InsertIndex = std::numeric_limits<uint64_t>::max();
  if (I.hasOneUse())
    match(I.user_back(), m_InsertElement(m_Value(), m_Value(),
                                         m_ConstantInt(InsertIndex)));

  Instruction *ConvertToShuffle;
  if (isExtractExtractCheap(Ext0, Ext1, I.getOpcode(), TTI, ConvertToShuffle,
                            InsertIndex))
    return false;

  if (ConvertToShuffle) {
    // The shuffle mask is undefined except for 1 lane that is being translated
    // to the cheap extraction lane. Example:
    // ShufMask = { 2, undef, undef, undef }
    uint64_t SplatIndex = ConvertToShuffle == Ext0 ? C0 : C1;
    uint64_t CheapExtIndex = ConvertToShuffle == Ext0 ? C1 : C0;
    auto *VecTy = cast<VectorType>(V0->getType());
    SmallVector<int, 32> ShufMask(VecTy->getNumElements(), -1);
    ShufMask[CheapExtIndex] = SplatIndex;
    IRBuilder<> Builder(ConvertToShuffle);

    // extelt X, C --> extelt (splat X), C'
    Value *Shuf = Builder.CreateShuffleVector(ConvertToShuffle->getOperand(0),
                                              UndefValue::get(VecTy), ShufMask);
    Value *NewExt = Builder.CreateExtractElement(Shuf, CheapExtIndex);
    if (ConvertToShuffle == Ext0)
      Ext0 = cast<Instruction>(NewExt);
    else
      Ext1 = cast<Instruction>(NewExt);
  }

  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
    foldExtExtCmp(Ext0, Ext1, I, TTI);
  else
    foldExtExtBinop(Ext0, Ext1, I, TTI);

  return true;
}

/// If this is a bitcast to narrow elements from a shuffle of wider elements,
/// try to bitcast the source vector to the narrow type followed by shuffle.
/// This can enable further transforms by moving bitcasts or shuffles together.
static bool foldBitcastShuf(Instruction &I, const TargetTransformInfo &TTI) {
  Value *V;
  ArrayRef<int> Mask;
  if (!match(&I, m_BitCast(m_OneUse(m_ShuffleVector(m_Value(V), m_Undef(),
                                                    m_Mask(Mask))))))
    return false;

  auto *DestTy = dyn_cast<VectorType>(I.getType());
  auto *SrcTy = cast<VectorType>(V->getType());
  if (!DestTy || I.getOperand(0)->getType() != SrcTy)
    return false;

  // TODO: Handle bitcast from narrow element type to wide element type.
  unsigned DestNumElts = DestTy->getNumElements();
  unsigned SrcNumElts = SrcTy->getNumElements();
  if (SrcNumElts > DestNumElts)
    return false;

  // The new shuffle must not cost more than the old shuffle. The bitcast is
  // moved ahead of the shuffle, so assume that it has the same cost as before.
  if (TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy) >
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy))
    return false;

  // Bitcast the source vector and expand the shuffle mask to the equivalent
  // for narrow elements.
  // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
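  // For example, bitcasting <2 x i64> to <4 x i32> doubles the mask length:
  // bitcast (shuf <2 x i64> V, <1, undef>) -->
  // shuf (bitcast V to <4 x i32>), <2, 3, undef, undef>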
  IRBuilder<> Builder(&I);
  Value *CastV = Builder.CreateBitCast(V, DestTy);
  SmallVector<int, 16> NewMask;
  assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
  unsigned ScaleFactor = DestNumElts / SrcNumElts;
  narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
  Value *Shuf = Builder.CreateShuffleVector(CastV, UndefValue::get(DestTy),
                                            NewMask);
  I.replaceAllUsesWith(Shuf);
  return true;
}

/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
static bool runImpl(Function &F, const TargetTransformInfo &TTI,
                    const DominatorTree &DT) {
  if (DisableVectorCombine)
    return false;

  bool MadeChange = false;
  for (BasicBlock &BB : F) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(&BB))
      continue;
    // Do not delete instructions under here and invalidate the iterator.
    // Walk the block backwards for efficiency. We're matching a chain of
    // use->defs, so we're more likely to succeed by starting from the bottom.
    // TODO: It could be more efficient to remove dead instructions
    // iteratively in this loop rather than waiting until the end.
    for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
      if (isa<DbgInfoIntrinsic>(I))
        continue;
      MadeChange |= foldExtractExtract(I, TTI);
      MadeChange |= foldBitcastShuf(I, TTI);
    }
  }

  // We're done with transforms, so remove dead instructions.
  if (MadeChange)
    for (BasicBlock &BB : F)
      SimplifyInstructionsInBlock(&BB);

  return MadeChange;
}

// Pass manager boilerplate below here.
namespace {
class VectorCombineLegacyPass : public FunctionPass {
public:
  static char ID;
  VectorCombineLegacyPass() : FunctionPass(ID) {
    initializeVectorCombineLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    return runImpl(F, TTI, DT);
  }
};
} // namespace

char VectorCombineLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine",
                      "Optimize scalar/vector ops", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(VectorCombineLegacyPass, "vector-combine",
                    "Optimize scalar/vector ops", false, false)
Pass *llvm::createVectorCombinePass() {
  return new VectorCombineLegacyPass();
}

PreservedAnalyses VectorCombinePass::run(Function &F,
                                         FunctionAnalysisManager &FAM) {
  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  if (!runImpl(F, TTI, DT))
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<GlobalsAA>();
  return PA;
}