//===------- VectorCombine.cpp - Optimize partial vector operations -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass optimizes scalar/vector interactions using target cost models. The
// transforms implemented here may not fit in traditional loop-based or SLP
// vectorization passes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Vectorize/VectorCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "vector-combine"

STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");

static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

/// Compare the relative costs of 2 extracts followed by scalar operation vs.
/// vector operation(s) followed by extract. Return true if the existing
/// instructions are cheaper than a vector alternative. Otherwise, return false
/// and if one of the extracts should be transformed to a shufflevector, set
/// \p ConvertToShuffle to that extract instruction.
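///
/// As an illustrative sketch (actual costs come from TTI and are
/// target-dependent), for one-use extracts of the same vector and index:
///   %e0 = extractelement <4 x i32> %v, i32 1
///   %e1 = extractelement <4 x i32> %v, i32 1
///   %r  = add i32 %e0, %e1
/// the scalar cost is one extract plus one scalar add, while the vector
/// alternative costs one vector add plus one extract:
///   %b = add <4 x i32> %v, %v
///   %r = extractelement <4 x i32> %b, i32 1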
static bool isExtractExtractCheap(Instruction *Ext0, Instruction *Ext1,
                                  unsigned Opcode,
                                  const TargetTransformInfo &TTI,
                                  Instruction *&ConvertToShuffle) {
  assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
         isa<ConstantInt>(Ext1->getOperand(1)) &&
         "Expected constant extract indexes");
  Type *ScalarTy = Ext0->getType();
  Type *VecTy = Ext0->getOperand(0)->getType();
  int ScalarOpCost, VectorOpCost;

  // Get cost estimates for scalar and vector versions of the operation.
  bool IsBinOp = Instruction::isBinaryOp(Opcode);
  if (IsBinOp) {
    ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
  } else {
    assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
           "Expected a compare");
    ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy,
                                          CmpInst::makeCmpResultType(ScalarTy));
    VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy,
                                          CmpInst::makeCmpResultType(VecTy));
  }

  // Get cost estimates for the extract elements. These costs will factor into
  // both sequences.
  unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
  unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();

  int Extract0Cost =
      TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
  int Extract1Cost =
      TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);

  // A more expensive extract will always be replaced by a splat shuffle.
  // For example, if Ext0 is more expensive:
  //   opcode (extelt V0, Ext0), (ext V1, Ext1) -->
  //   extelt (opcode (splat V0, Ext0), V1), Ext1
  // TODO: Evaluate whether that always results in lowest cost. Alternatively,
  // check the cost of creating a broadcast shuffle and shuffling both
  // operands to element 0.
  int CheapExtractCost = std::min(Extract0Cost, Extract1Cost);

  // Extra uses of the extracts mean that we include those costs in the
  // vector total because those instructions will not be eliminated.
  int OldCost, NewCost;
  if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
    // Handle a special case. If the 2 extracts are identical, adjust the
    // formulas to account for that.
    // The extra use charge allows for either the CSE'd pattern or an
    // unoptimized form with identical values:
    //   opcode (extelt V, C), (extelt V, C) --> extelt (opcode V, V), C
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
                                  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
  } else {
    // Handle the general case. Each extract is actually a different value:
    //   opcode (extelt V0, C0), (extelt V1, C1) --> extelt (opcode V0, V1), C
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
              !Ext0->hasOneUse() * Extract0Cost +
              !Ext1->hasOneUse() * Extract1Cost;
  }

  if (Ext0Index == Ext1Index) {
    // If the extract indexes are identical, no shuffle is needed.
    ConvertToShuffle = nullptr;
  } else {
    if (IsBinOp && DisableBinopExtractShuffle)
      return true;

    // If we are extracting from 2 different indexes, then one operand must be
    // shuffled before performing the vector operation. The shuffle mask is
    // undefined except for 1 lane that is being translated to the remaining
    // extraction lane. Therefore, it is a splat shuffle. Ex:
    //   ShufMask = { undef, undef, 0, undef }
    // TODO: The cost model has an option for a "broadcast" shuffle
    // (splat-from-element-0), but no option for a more general splat.
    NewCost +=
        TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);

    // The more expensive extract will be replaced by a shuffle. If the
    // extracts have the same cost, replace the extract with the higher index.
    if (Extract0Cost > Extract1Cost)
      ConvertToShuffle = Ext0;
    else if (Extract1Cost > Extract0Cost)
      ConvertToShuffle = Ext1;
    else
      ConvertToShuffle = Ext0Index > Ext1Index ? Ext0 : Ext1;
  }

  // Aggressively form a vector op if the cost is equal because the transform
  // may enable further optimization.
  // Codegen can reverse this transform (scalarize) if it was not profitable.
  return OldCost < NewCost;
}

/// Try to reduce extract element costs by converting scalar compares to vector
/// compares followed by extract.
///   cmp (ext0 V0, C), (ext1 V1, C)
static void foldExtExtCmp(Instruction *Ext0, Instruction *Ext1, Instruction &I,
                          const TargetTransformInfo &TTI) {
  assert(isa<CmpInst>(&I) && "Expected a compare");

  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
  ++NumVecCmp;
  IRBuilder<> Builder(&I);
  CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
  Value *V0 = Ext0->getOperand(0), *V1 = Ext1->getOperand(0);
  Value *VecCmp =
      Ext0->getType()->isFloatingPointTy() ? Builder.CreateFCmp(Pred, V0, V1)
                                           : Builder.CreateICmp(Pred, V0, V1);
  Value *Extract = Builder.CreateExtractElement(VecCmp, Ext0->getOperand(1));
  I.replaceAllUsesWith(Extract);
}

/// Try to reduce extract element costs by converting scalar binops to vector
/// binops followed by extract.
///   bo (ext0 V0, C), (ext1 V1, C)
static void foldExtExtBinop(Instruction *Ext0, Instruction *Ext1,
                            Instruction &I, const TargetTransformInfo &TTI) {
  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");

  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
  ++NumVecBO;
  IRBuilder<> Builder(&I);
  Value *V0 = Ext0->getOperand(0), *V1 = Ext1->getOperand(0);
  Value *VecBO =
      Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);

  // All IR flags are safe to back-propagate because any potential poison
  // created in unused vector elements is discarded by the extract.
  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
    VecBOInst->copyIRFlags(&I);

  Value *Extract = Builder.CreateExtractElement(VecBO, Ext0->getOperand(1));
  I.replaceAllUsesWith(Extract);
}

/// Match an instruction with extracted vector operands.
static bool foldExtractExtract(Instruction &I, const TargetTransformInfo &TTI) {
  // It is not safe to transform things like div, urem, etc. because we may
  // create undefined behavior when executing those on unknown vector elements.
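  // For example, a scalar 'udiv' of two extracted elements must not become a
  // vector 'udiv': a lane that was never extracted could be zero, and dividing
  // by it would introduce UB that the original scalar code never had:
  //   %e0 = extractelement <4 x i32> %x, i32 0
  //   %e1 = extractelement <4 x i32> %y, i32 0
  //   %q  = udiv i32 %e0, %e1   ; stays scalar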
  if (!isSafeToSpeculativelyExecute(&I))
    return false;

  Instruction *Ext0, *Ext1;
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  if (!match(&I, m_Cmp(Pred, m_Instruction(Ext0), m_Instruction(Ext1))) &&
      !match(&I, m_BinOp(m_Instruction(Ext0), m_Instruction(Ext1))))
    return false;

  Value *V0, *V1;
  uint64_t C0, C1;
  if (!match(Ext0, m_ExtractElement(m_Value(V0), m_ConstantInt(C0))) ||
      !match(Ext1, m_ExtractElement(m_Value(V1), m_ConstantInt(C1))) ||
      V0->getType() != V1->getType())
    return false;

  Instruction *ConvertToShuffle;
  if (isExtractExtractCheap(Ext0, Ext1, I.getOpcode(), TTI, ConvertToShuffle))
    return false;

  if (ConvertToShuffle) {
    // The shuffle mask is undefined except for 1 lane that is being translated
    // to the cheap extraction lane. Example:
    //   ShufMask = { 2, undef, undef, undef }
    uint64_t SplatIndex = ConvertToShuffle == Ext0 ? C0 : C1;
    uint64_t CheapExtIndex = ConvertToShuffle == Ext0 ? C1 : C0;
    Type *VecTy = V0->getType();
    Type *I32Ty = IntegerType::getInt32Ty(I.getContext());
    UndefValue *Undef = UndefValue::get(I32Ty);
    SmallVector<Constant *, 32> ShufMask(VecTy->getVectorNumElements(), Undef);
    ShufMask[CheapExtIndex] = ConstantInt::get(I32Ty, SplatIndex);
    IRBuilder<> Builder(ConvertToShuffle);

    // extelt X, C --> extelt (splat X), C'
    Value *Shuf = Builder.CreateShuffleVector(ConvertToShuffle->getOperand(0),
                                              UndefValue::get(VecTy),
                                              ConstantVector::get(ShufMask));
    Value *NewExt = Builder.CreateExtractElement(Shuf, CheapExtIndex);
    if (ConvertToShuffle == Ext0)
      Ext0 = cast<Instruction>(NewExt);
    else
      Ext1 = cast<Instruction>(NewExt);
  }

  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
    foldExtExtCmp(Ext0, Ext1, I, TTI);
  else
    foldExtExtBinop(Ext0, Ext1, I, TTI);

  return true;
}

/// If this is a bitcast to narrow elements from a shuffle of wider elements,
/// try to bitcast the source vector to the narrow type followed by shuffle.
/// This can enable further transforms by moving bitcasts or shuffles together.
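///
/// A sketch of the rewrite, assuming the narrow-element shuffle is no more
/// expensive than the wide one on the target:
///   %shuf = shufflevector <4 x i32> %v, <4 x i32> undef,
///                         <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
///   %cast = bitcast <4 x i32> %shuf to <8 x i16>
/// becomes (each wide mask element M expands to M*2 and M*2+1):
///   %cast = bitcast <4 x i32> %v to <8 x i16>
///   %shuf = shufflevector <8 x i16> %cast, <8 x i16> undef,
///                         <8 x i32> <i32 2, i32 3, i32 undef, i32 undef,
///                                    i32 undef, i32 undef, i32 undef,
///                                    i32 undef>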
static bool foldBitcastShuf(Instruction &I, const TargetTransformInfo &TTI) {
  Value *V;
  ArrayRef<int> Mask;
  if (!match(&I, m_BitCast(m_OneUse(m_ShuffleVector(m_Value(V), m_Undef(),
                                                    m_Mask(Mask))))))
    return false;

  Type *DestTy = I.getType();
  Type *SrcTy = V->getType();
  if (!DestTy->isVectorTy() || I.getOperand(0)->getType() != SrcTy)
    return false;

  // TODO: Handle bitcast from narrow element type to wide element type.
  assert(SrcTy->isVectorTy() && "Shuffle of non-vector type?");
  unsigned DestNumElts = DestTy->getVectorNumElements();
  unsigned SrcNumElts = SrcTy->getVectorNumElements();
  if (SrcNumElts > DestNumElts)
    return false;

  // The new shuffle must not cost more than the old shuffle. The bitcast is
  // moved ahead of the shuffle, so assume that it has the same cost as before.
  if (TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy) >
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy))
    return false;

  // Bitcast the source vector and expand the shuffle mask to the equivalent
  // mask for narrow elements:
  //   bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
  IRBuilder<> Builder(&I);
  Value *CastV = Builder.CreateBitCast(V, DestTy);
  SmallVector<int, 16> NewMask;
  assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
  unsigned ScaleFactor = DestNumElts / SrcNumElts;
  scaleShuffleMask(ScaleFactor, Mask, NewMask);
  Value *Shuf =
      Builder.CreateShuffleVector(CastV, UndefValue::get(DestTy), NewMask);
  I.replaceAllUsesWith(Shuf);
  return true;
}

/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
static bool runImpl(Function &F, const TargetTransformInfo &TTI,
                    const DominatorTree &DT) {
  if (DisableVectorCombine)
    return false;

  bool MadeChange = false;
  for (BasicBlock &BB : F) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(&BB))
      continue;
    // Do not delete instructions under here and invalidate the iterator.
    // Walk the block backwards for efficiency. We're matching a chain of
    // use->defs, so we're more likely to succeed by starting from the bottom.
    // TODO: It could be more efficient to remove dead instructions
    // iteratively in this loop rather than waiting until the end.
    for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
      if (isa<DbgInfoIntrinsic>(I))
        continue;
      MadeChange |= foldExtractExtract(I, TTI);
      MadeChange |= foldBitcastShuf(I, TTI);
    }
  }

  // We're done with transforms, so remove dead instructions.
  if (MadeChange)
    for (BasicBlock &BB : F)
      SimplifyInstructionsInBlock(&BB);

  return MadeChange;
}

// Pass manager boilerplate below here.

namespace {
class VectorCombineLegacyPass : public FunctionPass {
public:
  static char ID;
  VectorCombineLegacyPass() : FunctionPass(ID) {
    initializeVectorCombineLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    return runImpl(F, TTI, DT);
  }
};
} // namespace

char VectorCombineLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine",
                      "Optimize scalar/vector ops", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(VectorCombineLegacyPass, "vector-combine",
                    "Optimize scalar/vector ops", false, false)

Pass *llvm::createVectorCombinePass() {
  return new VectorCombineLegacyPass();
}

PreservedAnalyses VectorCombinePass::run(Function &F,
                                         FunctionAnalysisManager &FAM) {
  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  if (!runImpl(F, TTI, DT))
    return PreservedAnalyses::all();
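
  // We changed the IR but not the CFG, so report only CFG-level analyses and
  // GlobalsAA as preserved, mirroring getAnalysisUsage() in the legacy pass.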
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<GlobalsAA>();
  return PA;
}