1 //===------- VectorCombine.cpp - Optimize partial vector operations -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass optimizes scalar/vector interactions using target cost models. The
10 // transforms implemented here may not fit in traditional loop-based or SLP
11 // vectorization passes.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/Vectorize/VectorCombine.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/BasicAliasAnalysis.h"
18 #include "llvm/Analysis/GlobalsModRef.h"
19 #include "llvm/Analysis/TargetTransformInfo.h"
20 #include "llvm/Analysis/ValueTracking.h"
21 #include "llvm/Analysis/VectorUtils.h"
22 #include "llvm/IR/Dominators.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/PatternMatch.h"
26 #include "llvm/InitializePasses.h"
27 #include "llvm/Pass.h"
28 #include "llvm/Support/CommandLine.h"
29 #include "llvm/Transforms/Utils/Local.h"
30 #include "llvm/Transforms/Vectorize.h"
31 
32 using namespace llvm;
33 using namespace llvm::PatternMatch;
34 
35 #define DEBUG_TYPE "vector-combine"
36 STATISTIC(NumVecCmp, "Number of vector compares formed");
37 STATISTIC(NumVecBO, "Number of vector binops formed");
38 STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
39 STATISTIC(NumScalarBO, "Number of scalar binops formed");
40 STATISTIC(NumScalarCmp, "Number of scalar compares formed");
41 
42 static cl::opt<bool> DisableVectorCombine(
43     "disable-vector-combine", cl::init(false), cl::Hidden,
44     cl::desc("Disable all vector combine transforms"));
45 
46 static cl::opt<bool> DisableBinopExtractShuffle(
47     "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
48     cl::desc("Disable binop extract to shuffle transforms"));
49 
50 class VectorCombine {
51 public:
52   VectorCombine(Function &F, const TargetTransformInfo &TTI,
53                 const DominatorTree &DT)
54       : F(F), Builder(F.getContext()), TTI(TTI), DT(DT) {}
55 
56   bool run();
57 
58 private:
59   Function &F;
60   IRBuilder<> Builder;
61   const TargetTransformInfo &TTI;
62   const DominatorTree &DT;
63 
64   bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
65                              unsigned Opcode,
66                              ExtractElementInst *&ConvertToShuffle,
67                              unsigned PreferredExtractIndex);
68   ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
69                                        unsigned NewIndex);
70   void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
71                      Instruction &I);
72   void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
73                        Instruction &I);
74   bool foldExtractExtract(Instruction &I);
75   bool foldBitcastShuf(Instruction &I);
76   bool scalarizeBinopOrCmp(Instruction &I);
77 };
78 
79 /// Compare the relative costs of 2 extracts followed by scalar operation vs.
80 /// vector operation(s) followed by extract. Return true if the existing
81 /// instructions are cheaper than a vector alternative. Otherwise, return false
82 /// and if one of the extracts should be transformed to a shufflevector, set
83 /// \p ConvertToShuffle to that extract instruction.
84 bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
85                                           ExtractElementInst *Ext1,
86                                           unsigned Opcode,
87                                           ExtractElementInst *&ConvertToShuffle,
88                                           unsigned PreferredExtractIndex) {
89   assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
90          isa<ConstantInt>(Ext1->getOperand(1)) &&
91          "Expected constant extract indexes");
92   Type *ScalarTy = Ext0->getType();
93   auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
94   int ScalarOpCost, VectorOpCost;
95 
96   // Get cost estimates for scalar and vector versions of the operation.
97   bool IsBinOp = Instruction::isBinaryOp(Opcode);
98   if (IsBinOp) {
99     ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
100     VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
101   } else {
102     assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
103            "Expected a compare");
104     ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy,
105                                           CmpInst::makeCmpResultType(ScalarTy));
106     VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy,
107                                           CmpInst::makeCmpResultType(VecTy));
108   }
109 
110   // Get cost estimates for the extract elements. These costs will factor into
111   // both sequences.
112   unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
113   unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();
114 
115   int Extract0Cost =
116       TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
117   int Extract1Cost =
118       TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);
119 
120   // A more expensive extract will always be replaced by a splat shuffle.
121   // For example, if Ext0 is more expensive:
122   // opcode (extelt V0, Ext0), (ext V1, Ext1) -->
123   // extelt (opcode (splat V0, Ext0), V1), Ext1
124   // TODO: Evaluate whether that always results in lowest cost. Alternatively,
125   //       check the cost of creating a broadcast shuffle and shuffling both
126   //       operands to element 0.
127   int CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
128 
129   // Extra uses of the extracts mean that we include those costs in the
130   // vector total because those instructions will not be eliminated.
131   int OldCost, NewCost;
132   if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
133     // Handle a special case. If the 2 extracts are identical, adjust the
134     // formulas to account for that. The extra use charge allows for either the
135     // CSE'd pattern or an unoptimized form with identical values:
136     // opcode (extelt V, C), (extelt V, C) --> extelt (opcode V, V), C
137     bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
138                                   : !Ext0->hasOneUse() || !Ext1->hasOneUse();
139     OldCost = CheapExtractCost + ScalarOpCost;
140     NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
141   } else {
142     // Handle the general case. Each extract is actually a different value:
143     // opcode (extelt V0, C0), (extelt V1, C1) --> extelt (opcode V0, V1), C
144     OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
145     NewCost = VectorOpCost + CheapExtractCost +
146               !Ext0->hasOneUse() * Extract0Cost +
147               !Ext1->hasOneUse() * Extract1Cost;
148   }
149 
150   if (Ext0Index == Ext1Index) {
151     // If the extract indexes are identical, no shuffle is needed.
152     ConvertToShuffle = nullptr;
153   } else {
154     if (IsBinOp && DisableBinopExtractShuffle)
155       return true;
156 
157     // If we are extracting from 2 different indexes, then one operand must be
158     // shuffled before performing the vector operation. The shuffle mask is
159     // undefined except for 1 lane that is being translated to the remaining
160     // extraction lane. Therefore, it is a splat shuffle. Ex:
161     // ShufMask = { undef, undef, 0, undef }
162     // TODO: The cost model has an option for a "broadcast" shuffle
163     //       (splat-from-element-0), but no option for a more general splat.
164     NewCost +=
165         TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
166 
167     // The more expensive extract will be replaced by a shuffle. If the costs
168     // are equal and there is a preferred extract index, shuffle the opposite
169     // operand. Otherwise, replace the extract with the higher index.
170     if (Extract0Cost > Extract1Cost)
171       ConvertToShuffle = Ext0;
172     else if (Extract1Cost > Extract0Cost)
173       ConvertToShuffle = Ext1;
174     else if (PreferredExtractIndex == Ext0Index)
175       ConvertToShuffle = Ext1;
176     else if (PreferredExtractIndex == Ext1Index)
177       ConvertToShuffle = Ext0;
178     else
179       ConvertToShuffle = Ext0Index > Ext1Index ? Ext0 : Ext1;
180   }
181 
182   // Aggressively form a vector op if the cost is equal because the transform
183   // may enable further optimization.
184   // Codegen can reverse this transform (scalarize) if it was not profitable.
185   return OldCost < NewCost;
186 }
187 
188 /// Given an extract element instruction with constant index operand, shuffle
189 /// the source vector (shift the scalar element) to a NewIndex for extraction.
190 /// Return null if the input can be constant folded, so that we are not creating
191 /// unnecessary instructions.
192 ExtractElementInst *VectorCombine::translateExtract(ExtractElementInst *ExtElt,
193                                                     unsigned NewIndex) {
194   // If the extract can be constant-folded, this code is unsimplified. Defer
195   // to other passes to handle that.
196   Value *X = ExtElt->getVectorOperand();
197   Value *C = ExtElt->getIndexOperand();
198   assert(isa<ConstantInt>(C) && "Expected a constant index operand");
199   if (isa<Constant>(X))
200     return nullptr;
201 
202   // The shuffle mask is undefined except for 1 lane that is being translated
203   // to the cheap extraction lane. Example:
204   // ShufMask = { 2, undef, undef, undef }
205   auto *VecTy = cast<FixedVectorType>(X->getType());
206   SmallVector<int, 32> Mask(VecTy->getNumElements(), -1);
207   Mask[NewIndex] = cast<ConstantInt>(C)->getZExtValue();
208 
209   // extelt X, C --> extelt (shuffle X), NewIndex
210   Value *Shuf =
211       Builder.CreateShuffleVector(X, UndefValue::get(VecTy), Mask, "shift");
212   return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
213 }
214 
215 /// Try to reduce extract element costs by converting scalar compares to vector
216 /// compares followed by extract.
217 /// cmp (ext0 V0, C), (ext1 V1, C)
218 void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
219                                   ExtractElementInst *Ext1, Instruction &I) {
220   assert(isa<CmpInst>(&I) && "Expected a compare");
221   assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
222              cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
223          "Expected matching constant extract indexes");
224 
225   // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
226   ++NumVecCmp;
227   CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
228   Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
229   Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
230   Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
231   I.replaceAllUsesWith(NewExt);
232   NewExt->takeName(&I);
233 }
234 
235 /// Try to reduce extract element costs by converting scalar binops to vector
236 /// binops followed by extract.
237 /// bo (ext0 V0, C), (ext1 V1, C)
238 void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
239                                     ExtractElementInst *Ext1, Instruction &I) {
240   assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
241   assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
242              cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
243          "Expected matching constant extract indexes");
244 
245   // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
246   ++NumVecBO;
247   Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
248   Value *VecBO =
249       Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);
250 
251   // All IR flags are safe to back-propagate because any potential poison
252   // created in unused vector elements is discarded by the extract.
253   if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
254     VecBOInst->copyIRFlags(&I);
255 
256   Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
257   I.replaceAllUsesWith(NewExt);
258   NewExt->takeName(&I);
259 }
260 
261 /// Match an instruction with extracted vector operands.
262 bool VectorCombine::foldExtractExtract(Instruction &I) {
263   // It is not safe to transform things like div, urem, etc. because we may
264   // create undefined behavior when executing those on unknown vector elements.
265   if (!isSafeToSpeculativelyExecute(&I))
266     return false;
267 
268   Instruction *I0, *I1;
269   CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
270   if (!match(&I, m_Cmp(Pred, m_Instruction(I0), m_Instruction(I1))) &&
271       !match(&I, m_BinOp(m_Instruction(I0), m_Instruction(I1))))
272     return false;
273 
274   Value *V0, *V1;
275   uint64_t C0, C1;
276   if (!match(I0, m_ExtractElt(m_Value(V0), m_ConstantInt(C0))) ||
277       !match(I1, m_ExtractElt(m_Value(V1), m_ConstantInt(C1))) ||
278       V0->getType() != V1->getType())
279     return false;
280 
281   // If the scalar value 'I' is going to be re-inserted into a vector, then try
282   // to create an extract to that same element. The extract/insert can be
283   // reduced to a "select shuffle".
284   // TODO: If we add a larger pattern match that starts from an insert, this
285   //       probably becomes unnecessary.
286   auto *Ext0 = cast<ExtractElementInst>(I0);
287   auto *Ext1 = cast<ExtractElementInst>(I1);
288   uint64_t InsertIndex = std::numeric_limits<uint64_t>::max();
289   if (I.hasOneUse())
290     match(I.user_back(),
291           m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));
292 
293   ExtractElementInst *ExtractToChange;
294   if (isExtractExtractCheap(Ext0, Ext1, I.getOpcode(), ExtractToChange,
295                             InsertIndex))
296     return false;
297 
298   if (ExtractToChange) {
299     unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
300     ExtractElementInst *NewExtract =
301         translateExtract(ExtractToChange, CheapExtractIdx);
302     if (!NewExtract)
303       return false;
304     if (ExtractToChange == Ext0)
305       Ext0 = NewExtract;
306     else
307       Ext1 = NewExtract;
308   }
309 
310   if (Pred != CmpInst::BAD_ICMP_PREDICATE)
311     foldExtExtCmp(Ext0, Ext1, I);
312   else
313     foldExtExtBinop(Ext0, Ext1, I);
314 
315   return true;
316 }
317 
318 /// If this is a bitcast of a shuffle, try to bitcast the source vector to the
319 /// destination type followed by shuffle. This can enable further transforms by
320 /// moving bitcasts or shuffles together.
321 bool VectorCombine::foldBitcastShuf(Instruction &I) {
322   Value *V;
323   ArrayRef<int> Mask;
324   if (!match(&I, m_BitCast(
325                      m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
326     return false;
327 
328   // Disallow non-vector casts and length-changing shuffles.
329   // TODO: We could allow any shuffle.
330   auto *DestTy = dyn_cast<VectorType>(I.getType());
331   auto *SrcTy = cast<VectorType>(V->getType());
332   if (!DestTy || I.getOperand(0)->getType() != SrcTy)
333     return false;
334 
335   // The new shuffle must not cost more than the old shuffle. The bitcast is
336   // moved ahead of the shuffle, so assume that it has the same cost as before.
337   if (TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy) >
338       TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy))
339     return false;
340 
341   unsigned DestNumElts = DestTy->getNumElements();
342   unsigned SrcNumElts = SrcTy->getNumElements();
343   SmallVector<int, 16> NewMask;
344   if (SrcNumElts <= DestNumElts) {
345     // The bitcast is from wide to narrow/equal elements. The shuffle mask can
346     // always be expanded to the equivalent form choosing narrower elements.
347     assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
348     unsigned ScaleFactor = DestNumElts / SrcNumElts;
349     narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
350   } else {
351     // The bitcast is from narrow elements to wide elements. The shuffle mask
352     // must choose consecutive elements to allow casting first.
353     assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
354     unsigned ScaleFactor = SrcNumElts / DestNumElts;
355     if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
356       return false;
357   }
358   // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
359   ++NumShufOfBitcast;
360   Value *CastV = Builder.CreateBitCast(V, DestTy);
361   Value *Shuf =
362       Builder.CreateShuffleVector(CastV, UndefValue::get(DestTy), NewMask);
363   I.replaceAllUsesWith(Shuf);
364   return true;
365 }
366 
367 /// Match a vector binop or compare instruction with at least one inserted
368 /// scalar operand and convert to scalar binop/cmp followed by insertelement.
369 bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
370   CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
371   Value *Ins0, *Ins1;
372   if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
373       !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
374     return false;
375 
376   // Do not convert the vector condition of a vector select into a scalar
377   // condition. That may cause problems for codegen because of differences in
378   // boolean formats and register-file transfers.
379   // TODO: Can we account for that in the cost model?
380   bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
381   if (IsCmp)
382     for (User *U : I.users())
383       if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
384         return false;
385 
386   // Match against one or both scalar values being inserted into constant
387   // vectors:
388   // vec_op VecC0, (inselt VecC1, V1, Index)
389   // vec_op (inselt VecC0, V0, Index), VecC1
390   // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
391   // TODO: Deal with mismatched index constants and variable indexes?
392   Constant *VecC0 = nullptr, *VecC1 = nullptr;
393   Value *V0 = nullptr, *V1 = nullptr;
394   uint64_t Index0 = 0, Index1 = 0;
395   if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0),
396                                m_ConstantInt(Index0))) &&
397       !match(Ins0, m_Constant(VecC0)))
398     return false;
399   if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1),
400                                m_ConstantInt(Index1))) &&
401       !match(Ins1, m_Constant(VecC1)))
402     return false;
403 
404   bool IsConst0 = !V0;
405   bool IsConst1 = !V1;
406   if (IsConst0 && IsConst1)
407     return false;
408   if (!IsConst0 && !IsConst1 && Index0 != Index1)
409     return false;
410 
411   // Bail for single insertion if it is a load.
412   // TODO: Handle this once getVectorInstrCost can cost for load/stores.
413   auto *I0 = dyn_cast_or_null<Instruction>(V0);
414   auto *I1 = dyn_cast_or_null<Instruction>(V1);
415   if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
416       (IsConst1 && I0 && I0->mayReadFromMemory()))
417     return false;
418 
419   uint64_t Index = IsConst0 ? Index1 : Index0;
420   Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
421   Type *VecTy = I.getType();
422   assert(VecTy->isVectorTy() &&
423          (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
424          (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy() ||
425           ScalarTy->isPointerTy()) &&
426          "Unexpected types for insert element into binop or cmp");
427 
428   unsigned Opcode = I.getOpcode();
429   int ScalarOpCost, VectorOpCost;
430   if (IsCmp) {
431     ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy);
432     VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy);
433   } else {
434     ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
435     VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
436   }
437 
438   // Get cost estimate for the insert element. This cost will factor into
439   // both sequences.
440   int InsertCost =
441       TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
442   int OldCost = (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) +
443                 VectorOpCost;
444   int NewCost = ScalarOpCost + InsertCost +
445                 (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
446                 (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
447 
448   // We want to scalarize unless the vector variant actually has lower cost.
449   if (OldCost < NewCost)
450     return false;
451 
452   // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
453   // inselt NewVecC, (scalar_op V0, V1), Index
454   if (IsCmp)
455     ++NumScalarCmp;
456   else
457     ++NumScalarBO;
458 
459   // For constant cases, extract the scalar element, this should constant fold.
460   if (IsConst0)
461     V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
462   if (IsConst1)
463     V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));
464 
465   Value *Scalar =
466       IsCmp ? Builder.CreateCmp(Pred, V0, V1)
467             : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);
468 
469   Scalar->setName(I.getName() + ".scalar");
470 
471   // All IR flags are safe to back-propagate. There is no potential for extra
472   // poison to be created by the scalar instruction.
473   if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
474     ScalarInst->copyIRFlags(&I);
475 
476   // Fold the vector constants in the original vectors into a new base vector.
477   Constant *NewVecC = IsCmp ? ConstantExpr::getCompare(Pred, VecC0, VecC1)
478                             : ConstantExpr::get(Opcode, VecC0, VecC1);
479   Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
480   I.replaceAllUsesWith(Insert);
481   Insert->takeName(&I);
482   return true;
483 }
484 
485 /// This is the entry point for all transforms. Pass manager differences are
486 /// handled in the callers of this function.
487 bool VectorCombine::run() {
488   if (DisableVectorCombine)
489     return false;
490 
491   bool MadeChange = false;
492   for (BasicBlock &BB : F) {
493     // Ignore unreachable basic blocks.
494     if (!DT.isReachableFromEntry(&BB))
495       continue;
496     // Do not delete instructions under here and invalidate the iterator.
497     // Walk the block forwards to enable simple iterative chains of transforms.
498     // TODO: It could be more efficient to remove dead instructions
499     //       iteratively in this loop rather than waiting until the end.
500     for (Instruction &I : BB) {
501       if (isa<DbgInfoIntrinsic>(I))
502         continue;
503       Builder.SetInsertPoint(&I);
504       MadeChange |= foldExtractExtract(I);
505       MadeChange |= foldBitcastShuf(I);
506       MadeChange |= scalarizeBinopOrCmp(I);
507     }
508   }
509 
510   // We're done with transforms, so remove dead instructions.
511   if (MadeChange)
512     for (BasicBlock &BB : F)
513       SimplifyInstructionsInBlock(&BB);
514 
515   return MadeChange;
516 }
517 
518 // Pass manager boilerplate below here.
519 
520 namespace {
521 class VectorCombineLegacyPass : public FunctionPass {
522 public:
523   static char ID;
524   VectorCombineLegacyPass() : FunctionPass(ID) {
525     initializeVectorCombineLegacyPassPass(*PassRegistry::getPassRegistry());
526   }
527 
528   void getAnalysisUsage(AnalysisUsage &AU) const override {
529     AU.addRequired<DominatorTreeWrapperPass>();
530     AU.addRequired<TargetTransformInfoWrapperPass>();
531     AU.setPreservesCFG();
532     AU.addPreserved<DominatorTreeWrapperPass>();
533     AU.addPreserved<GlobalsAAWrapperPass>();
534     AU.addPreserved<AAResultsWrapperPass>();
535     AU.addPreserved<BasicAAWrapperPass>();
536     FunctionPass::getAnalysisUsage(AU);
537   }
538 
539   bool runOnFunction(Function &F) override {
540     if (skipFunction(F))
541       return false;
542     auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
543     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
544     VectorCombine Combiner(F, TTI, DT);
545     return Combiner.run();
546   }
547 };
548 } // namespace
549 
550 char VectorCombineLegacyPass::ID = 0;
551 INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine",
552                       "Optimize scalar/vector ops", false,
553                       false)
554 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
555 INITIALIZE_PASS_END(VectorCombineLegacyPass, "vector-combine",
556                     "Optimize scalar/vector ops", false, false)
557 Pass *llvm::createVectorCombinePass() {
558   return new VectorCombineLegacyPass();
559 }
560 
561 PreservedAnalyses VectorCombinePass::run(Function &F,
562                                          FunctionAnalysisManager &FAM) {
563   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
564   DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
565   VectorCombine Combiner(F, TTI, DT);
566   if (!Combiner.run())
567     return PreservedAnalyses::all();
568   PreservedAnalyses PA;
569   PA.preserveSet<CFGAnalyses>();
570   PA.preserve<GlobalsAA>();
571   PA.preserve<AAManager>();
572   PA.preserve<BasicAA>();
573   return PA;
574 }
575