1 //===------- VectorCombine.cpp - Optimize partial vector operations -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass optimizes scalar/vector interactions using target cost models. The
10 // transforms implemented here may not fit in traditional loop-based or SLP
11 // vectorization passes.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/Vectorize/VectorCombine.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/BasicAliasAnalysis.h"
18 #include "llvm/Analysis/GlobalsModRef.h"
19 #include "llvm/Analysis/TargetTransformInfo.h"
20 #include "llvm/Analysis/ValueTracking.h"
21 #include "llvm/Analysis/VectorUtils.h"
22 #include "llvm/IR/Dominators.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/PatternMatch.h"
26 #include "llvm/InitializePasses.h"
27 #include "llvm/Pass.h"
28 #include "llvm/Support/CommandLine.h"
29 #include "llvm/Transforms/Utils/Local.h"
30 #include "llvm/Transforms/Vectorize.h"
31 
32 using namespace llvm;
33 using namespace llvm::PatternMatch;
34 
#define DEBUG_TYPE "vector-combine"
// Transform counters, reported with -stats.
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarBO, "Number of scalar binops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");

// Debug/triage escape hatch: turn off every transform in this pass.
static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

// Narrower escape hatch: keep the pass enabled, but do not introduce a
// shuffle when combining extracts from different lanes into a vector binop.
static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));
49 
/// Driver for the pass: holds the per-function analyses and an IRBuilder
/// whose insertion point run() moves to each candidate instruction before a
/// transform is attempted.
class VectorCombine {
public:
  VectorCombine(Function &F, const TargetTransformInfo &TTI,
                const DominatorTree &DT)
      : F(F), Builder(F.getContext()), TTI(TTI), DT(DT) {}

  /// Apply all transforms to every reachable block of the function.
  /// \returns true if any IR was changed.
  bool run();

private:
  Function &F;
  IRBuilder<> Builder;            // Insertion point set per-instruction in run().
  const TargetTransformInfo &TTI; // Cost model that gates each transform.
  const DominatorTree &DT;        // Used to skip unreachable blocks.

  // Cost query for foldExtractExtract(); see the comment at the definition.
  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                             unsigned Opcode,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  // Shuffle-then-extract rewrite used when the two extract indexes differ.
  ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
                                       unsigned NewIndex);
  // Rewrites for a scalar cmp/binop of two extracted elements.
  void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                     Instruction &I);
  void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                       Instruction &I);
  // The three top-level transforms tried on each instruction.
  bool foldExtractExtract(Instruction &I);
  bool foldBitcastShuf(Instruction &I);
  bool scalarizeBinopOrCmp(Instruction &I);
};
78 
/// Redirect every use of \p Old to \p New, and move Old's name onto New so
/// the replacement keeps the familiar value name in the IR.
static void replaceValue(Value &Old, Value &New) {
  Old.replaceAllUsesWith(&New);
  New.takeName(&Old);
}
83 
/// Compare the relative costs of 2 extracts followed by scalar operation vs.
/// vector operation(s) followed by extract. Return true if the existing
/// instructions are cheaper than a vector alternative. Otherwise, return false
/// and if one of the extracts should be transformed to a shufflevector, set
/// \p ConvertToShuffle to that extract instruction.
/// \p Opcode is the scalar binop or compare opcode being considered.
/// \p PreferredExtractIndex is the lane the caller would like the final
/// extract to use (e.g. because the result feeds an insert at that lane);
/// callers pass an out-of-range value when there is no preference.
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          unsigned Opcode,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
         isa<ConstantInt>(Ext1->getOperand(1)) &&
         "Expected constant extract indexes");
  Type *ScalarTy = Ext0->getType();
  auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
  int ScalarOpCost, VectorOpCost;

  // Get cost estimates for scalar and vector versions of the operation.
  bool IsBinOp = Instruction::isBinaryOp(Opcode);
  if (IsBinOp) {
    ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
  } else {
    assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
           "Expected a compare");
    ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy,
                                          CmpInst::makeCmpResultType(ScalarTy));
    VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy,
                                          CmpInst::makeCmpResultType(VecTy));
  }

  // Get cost estimates for the extract elements. These costs will factor into
  // both sequences.
  unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
  unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();

  int Extract0Cost =
      TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
  int Extract1Cost =
      TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);

  // A more expensive extract will always be replaced by a splat shuffle.
  // For example, if Ext0 is more expensive:
  // opcode (extelt V0, Ext0), (ext V1, Ext1) -->
  // extelt (opcode (splat V0, Ext0), V1), Ext1
  // TODO: Evaluate whether that always results in lowest cost. Alternatively,
  //       check the cost of creating a broadcast shuffle and shuffling both
  //       operands to element 0.
  int CheapExtractCost = std::min(Extract0Cost, Extract1Cost);

  // Extra uses of the extracts mean that we include those costs in the
  // vector total because those instructions will not be eliminated.
  int OldCost, NewCost;
  if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
    // Handle a special case. If the 2 extracts are identical, adjust the
    // formulas to account for that. The extra use charge allows for either the
    // CSE'd pattern or an unoptimized form with identical values:
    // opcode (extelt V, C), (extelt V, C) --> extelt (opcode V, V), C
    // Note: a bool multiplied below adds the extra extract cost only when the
    // extract(s) have uses besides this operation.
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
                                  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
  } else {
    // Handle the general case. Each extract is actually a different value:
    // opcode (extelt V0, C0), (extelt V1, C1) --> extelt (opcode V0, V1), C
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
              !Ext0->hasOneUse() * Extract0Cost +
              !Ext1->hasOneUse() * Extract1Cost;
  }

  if (Ext0Index == Ext1Index) {
    // If the extract indexes are identical, no shuffle is needed.
    ConvertToShuffle = nullptr;
  } else {
    // Respect the narrow escape hatch: do not introduce a shuffle for binops.
    if (IsBinOp && DisableBinopExtractShuffle)
      return true;

    // If we are extracting from 2 different indexes, then one operand must be
    // shuffled before performing the vector operation. The shuffle mask is
    // undefined except for 1 lane that is being translated to the remaining
    // extraction lane. Therefore, it is a splat shuffle. Ex:
    // ShufMask = { undef, undef, 0, undef }
    // TODO: The cost model has an option for a "broadcast" shuffle
    //       (splat-from-element-0), but no option for a more general splat.
    NewCost +=
        TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);

    // The more expensive extract will be replaced by a shuffle. If the costs
    // are equal and there is a preferred extract index, shuffle the opposite
    // operand. Otherwise, replace the extract with the higher index.
    if (Extract0Cost > Extract1Cost)
      ConvertToShuffle = Ext0;
    else if (Extract1Cost > Extract0Cost)
      ConvertToShuffle = Ext1;
    else if (PreferredExtractIndex == Ext0Index)
      ConvertToShuffle = Ext1;
    else if (PreferredExtractIndex == Ext1Index)
      ConvertToShuffle = Ext0;
    else
      ConvertToShuffle = Ext0Index > Ext1Index ? Ext0 : Ext1;
  }

  // Aggressively form a vector op if the cost is equal because the transform
  // may enable further optimization.
  // Codegen can reverse this transform (scalarize) if it was not profitable.
  return OldCost < NewCost;
}
192 
193 /// Given an extract element instruction with constant index operand, shuffle
194 /// the source vector (shift the scalar element) to a NewIndex for extraction.
195 /// Return null if the input can be constant folded, so that we are not creating
196 /// unnecessary instructions.
197 ExtractElementInst *VectorCombine::translateExtract(ExtractElementInst *ExtElt,
198                                                     unsigned NewIndex) {
199   // If the extract can be constant-folded, this code is unsimplified. Defer
200   // to other passes to handle that.
201   Value *X = ExtElt->getVectorOperand();
202   Value *C = ExtElt->getIndexOperand();
203   assert(isa<ConstantInt>(C) && "Expected a constant index operand");
204   if (isa<Constant>(X))
205     return nullptr;
206 
207   // The shuffle mask is undefined except for 1 lane that is being translated
208   // to the cheap extraction lane. Example:
209   // ShufMask = { 2, undef, undef, undef }
210   auto *VecTy = cast<FixedVectorType>(X->getType());
211   SmallVector<int, 32> Mask(VecTy->getNumElements(), -1);
212   Mask[NewIndex] = cast<ConstantInt>(C)->getZExtValue();
213 
214   // extelt X, C --> extelt (shuffle X), NewIndex
215   Value *Shuf =
216       Builder.CreateShuffleVector(X, UndefValue::get(VecTy), Mask, "shift");
217   return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
218 }
219 
220 /// Try to reduce extract element costs by converting scalar compares to vector
221 /// compares followed by extract.
222 /// cmp (ext0 V0, C), (ext1 V1, C)
223 void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
224                                   ExtractElementInst *Ext1, Instruction &I) {
225   assert(isa<CmpInst>(&I) && "Expected a compare");
226   assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
227              cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
228          "Expected matching constant extract indexes");
229 
230   // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
231   ++NumVecCmp;
232   CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
233   Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
234   Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
235   Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
236   replaceValue(I, *NewExt);
237 }
238 
239 /// Try to reduce extract element costs by converting scalar binops to vector
240 /// binops followed by extract.
241 /// bo (ext0 V0, C), (ext1 V1, C)
242 void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
243                                     ExtractElementInst *Ext1, Instruction &I) {
244   assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
245   assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
246              cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
247          "Expected matching constant extract indexes");
248 
249   // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
250   ++NumVecBO;
251   Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
252   Value *VecBO =
253       Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);
254 
255   // All IR flags are safe to back-propagate because any potential poison
256   // created in unused vector elements is discarded by the extract.
257   if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
258     VecBOInst->copyIRFlags(&I);
259 
260   Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
261   replaceValue(I, *NewExt);
262 }
263 
/// Match an instruction with extracted vector operands and try to convert the
/// scalar cmp/binop into its vector form followed by a single extract.
/// \returns true if the instruction was replaced.
bool VectorCombine::foldExtractExtract(Instruction &I) {
  // It is not safe to transform things like div, urem, etc. because we may
  // create undefined behavior when executing those on unknown vector elements.
  if (!isSafeToSpeculativelyExecute(&I))
    return false;

  // The candidate is either a compare or a binary operator whose operands are
  // both instructions (further constrained to extracts below).
  Instruction *I0, *I1;
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  if (!match(&I, m_Cmp(Pred, m_Instruction(I0), m_Instruction(I1))) &&
      !match(&I, m_BinOp(m_Instruction(I0), m_Instruction(I1))))
    return false;

  // Both operands must be extracts with constant indexes from vectors of the
  // same type.
  Value *V0, *V1;
  uint64_t C0, C1;
  if (!match(I0, m_ExtractElt(m_Value(V0), m_ConstantInt(C0))) ||
      !match(I1, m_ExtractElt(m_Value(V1), m_ConstantInt(C1))) ||
      V0->getType() != V1->getType())
    return false;

  // If the scalar value 'I' is going to be re-inserted into a vector, then try
  // to create an extract to that same element. The extract/insert can be
  // reduced to a "select shuffle".
  // TODO: If we add a larger pattern match that starts from an insert, this
  //       probably becomes unnecessary.
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  // InsertIndex stays out-of-range (no preference) unless the sole user is an
  // insertelement with a constant index; match() only overwrites it on success.
  uint64_t InsertIndex = std::numeric_limits<uint64_t>::max();
  if (I.hasOneUse())
    match(I.user_back(),
          m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));

  // Bail out if the cost model says the existing scalar form is cheaper.
  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I.getOpcode(), ExtractToChange,
                            InsertIndex))
    return false;

  // If the extract lanes differ, rewrite one extract via a splat shuffle so
  // both operands end up extracting from the same (cheaper) lane.
  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    ExtractElementInst *NewExtract =
        translateExtract(ExtractToChange, CheapExtractIdx);
    if (!NewExtract)
      return false;
    if (ExtractToChange == Ext0)
      Ext0 = NewExtract;
    else
      Ext1 = NewExtract;
  }

  // At this point both extracts use the same constant index; fold to the
  // vector op + single extract.
  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
    foldExtExtCmp(Ext0, Ext1, I);
  else
    foldExtExtBinop(Ext0, Ext1, I);

  return true;
}
320 
321 /// If this is a bitcast of a shuffle, try to bitcast the source vector to the
322 /// destination type followed by shuffle. This can enable further transforms by
323 /// moving bitcasts or shuffles together.
324 bool VectorCombine::foldBitcastShuf(Instruction &I) {
325   Value *V;
326   ArrayRef<int> Mask;
327   if (!match(&I, m_BitCast(
328                      m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
329     return false;
330 
331   // Disallow non-vector casts and length-changing shuffles.
332   // TODO: We could allow any shuffle.
333   auto *DestTy = dyn_cast<VectorType>(I.getType());
334   auto *SrcTy = cast<VectorType>(V->getType());
335   if (!DestTy || I.getOperand(0)->getType() != SrcTy)
336     return false;
337 
338   // The new shuffle must not cost more than the old shuffle. The bitcast is
339   // moved ahead of the shuffle, so assume that it has the same cost as before.
340   if (TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy) >
341       TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy))
342     return false;
343 
344   unsigned DestNumElts = DestTy->getNumElements();
345   unsigned SrcNumElts = SrcTy->getNumElements();
346   SmallVector<int, 16> NewMask;
347   if (SrcNumElts <= DestNumElts) {
348     // The bitcast is from wide to narrow/equal elements. The shuffle mask can
349     // always be expanded to the equivalent form choosing narrower elements.
350     assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
351     unsigned ScaleFactor = DestNumElts / SrcNumElts;
352     narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
353   } else {
354     // The bitcast is from narrow elements to wide elements. The shuffle mask
355     // must choose consecutive elements to allow casting first.
356     assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
357     unsigned ScaleFactor = SrcNumElts / DestNumElts;
358     if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
359       return false;
360   }
361   // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
362   ++NumShufOfBitcast;
363   Value *CastV = Builder.CreateBitCast(V, DestTy);
364   Value *Shuf =
365       Builder.CreateShuffleVector(CastV, UndefValue::get(DestTy), NewMask);
366   replaceValue(I, *Shuf);
367   return true;
368 }
369 
/// Match a vector binop or compare instruction with at least one inserted
/// scalar operand and convert to scalar binop/cmp followed by insertelement.
/// \returns true if the instruction was replaced.
bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  Value *Ins0, *Ins1;
  if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
      !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
    return false;

  // Do not convert the vector condition of a vector select into a scalar
  // condition. That may cause problems for codegen because of differences in
  // boolean formats and register-file transfers.
  // TODO: Can we account for that in the cost model?
  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
  if (IsCmp)
    for (User *U : I.users())
      if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
        return false;

  // Match against one or both scalar values being inserted into constant
  // vectors:
  // vec_op VecC0, (inselt VecC1, V1, Index)
  // vec_op (inselt VecC0, V0, Index), VecC1
  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
  // TODO: Deal with mismatched index constants and variable indexes?
  Constant *VecC0 = nullptr, *VecC1 = nullptr;
  Value *V0 = nullptr, *V1 = nullptr;
  uint64_t Index0 = 0, Index1 = 0;
  if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0),
                               m_ConstantInt(Index0))) &&
      !match(Ins0, m_Constant(VecC0)))
    return false;
  if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1),
                               m_ConstantInt(Index1))) &&
      !match(Ins1, m_Constant(VecC1)))
    return false;

  // V0/V1 are only set by a successful insertelement match, so a null value
  // means that operand is a plain constant vector.
  bool IsConst0 = !V0;
  bool IsConst1 = !V1;
  // An all-constant op should have been folded already; and mismatched insert
  // lanes are not handled (see TODO above).
  if (IsConst0 && IsConst1)
    return false;
  if (!IsConst0 && !IsConst1 && Index0 != Index1)
    return false;

  // Bail for single insertion if it is a load.
  // TODO: Handle this once getVectorInstrCost can cost for load/stores.
  auto *I0 = dyn_cast_or_null<Instruction>(V0);
  auto *I1 = dyn_cast_or_null<Instruction>(V1);
  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
      (IsConst1 && I0 && I0->mayReadFromMemory()))
    return false;

  uint64_t Index = IsConst0 ? Index1 : Index0;
  Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
  Type *VecTy = I.getType();
  assert(VecTy->isVectorTy() &&
         (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
         (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy() ||
          ScalarTy->isPointerTy()) &&
         "Unexpected types for insert element into binop or cmp");

  unsigned Opcode = I.getOpcode();
  int ScalarOpCost, VectorOpCost;
  if (IsCmp) {
    // NOTE(review): unlike isExtractExtractCheap, no condition type is passed
    // here, so the default is used — confirm this is intentional.
    ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy);
  } else {
    ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
  }

  // Get cost estimate for the insert element. This cost will factor into
  // both sequences.
  int InsertCost =
      TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
  int OldCost = (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) +
                VectorOpCost;
  // Inserts with extra uses survive the transform, so charge them again in
  // the new-cost total (the bool multiplications add InsertCost only then).
  int NewCost = ScalarOpCost + InsertCost +
                (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
                (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);

  // We want to scalarize unless the vector variant actually has lower cost.
  if (OldCost < NewCost)
    return false;

  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
  // inselt NewVecC, (scalar_op V0, V1), Index
  if (IsCmp)
    ++NumScalarCmp;
  else
    ++NumScalarBO;

  // For constant cases, extract the scalar element, this should constant fold.
  if (IsConst0)
    V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
  if (IsConst1)
    V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));

  Value *Scalar =
      IsCmp ? Builder.CreateCmp(Pred, V0, V1)
            : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);

  Scalar->setName(I.getName() + ".scalar");

  // All IR flags are safe to back-propagate. There is no potential for extra
  // poison to be created by the scalar instruction.
  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
    ScalarInst->copyIRFlags(&I);

  // Fold the vector constants in the original vectors into a new base vector.
  Constant *NewVecC = IsCmp ? ConstantExpr::getCompare(Pred, VecC0, VecC1)
                            : ConstantExpr::get(Opcode, VecC0, VecC1);
  Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
  replaceValue(I, *Insert);
  return true;
}
486 
487 /// This is the entry point for all transforms. Pass manager differences are
488 /// handled in the callers of this function.
489 bool VectorCombine::run() {
490   if (DisableVectorCombine)
491     return false;
492 
493   bool MadeChange = false;
494   for (BasicBlock &BB : F) {
495     // Ignore unreachable basic blocks.
496     if (!DT.isReachableFromEntry(&BB))
497       continue;
498     // Do not delete instructions under here and invalidate the iterator.
499     // Walk the block forwards to enable simple iterative chains of transforms.
500     // TODO: It could be more efficient to remove dead instructions
501     //       iteratively in this loop rather than waiting until the end.
502     for (Instruction &I : BB) {
503       if (isa<DbgInfoIntrinsic>(I))
504         continue;
505       Builder.SetInsertPoint(&I);
506       MadeChange |= foldExtractExtract(I);
507       MadeChange |= foldBitcastShuf(I);
508       MadeChange |= scalarizeBinopOrCmp(I);
509     }
510   }
511 
512   // We're done with transforms, so remove dead instructions.
513   if (MadeChange)
514     for (BasicBlock &BB : F)
515       SimplifyInstructionsInBlock(&BB);
516 
517   return MadeChange;
518 }
519 
520 // Pass manager boilerplate below here.
521 
522 namespace {
523 class VectorCombineLegacyPass : public FunctionPass {
524 public:
525   static char ID;
526   VectorCombineLegacyPass() : FunctionPass(ID) {
527     initializeVectorCombineLegacyPassPass(*PassRegistry::getPassRegistry());
528   }
529 
530   void getAnalysisUsage(AnalysisUsage &AU) const override {
531     AU.addRequired<DominatorTreeWrapperPass>();
532     AU.addRequired<TargetTransformInfoWrapperPass>();
533     AU.setPreservesCFG();
534     AU.addPreserved<DominatorTreeWrapperPass>();
535     AU.addPreserved<GlobalsAAWrapperPass>();
536     AU.addPreserved<AAResultsWrapperPass>();
537     AU.addPreserved<BasicAAWrapperPass>();
538     FunctionPass::getAnalysisUsage(AU);
539   }
540 
541   bool runOnFunction(Function &F) override {
542     if (skipFunction(F))
543       return false;
544     auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
545     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
546     VectorCombine Combiner(F, TTI, DT);
547     return Combiner.run();
548   }
549 };
550 } // namespace
551 
char VectorCombineLegacyPass::ID = 0;
// Register the pass and its analysis dependency with the legacy pass manager.
INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine",
                      "Optimize scalar/vector ops", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(VectorCombineLegacyPass, "vector-combine",
                    "Optimize scalar/vector ops", false, false)
// Factory for building the legacy pass in pipeline setup code.
Pass *llvm::createVectorCombinePass() {
  return new VectorCombineLegacyPass();
}
562 
563 PreservedAnalyses VectorCombinePass::run(Function &F,
564                                          FunctionAnalysisManager &FAM) {
565   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
566   DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
567   VectorCombine Combiner(F, TTI, DT);
568   if (!Combiner.run())
569     return PreservedAnalyses::all();
570   PreservedAnalyses PA;
571   PA.preserveSet<CFGAnalyses>();
572   PA.preserve<GlobalsAA>();
573   PA.preserve<AAManager>();
574   PA.preserve<BasicAA>();
575   return PA;
576 }
577