//===------- VectorCombine.cpp - Optimize partial vector operations -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass optimizes scalar/vector interactions using target cost models. The
// transforms implemented here may not fit in traditional loop-based or SLP
// vectorization passes.
//
//===----------------------------------------------------------------------===//
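//
// As an illustrative sketch (hypothetical values, not taken from a regression
// test), the extract/extract fold below can rewrite a scalar binop of two
// extracted lanes:
//
//   %e0 = extractelement <4 x float> %x, i32 0
//   %e1 = extractelement <4 x float> %y, i32 0
//   %r = fadd float %e0, %e1
//
// into a vector op followed by a single extract, when the target cost model
// agrees:
//
//   %v = fadd <4 x float> %x, %y
//   %r = extractelement <4 x float> %v, i32 0
//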

#include "llvm/Transforms/Vectorize/VectorCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <limits>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "vector-combine"

STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarBO, "Number of scalar binops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");

static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

class VectorCombine {
public:
  VectorCombine(Function &F, const TargetTransformInfo &TTI,
                const DominatorTree &DT)
      : F(F), Builder(F.getContext()), TTI(TTI), DT(DT) {}

  bool run();

private:
  Function &F;
  IRBuilder<> Builder;
  const TargetTransformInfo &TTI;
  const DominatorTree &DT;

  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                             unsigned Opcode,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                     Instruction &I);
  void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                       Instruction &I);
  bool foldExtractExtract(Instruction &I);
  bool foldBitcastShuf(Instruction &I);
  bool scalarizeBinopOrCmp(Instruction &I);
};

static void replaceValue(Value &Old, Value &New) {
  Old.replaceAllUsesWith(&New);
  New.takeName(&Old);
}

/// Compare the relative costs of 2 extracts followed by scalar operation vs.
/// vector operation(s) followed by extract. Return true if the existing
/// instructions are cheaper than a vector alternative. Otherwise, return false
/// and if one of the extracts should be transformed to a shufflevector, set
/// \p ConvertToShuffle to that extract instruction.
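///
/// Purely for intuition (real costs come from TTI and vary by target): assume
/// unit cost for the scalar op, the vector op, and each extract, with two
/// single-use extracts of different vectors at the same index. Then
/// OldCost = 1 + 1 + 1 = 3 (two extracts plus the scalar op) and
/// NewCost = 1 + 1 = 2 (the vector op plus one extract), so this returns false
/// and the caller forms the vector op.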
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          unsigned Opcode,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
         isa<ConstantInt>(Ext1->getOperand(1)) &&
         "Expected constant extract indexes");
  Type *ScalarTy = Ext0->getType();
  auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
  int ScalarOpCost, VectorOpCost;

  // Get cost estimates for scalar and vector versions of the operation.
  bool IsBinOp = Instruction::isBinaryOp(Opcode);
  if (IsBinOp) {
    ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
  } else {
    assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
           "Expected a compare");
    ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy,
                                          CmpInst::makeCmpResultType(ScalarTy));
    VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy,
                                          CmpInst::makeCmpResultType(VecTy));
  }

  // Get cost estimates for the extract elements. These costs will factor into
  // both sequences.
  unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
  unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();

  int Extract0Cost =
      TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
  int Extract1Cost =
      TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);

  // A more expensive extract will always be replaced by a splat shuffle.
  // For example, if Ext0 is more expensive:
  // opcode (extelt V0, Ext0), (ext V1, Ext1) -->
  // extelt (opcode (splat V0, Ext0), V1), Ext1
  // TODO: Evaluate whether that always results in lowest cost. Alternatively,
  //       check the cost of creating a broadcast shuffle and shuffling both
  //       operands to element 0.
  int CheapExtractCost = std::min(Extract0Cost, Extract1Cost);

  // Extra uses of the extracts mean that we include those costs in the
  // vector total because those instructions will not be eliminated.
  int OldCost, NewCost;
  if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
    // Handle a special case. If the 2 extracts are identical, adjust the
    // formulas to account for that. The extra use charge allows for either the
    // CSE'd pattern or an unoptimized form with identical values:
    // opcode (extelt V, C), (extelt V, C) --> extelt (opcode V, V), C
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
                                  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
  } else {
    // Handle the general case. Each extract is actually a different value:
    // opcode (extelt V0, C0), (extelt V1, C1) --> extelt (opcode V0, V1), C
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
              !Ext0->hasOneUse() * Extract0Cost +
              !Ext1->hasOneUse() * Extract1Cost;
  }

  if (Ext0Index == Ext1Index) {
    // If the extract indexes are identical, no shuffle is needed.
    ConvertToShuffle = nullptr;
  } else {
    if (IsBinOp && DisableBinopExtractShuffle)
      return true;

    // If we are extracting from 2 different indexes, then one operand must be
    // shuffled before performing the vector operation. The shuffle mask is
    // undefined except for 1 lane that is being translated to the remaining
    // extraction lane. Therefore, it is a splat shuffle. Ex:
    // ShufMask = { undef, undef, 0, undef }
    // TODO: The cost model has an option for a "broadcast" shuffle
    //       (splat-from-element-0), but no option for a more general splat.
    NewCost +=
        TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);

    // The more expensive extract will be replaced by a shuffle. If the costs
    // are equal and there is a preferred extract index, shuffle the opposite
    // operand. Otherwise, replace the extract with the higher index.
    if (Extract0Cost > Extract1Cost)
      ConvertToShuffle = Ext0;
    else if (Extract1Cost > Extract0Cost)
      ConvertToShuffle = Ext1;
    else if (PreferredExtractIndex == Ext0Index)
      ConvertToShuffle = Ext1;
    else if (PreferredExtractIndex == Ext1Index)
      ConvertToShuffle = Ext0;
    else
      ConvertToShuffle = Ext0Index > Ext1Index ? Ext0 : Ext1;
  }

  // Aggressively form a vector op if the cost is equal because the transform
  // may enable further optimization.
  // Codegen can reverse this transform (scalarize) if it was not profitable.
  return OldCost < NewCost;
}

/// Create a shuffle that translates (shifts) 1 element from the input vector
/// to a new element location.
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
                                 unsigned NewIndex, IRBuilder<> &Builder) {
  // The shuffle mask is undefined except for 1 lane that is being translated
  // to the new element index. Example for OldIndex == 2 and NewIndex == 0:
  // ShufMask = { 2, undef, undef, undef }
  auto *VecTy = cast<FixedVectorType>(Vec->getType());
  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
  ShufMask[NewIndex] = OldIndex;
  Value *Undef = UndefValue::get(VecTy);
  return Builder.CreateShuffleVector(Vec, Undef, ShufMask, "shift");
}

/// Given an extract element instruction with constant index operand, shuffle
/// the source vector (shift the scalar element) to a NewIndex for extraction.
/// Return null if the input can be constant folded, so that we are not creating
/// unnecessary instructions.
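///
/// A sketch with hypothetical values (not from a test): moving an extract at
/// index 2 over to index 0 produces:
///   %shift = shufflevector <4 x i32> %x, <4 x i32> undef,
///                          <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
///   %e = extractelement <4 x i32> %shift, i32 0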
static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
                                            unsigned NewIndex,
                                            IRBuilder<> &Builder) {
  // If the extract can be constant-folded, this code is unsimplified. Defer
  // to other passes to handle that.
  Value *X = ExtElt->getVectorOperand();
  Value *C = ExtElt->getIndexOperand();
  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
  if (isa<Constant>(X))
    return nullptr;

  Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
                                   NewIndex, Builder);
  return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
}

/// Try to reduce extract element costs by converting scalar compares to vector
/// compares followed by extract.
/// cmp (ext0 V0, C), (ext1 V1, C)
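///
/// For example (illustrative IR, not from a regression test):
///   %e0 = extractelement <4 x i32> %a, i32 1
///   %e1 = extractelement <4 x i32> %b, i32 1
///   %c = icmp sgt i32 %e0, %e1
/// -->
///   %vc = icmp sgt <4 x i32> %a, %b
///   %c = extractelement <4 x i1> %vc, i32 1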
void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
                                  ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<CmpInst>(&I) && "Expected a compare");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
  ++NumVecCmp;
  CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
  Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
  Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
  replaceValue(I, *NewExt);
}

/// Try to reduce extract element costs by converting scalar binops to vector
/// binops followed by extract.
/// bo (ext0 V0, C), (ext1 V1, C)
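///
/// For example (illustrative IR, not from a regression test):
///   %e0 = extractelement <2 x double> %a, i32 1
///   %e1 = extractelement <2 x double> %b, i32 1
///   %r = fmul double %e0, %e1
/// -->
///   %v = fmul <2 x double> %a, %b
///   %r = extractelement <2 x double> %v, i32 1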
void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
                                    ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
  ++NumVecBO;
  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
  Value *VecBO =
      Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);

  // All IR flags are safe to back-propagate because any potential poison
  // created in unused vector elements is discarded by the extract.
  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
    VecBOInst->copyIRFlags(&I);

  Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
  replaceValue(I, *NewExt);
}

/// Match an instruction with extracted vector operands.
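///
/// A sketch of the harder case with mismatched extract indexes (illustrative
/// IR; the actual decision depends on target costs): one operand is first
/// splat-shuffled to the other operand's index, so the vector op and the final
/// extract still use matching lanes:
///   %e0 = extractelement <4 x float> %x, i32 0
///   %e1 = extractelement <4 x float> %y, i32 2
///   %r = fadd float %e0, %e1
/// -->
///   %shift = shufflevector <4 x float> %y, <4 x float> undef,
///                          <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
///   %v = fadd <4 x float> %x, %shift
///   %r = extractelement <4 x float> %v, i32 0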
bool VectorCombine::foldExtractExtract(Instruction &I) {
  // It is not safe to transform things like div, urem, etc. because we may
  // create undefined behavior when executing those on unknown vector elements.
  if (!isSafeToSpeculativelyExecute(&I))
    return false;

  Instruction *I0, *I1;
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  if (!match(&I, m_Cmp(Pred, m_Instruction(I0), m_Instruction(I1))) &&
      !match(&I, m_BinOp(m_Instruction(I0), m_Instruction(I1))))
    return false;

  Value *V0, *V1;
  uint64_t C0, C1;
  if (!match(I0, m_ExtractElt(m_Value(V0), m_ConstantInt(C0))) ||
      !match(I1, m_ExtractElt(m_Value(V1), m_ConstantInt(C1))) ||
      V0->getType() != V1->getType())
    return false;

  // If the scalar value 'I' is going to be re-inserted into a vector, then try
  // to create an extract to that same element. The extract/insert can be
  // reduced to a "select shuffle".
  // TODO: If we add a larger pattern match that starts from an insert, this
  //       probably becomes unnecessary.
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  uint64_t InsertIndex = std::numeric_limits<uint64_t>::max();
  if (I.hasOneUse())
    match(I.user_back(),
          m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));

  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I.getOpcode(), ExtractToChange,
                            InsertIndex))
    return false;

  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    ExtractElementInst *NewExtract =
        translateExtract(ExtractToChange, CheapExtractIdx, Builder);
    if (!NewExtract)
      return false;
    if (ExtractToChange == Ext0)
      Ext0 = NewExtract;
    else
      Ext1 = NewExtract;
  }

  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
    foldExtExtCmp(Ext0, Ext1, I);
  else
    foldExtExtBinop(Ext0, Ext1, I);

  return true;
}

/// If this is a bitcast of a shuffle, try to bitcast the source vector to the
/// destination type followed by shuffle. This can enable further transforms by
/// moving bitcasts or shuffles together.
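///
/// For example (illustrative IR, not from a regression test), narrowing from
/// <2 x i64> to <4 x i32> scales each mask element by 2:
///   %s = shufflevector <2 x i64> %v, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
///   %b = bitcast <2 x i64> %s to <4 x i32>
/// -->
///   %c = bitcast <2 x i64> %v to <4 x i32>
///   %b = shufflevector <4 x i32> %c, <4 x i32> undef,
///                      <4 x i32> <i32 2, i32 3, i32 0, i32 1>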
bool VectorCombine::foldBitcastShuf(Instruction &I) {
  Value *V;
  ArrayRef<int> Mask;
  if (!match(&I, m_BitCast(
                     m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
    return false;

  // Disallow non-vector casts and length-changing shuffles.
  // TODO: We could allow any shuffle.
  auto *DestTy = dyn_cast<VectorType>(I.getType());
  auto *SrcTy = cast<VectorType>(V->getType());
  if (!DestTy || I.getOperand(0)->getType() != SrcTy)
    return false;

  // The new shuffle must not cost more than the old shuffle. The bitcast is
  // moved ahead of the shuffle, so assume that it has the same cost as before.
  if (TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy) >
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy))
    return false;

  unsigned DestNumElts = DestTy->getNumElements();
  unsigned SrcNumElts = SrcTy->getNumElements();
  SmallVector<int, 16> NewMask;
  if (SrcNumElts <= DestNumElts) {
    // The bitcast is from wide to narrow/equal elements. The shuffle mask can
    // always be expanded to the equivalent form choosing narrower elements.
    assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestNumElts / SrcNumElts;
    narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
  } else {
    // The bitcast is from narrow elements to wide elements. The shuffle mask
    // must choose consecutive elements to allow casting first.
    assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcNumElts / DestNumElts;
    if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
      return false;
  }
  // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
  ++NumShufOfBitcast;
  Value *CastV = Builder.CreateBitCast(V, DestTy);
  Value *Shuf =
      Builder.CreateShuffleVector(CastV, UndefValue::get(DestTy), NewMask);
  replaceValue(I, *Shuf);
  return true;
}

/// Match a vector binop or compare instruction with at least one inserted
/// scalar operand and convert to scalar binop/cmp followed by insertelement.
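///
/// For example (illustrative IR, not from a regression test):
///   %i = insertelement <2 x i32> <i32 1, i32 2>, i32 %x, i32 0
///   %a = add <2 x i32> %i, <i32 10, i32 20>
/// -->
///   %s = add i32 %x, 10
///   %a = insertelement <2 x i32> <i32 11, i32 22>, i32 %s, i32 0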
bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  Value *Ins0, *Ins1;
  if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
      !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
    return false;

  // Do not convert the vector condition of a vector select into a scalar
  // condition. That may cause problems for codegen because of differences in
  // boolean formats and register-file transfers.
  // TODO: Can we account for that in the cost model?
  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
  if (IsCmp)
    for (User *U : I.users())
      if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
        return false;

  // Match against one or both scalar values being inserted into constant
  // vectors:
  // vec_op VecC0, (inselt VecC1, V1, Index)
  // vec_op (inselt VecC0, V0, Index), VecC1
  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
  // TODO: Deal with mismatched index constants and variable indexes?
  Constant *VecC0 = nullptr, *VecC1 = nullptr;
  Value *V0 = nullptr, *V1 = nullptr;
  uint64_t Index0 = 0, Index1 = 0;
  if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0),
                               m_ConstantInt(Index0))) &&
      !match(Ins0, m_Constant(VecC0)))
    return false;
  if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1),
                               m_ConstantInt(Index1))) &&
      !match(Ins1, m_Constant(VecC1)))
    return false;

  bool IsConst0 = !V0;
  bool IsConst1 = !V1;
  if (IsConst0 && IsConst1)
    return false;
  if (!IsConst0 && !IsConst1 && Index0 != Index1)
    return false;

  // Bail for single insertion if it is a load.
  // TODO: Handle this once getVectorInstrCost can cost for load/stores.
  auto *I0 = dyn_cast_or_null<Instruction>(V0);
  auto *I1 = dyn_cast_or_null<Instruction>(V1);
  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
      (IsConst1 && I0 && I0->mayReadFromMemory()))
    return false;

  uint64_t Index = IsConst0 ? Index1 : Index0;
  Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
  Type *VecTy = I.getType();
  assert(VecTy->isVectorTy() &&
         (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
         (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy() ||
          ScalarTy->isPointerTy()) &&
         "Unexpected types for insert element into binop or cmp");

  unsigned Opcode = I.getOpcode();
  int ScalarOpCost, VectorOpCost;
  if (IsCmp) {
    ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy);
  } else {
    ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
  }

  // Get cost estimate for the insert element. This cost will factor into
  // both sequences.
  int InsertCost =
      TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
  int OldCost = (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) +
                VectorOpCost;
  int NewCost = ScalarOpCost + InsertCost +
                (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
                (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);

  // We want to scalarize unless the vector variant actually has lower cost.
  if (OldCost < NewCost)
    return false;

  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
  // inselt NewVecC, (scalar_op V0, V1), Index
  if (IsCmp)
    ++NumScalarCmp;
  else
    ++NumScalarBO;
  // For constant cases, extract the scalar element; this should constant fold.
  if (IsConst0)
    V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
  if (IsConst1)
    V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));

  Value *Scalar =
      IsCmp ? Builder.CreateCmp(Pred, V0, V1)
            : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);

  Scalar->setName(I.getName() + ".scalar");

  // All IR flags are safe to back-propagate. There is no potential for extra
  // poison to be created by the scalar instruction.
  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
    ScalarInst->copyIRFlags(&I);

  // Fold the vector constants in the original vectors into a new base vector.
  Constant *NewVecC = IsCmp ? ConstantExpr::getCompare(Pred, VecC0, VecC1)
                            : ConstantExpr::get(Opcode, VecC0, VecC1);
  Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
  replaceValue(I, *Insert);
  return true;
}

/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
bool VectorCombine::run() {
  if (DisableVectorCombine)
    return false;

  bool MadeChange = false;
  for (BasicBlock &BB : F) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(&BB))
      continue;
    // Do not delete instructions under here and invalidate the iterator.
    // Walk the block forwards to enable simple iterative chains of transforms.
    // TODO: It could be more efficient to remove dead instructions
    //       iteratively in this loop rather than waiting until the end.
    for (Instruction &I : BB) {
      if (isa<DbgInfoIntrinsic>(I))
        continue;
      Builder.SetInsertPoint(&I);
      MadeChange |= foldExtractExtract(I);
      MadeChange |= foldBitcastShuf(I);
      MadeChange |= scalarizeBinopOrCmp(I);
    }
  }

  // We're done with transforms, so remove dead instructions.
  if (MadeChange)
    for (BasicBlock &BB : F)
      SimplifyInstructionsInBlock(&BB);

  return MadeChange;
}

// Pass manager boilerplate below here.

namespace {
class VectorCombineLegacyPass : public FunctionPass {
public:
  static char ID;
  VectorCombineLegacyPass() : FunctionPass(ID) {
    initializeVectorCombineLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<AAResultsWrapperPass>();
    AU.addPreserved<BasicAAWrapperPass>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    VectorCombine Combiner(F, TTI, DT);
    return Combiner.run();
  }
};
} // namespace

char VectorCombineLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine",
                      "Optimize scalar/vector ops", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(VectorCombineLegacyPass, "vector-combine",
                    "Optimize scalar/vector ops", false, false)
Pass *llvm::createVectorCombinePass() {
  return new VectorCombineLegacyPass();
}

PreservedAnalyses VectorCombinePass::run(Function &F,
                                         FunctionAnalysisManager &FAM) {
  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  VectorCombine Combiner(F, TTI, DT);
  if (!Combiner.run())
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<GlobalsAA>();
  PA.preserve<AAManager>();
  PA.preserve<BasicAA>();
  return PA;
}