//===------- VectorCombine.cpp - Optimize partial vector operations -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass optimizes scalar/vector interactions using target cost models. The
// transforms implemented here may not fit in traditional loop-based or SLP
// vectorization passes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Vectorize/VectorCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "vector-combine"

STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarBO, "Number of scalar binops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");

static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();

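/// Driver for the folds below. Walks every reachable block of a function and
/// uses TTI cost comparisons to decide whether each transform is profitable.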
class VectorCombine {
public:
  VectorCombine(Function &F, const TargetTransformInfo &TTI,
                const DominatorTree &DT)
      : F(F), Builder(F.getContext()), TTI(TTI), DT(DT) {}

  bool run();

private:
  Function &F;
  IRBuilder<> Builder;
  const TargetTransformInfo &TTI;
  const DominatorTree &DT;

  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                             unsigned Opcode,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                     Instruction &I);
  void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                       Instruction &I);
  bool foldExtractExtract(Instruction &I);
  bool foldBitcastShuf(Instruction &I);
  bool scalarizeBinopOrCmp(Instruction &I);
};

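/// Replace all uses of \p Old with \p New and transfer Old's name to New so
/// the replacement reads naturally in the IR.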
static void replaceValue(Value &Old, Value &New) {
  Old.replaceAllUsesWith(&New);
  New.takeName(&Old);
}

/// Compare the relative costs of 2 extracts followed by scalar operation vs.
/// vector operation(s) followed by extract. Return true if the existing
/// instructions are cheaper than a vector alternative. Otherwise, return false
/// and if one of the extracts should be transformed to a shufflevector, set
/// \p ConvertToShuffle to that extract instruction.
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          unsigned Opcode,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
         isa<ConstantInt>(Ext1->getOperand(1)) &&
         "Expected constant extract indexes");
  Type *ScalarTy = Ext0->getType();
  auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
  int ScalarOpCost, VectorOpCost;

  // Get cost estimates for scalar and vector versions of the operation.
  bool IsBinOp = Instruction::isBinaryOp(Opcode);
  if (IsBinOp) {
    ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
  } else {
    assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
           "Expected a compare");
    ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy,
                                          CmpInst::makeCmpResultType(ScalarTy));
    VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy,
                                          CmpInst::makeCmpResultType(VecTy));
  }

  // Get cost estimates for the extract elements. These costs will factor into
  // both sequences.
  unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
  unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();

  int Extract0Cost =
      TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
  int Extract1Cost =
      TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);

  // A more expensive extract will always be replaced by a splat shuffle.
  // For example, if Ext0 is more expensive:
  // opcode (extelt V0, Ext0), (ext V1, Ext1) -->
  // extelt (opcode (splat V0, Ext0), V1), Ext1
  // TODO: Evaluate whether that always results in lowest cost. Alternatively,
  //       check the cost of creating a broadcast shuffle and shuffling both
  //       operands to element 0.
  int CheapExtractCost = std::min(Extract0Cost, Extract1Cost);

  // Extra uses of the extracts mean that we include those costs in the
  // vector total because those instructions will not be eliminated.
  int OldCost, NewCost;
  if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
    // Handle a special case. If the 2 extracts are identical, adjust the
    // formulas to account for that. The extra use charge allows for either the
    // CSE'd pattern or an unoptimized form with identical values:
    // opcode (extelt V, C), (extelt V, C) --> extelt (opcode V, V), C
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
                                  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
  } else {
    // Handle the general case. Each extract is actually a different value:
    // opcode (extelt V0, C0), (extelt V1, C1) --> extelt (opcode V0, V1), C
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
              !Ext0->hasOneUse() * Extract0Cost +
              !Ext1->hasOneUse() * Extract1Cost;
  }

  if (Ext0Index == Ext1Index) {
    // If the extract indexes are identical, no shuffle is needed.
    ConvertToShuffle = nullptr;
  } else {
    if (IsBinOp && DisableBinopExtractShuffle)
      return true;

    // If we are extracting from 2 different indexes, then one operand must be
    // shuffled before performing the vector operation. The shuffle mask is
    // undefined except for 1 lane that is being translated to the remaining
    // extraction lane. Therefore, it is a splat shuffle. Ex:
    // ShufMask = { undef, undef, 0, undef }
    // TODO: The cost model has an option for a "broadcast" shuffle
    //       (splat-from-element-0), but no option for a more general splat.
    NewCost +=
        TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);

    // The more expensive extract will be replaced by a shuffle. If the costs
    // are equal and there is a preferred extract index, shuffle the opposite
    // operand. Otherwise, replace the extract with the higher index.
    if (Extract0Cost > Extract1Cost)
      ConvertToShuffle = Ext0;
    else if (Extract1Cost > Extract0Cost)
      ConvertToShuffle = Ext1;
    else if (PreferredExtractIndex == Ext0Index)
      ConvertToShuffle = Ext1;
    else if (PreferredExtractIndex == Ext1Index)
      ConvertToShuffle = Ext0;
    else
      ConvertToShuffle = Ext0Index > Ext1Index ? Ext0 : Ext1;
  }

  // Aggressively form a vector op if the cost is equal because the transform
  // may enable further optimization.
  // Codegen can reverse this transform (scalarize) if it was not profitable.
  return OldCost < NewCost;
}

/// Create a shuffle that translates (shifts) 1 element from the input vector
/// to a new element location.
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
                                 unsigned NewIndex, IRBuilder<> &Builder) {
  // The shuffle mask is undefined except for 1 lane that is being translated
  // to the new element index. Example for OldIndex == 2 and NewIndex == 0:
  // ShufMask = { 2, undef, undef, undef }
  auto *VecTy = cast<FixedVectorType>(Vec->getType());
  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
  ShufMask[NewIndex] = OldIndex;
  Value *Undef = UndefValue::get(VecTy);
  return Builder.CreateShuffleVector(Vec, Undef, ShufMask, "shift");
}

/// Given an extract element instruction with constant index operand, shuffle
/// the source vector (shift the scalar element) to a NewIndex for extraction.
/// Return null if the input can be constant folded, so that we are not creating
/// unnecessary instructions.
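/// Illustrative example for NewIndex == 0:
///   %e = extractelement <4 x i32> %x, i32 3
///   -->
///   %s = shufflevector <4 x i32> %x, <4 x i32> undef,
///                      <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
///   %e = extractelement <4 x i32> %s, i32 0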
static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
                                            unsigned NewIndex,
                                            IRBuilder<> &Builder) {
  // If the extract can be constant-folded, the IR has not been simplified by
  // earlier passes. Defer to those passes to handle it.
  Value *X = ExtElt->getVectorOperand();
  Value *C = ExtElt->getIndexOperand();
  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
  if (isa<Constant>(X))
    return nullptr;

  Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
                                   NewIndex, Builder);
  return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
}

/// Try to reduce extract element costs by converting scalar compares to vector
/// compares followed by extract.
/// cmp (ext0 V0, C), (ext1 V1, C)
void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
                                  ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<CmpInst>(&I) && "Expected a compare");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
  ++NumVecCmp;
  CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
  Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
  Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
  replaceValue(I, *NewExt);
}

/// Try to reduce extract element costs by converting scalar binops to vector
/// binops followed by extract.
/// bo (ext0 V0, C), (ext1 V1, C)
void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
                                    ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
  ++NumVecBO;
  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
  Value *VecBO =
      Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);

  // All IR flags are safe to back-propagate because any potential poison
  // created in unused vector elements is discarded by the extract.
  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
    VecBOInst->copyIRFlags(&I);

  Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
  replaceValue(I, *NewExt);
}

/// Match an instruction with extracted vector operands.
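/// Illustrative example (assuming the cost model favors the vector form):
///   %a = extractelement <4 x i32> %x, i32 0
///   %b = extractelement <4 x i32> %y, i32 0
///   %r = add i32 %a, %b
///   -->
///   %v = add <4 x i32> %x, %y
///   %r = extractelement <4 x i32> %v, i32 0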
bool VectorCombine::foldExtractExtract(Instruction &I) {
  // It is not safe to transform things like div, urem, etc. because we may
  // create undefined behavior when executing those on unknown vector elements.
  if (!isSafeToSpeculativelyExecute(&I))
    return false;

  Instruction *I0, *I1;
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  if (!match(&I, m_Cmp(Pred, m_Instruction(I0), m_Instruction(I1))) &&
      !match(&I, m_BinOp(m_Instruction(I0), m_Instruction(I1))))
    return false;

  Value *V0, *V1;
  uint64_t C0, C1;
  if (!match(I0, m_ExtractElt(m_Value(V0), m_ConstantInt(C0))) ||
      !match(I1, m_ExtractElt(m_Value(V1), m_ConstantInt(C1))) ||
      V0->getType() != V1->getType())
    return false;

  // If the scalar value 'I' is going to be re-inserted into a vector, then try
  // to create an extract to that same element. The extract/insert can be
  // reduced to a "select shuffle".
  // TODO: If we add a larger pattern match that starts from an insert, this
  //       probably becomes unnecessary.
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  uint64_t InsertIndex = InvalidIndex;
  if (I.hasOneUse())
    match(I.user_back(),
          m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));

  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I.getOpcode(), ExtractToChange,
                            InsertIndex))
    return false;

  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    ExtractElementInst *NewExtract =
        translateExtract(ExtractToChange, CheapExtractIdx, Builder);
    if (!NewExtract)
      return false;
    if (ExtractToChange == Ext0)
      Ext0 = NewExtract;
    else
      Ext1 = NewExtract;
  }

  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
    foldExtExtCmp(Ext0, Ext1, I);
  else
    foldExtExtBinop(Ext0, Ext1, I);

  return true;
}

/// If this is a bitcast of a shuffle, try to bitcast the source vector to the
/// destination type followed by shuffle. This can enable further transforms by
/// moving bitcasts or shuffles together.
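/// Illustrative example (assuming the new shuffle is not more expensive):
///   %s = shufflevector <4 x i32> %x, <4 x i32> undef,
///                      <4 x i32> <i32 1, i32 1, i32 1, i32 1>
///   %r = bitcast <4 x i32> %s to <8 x i16>
///   -->
///   %b = bitcast <4 x i32> %x to <8 x i16>
///   %r = shufflevector <8 x i16> %b, <8 x i16> undef,
///                      <8 x i32> <i32 2, i32 3, i32 2, i32 3,
///                                 i32 2, i32 3, i32 2, i32 3>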
bool VectorCombine::foldBitcastShuf(Instruction &I) {
  Value *V;
  ArrayRef<int> Mask;
  if (!match(&I, m_BitCast(
                     m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
    return false;

  // Disallow non-vector casts and length-changing shuffles.
  // TODO: We could allow any shuffle.
  auto *DestTy = dyn_cast<VectorType>(I.getType());
  auto *SrcTy = cast<VectorType>(V->getType());
  if (!DestTy || I.getOperand(0)->getType() != SrcTy)
    return false;

  // The new shuffle must not cost more than the old shuffle. The bitcast is
  // moved ahead of the shuffle, so assume that it has the same cost as before.
  if (TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy) >
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy))
    return false;

  unsigned DestNumElts = DestTy->getNumElements();
  unsigned SrcNumElts = SrcTy->getNumElements();
  SmallVector<int, 16> NewMask;
  if (SrcNumElts <= DestNumElts) {
    // The bitcast is from wide to narrow/equal elements. The shuffle mask can
    // always be expanded to the equivalent form choosing narrower elements.
    assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestNumElts / SrcNumElts;
    narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
  } else {
    // The bitcast is from narrow elements to wide elements. The shuffle mask
    // must choose consecutive elements to allow casting first.
    assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcNumElts / DestNumElts;
    if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
      return false;
  }
  // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
  ++NumShufOfBitcast;
  Value *CastV = Builder.CreateBitCast(V, DestTy);
  Value *Shuf =
      Builder.CreateShuffleVector(CastV, UndefValue::get(DestTy), NewMask);
  replaceValue(I, *Shuf);
  return true;
}

/// Match a vector binop or compare instruction with at least one inserted
/// scalar operand and convert to scalar binop/cmp followed by insertelement.
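/// Illustrative example (when the scalar form is not more expensive):
///   %i = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 %x, i32 0
///   %r = add <4 x i32> %i, <i32 7, i32 7, i32 7, i32 7>
///   -->
///   %s = add i32 %x, 7
///   %r = insertelement <4 x i32> <i32 8, i32 9, i32 10, i32 11>, i32 %s, i32 0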
bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  Value *Ins0, *Ins1;
  if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
      !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
    return false;

  // Do not convert the vector condition of a vector select into a scalar
  // condition. That may cause problems for codegen because of differences in
  // boolean formats and register-file transfers.
  // TODO: Can we account for that in the cost model?
  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
  if (IsCmp)
    for (User *U : I.users())
      if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
        return false;

  // Match against one or both scalar values being inserted into constant
  // vectors:
  // vec_op VecC0, (inselt VecC1, V1, Index)
  // vec_op (inselt VecC0, V0, Index), VecC1
  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
  // TODO: Deal with mismatched index constants and variable indexes?
  Constant *VecC0 = nullptr, *VecC1 = nullptr;
  Value *V0 = nullptr, *V1 = nullptr;
  uint64_t Index0 = 0, Index1 = 0;
  if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0),
                               m_ConstantInt(Index0))) &&
      !match(Ins0, m_Constant(VecC0)))
    return false;
  if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1),
                               m_ConstantInt(Index1))) &&
      !match(Ins1, m_Constant(VecC1)))
    return false;

  bool IsConst0 = !V0;
  bool IsConst1 = !V1;
  if (IsConst0 && IsConst1)
    return false;
  if (!IsConst0 && !IsConst1 && Index0 != Index1)
    return false;

  // Bail for single insertion if it is a load.
  // TODO: Handle this once getVectorInstrCost can cost for load/stores.
  auto *I0 = dyn_cast_or_null<Instruction>(V0);
  auto *I1 = dyn_cast_or_null<Instruction>(V1);
  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
      (IsConst1 && I0 && I0->mayReadFromMemory()))
    return false;

  uint64_t Index = IsConst0 ? Index1 : Index0;
  Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
  Type *VecTy = I.getType();
  assert(VecTy->isVectorTy() &&
         (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
         (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy() ||
          ScalarTy->isPointerTy()) &&
         "Unexpected types for insert element into binop or cmp");

  unsigned Opcode = I.getOpcode();
  int ScalarOpCost, VectorOpCost;
  if (IsCmp) {
    ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy);
  } else {
    ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
  }

  // Get cost estimate for the insert element. This cost will factor into
  // both sequences.
  int InsertCost =
      TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
  int OldCost = (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) +
                VectorOpCost;
  int NewCost = ScalarOpCost + InsertCost +
                (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
                (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);

  // We want to scalarize unless the vector variant actually has lower cost.
  if (OldCost < NewCost)
    return false;

  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
  // inselt NewVecC, (scalar_op V0, V1), Index
  if (IsCmp)
    ++NumScalarCmp;
  else
    ++NumScalarBO;

  // For constant cases, extract the scalar element; this should constant fold.
  if (IsConst0)
    V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
  if (IsConst1)
    V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));

  Value *Scalar =
      IsCmp ? Builder.CreateCmp(Pred, V0, V1)
            : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);

  Scalar->setName(I.getName() + ".scalar");

  // All IR flags are safe to back-propagate. There is no potential for extra
  // poison to be created by the scalar instruction.
  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
    ScalarInst->copyIRFlags(&I);

  // Fold the vector constants in the original vectors into a new base vector.
  Constant *NewVecC = IsCmp ? ConstantExpr::getCompare(Pred, VecC0, VecC1)
                            : ConstantExpr::get(Opcode, VecC0, VecC1);
  Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
  replaceValue(I, *Insert);
  return true;
}

/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
bool VectorCombine::run() {
  if (DisableVectorCombine)
    return false;

  bool MadeChange = false;
  for (BasicBlock &BB : F) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(&BB))
      continue;
    // Do not delete instructions under here and invalidate the iterator.
    // Walk the block forwards to enable simple iterative chains of transforms.
    // TODO: It could be more efficient to remove dead instructions
    //       iteratively in this loop rather than waiting until the end.
    for (Instruction &I : BB) {
      if (isa<DbgInfoIntrinsic>(I))
        continue;
      Builder.SetInsertPoint(&I);
      MadeChange |= foldExtractExtract(I);
      MadeChange |= foldBitcastShuf(I);
      MadeChange |= scalarizeBinopOrCmp(I);
    }
  }

  // We're done with transforms, so remove dead instructions.
  if (MadeChange)
    for (BasicBlock &BB : F)
      SimplifyInstructionsInBlock(&BB);

  return MadeChange;
}

// Pass manager boilerplate below here.

namespace {
class VectorCombineLegacyPass : public FunctionPass {
public:
  static char ID;
  VectorCombineLegacyPass() : FunctionPass(ID) {
    initializeVectorCombineLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<AAResultsWrapperPass>();
    AU.addPreserved<BasicAAWrapperPass>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    VectorCombine Combiner(F, TTI, DT);
    return Combiner.run();
  }
};
} // namespace

char VectorCombineLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine",
                      "Optimize scalar/vector ops", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(VectorCombineLegacyPass, "vector-combine",
                    "Optimize scalar/vector ops", false, false)
Pass *llvm::createVectorCombinePass() {
  return new VectorCombineLegacyPass();
}

PreservedAnalyses VectorCombinePass::run(Function &F,
                                         FunctionAnalysisManager &FAM) {
  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  VectorCombine Combiner(F, TTI, DT);
  if (!Combiner.run())
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<GlobalsAA>();
  PA.preserve<AAManager>();
  PA.preserve<BasicAA>();
  return PA;
}
584