10b57cec5SDimitry Andric //===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass implements the Bottom Up SLP vectorizer. It detects consecutive
100b57cec5SDimitry Andric // stores that can be put together into vector-stores. Next, it attempts to
110b57cec5SDimitry Andric // construct vectorizable tree using the use-def chains. If a profitable tree
120b57cec5SDimitry Andric // was found, the SLP vectorizer performs vectorization on the tree.
130b57cec5SDimitry Andric //
140b57cec5SDimitry Andric // The pass is inspired by the work described in the paper:
150b57cec5SDimitry Andric //  "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
160b57cec5SDimitry Andric //
170b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
180b57cec5SDimitry Andric 
190b57cec5SDimitry Andric #include "llvm/Transforms/Vectorize/SLPVectorizer.h"
200b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
210b57cec5SDimitry Andric #include "llvm/ADT/DenseSet.h"
220b57cec5SDimitry Andric #include "llvm/ADT/Optional.h"
230b57cec5SDimitry Andric #include "llvm/ADT/PostOrderIterator.h"
240b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
255f7ddb14SDimitry Andric #include "llvm/ADT/SetOperations.h"
260b57cec5SDimitry Andric #include "llvm/ADT/SetVector.h"
27480093f4SDimitry Andric #include "llvm/ADT/SmallBitVector.h"
280b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
290b57cec5SDimitry Andric #include "llvm/ADT/SmallSet.h"
30af732203SDimitry Andric #include "llvm/ADT/SmallString.h"
310b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
320b57cec5SDimitry Andric #include "llvm/ADT/iterator.h"
330b57cec5SDimitry Andric #include "llvm/ADT/iterator_range.h"
340b57cec5SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
35af732203SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
360b57cec5SDimitry Andric #include "llvm/Analysis/CodeMetrics.h"
370b57cec5SDimitry Andric #include "llvm/Analysis/DemandedBits.h"
380b57cec5SDimitry Andric #include "llvm/Analysis/GlobalsModRef.h"
39af732203SDimitry Andric #include "llvm/Analysis/IVDescriptors.h"
400b57cec5SDimitry Andric #include "llvm/Analysis/LoopAccessAnalysis.h"
410b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
420b57cec5SDimitry Andric #include "llvm/Analysis/MemoryLocation.h"
430b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
440b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
450b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h"
460b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
470b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
480b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
490b57cec5SDimitry Andric #include "llvm/Analysis/VectorUtils.h"
500b57cec5SDimitry Andric #include "llvm/IR/Attributes.h"
510b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
520b57cec5SDimitry Andric #include "llvm/IR/Constant.h"
530b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
540b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h"
550b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h"
560b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
570b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
580b57cec5SDimitry Andric #include "llvm/IR/Function.h"
590b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
600b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h"
610b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
620b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
630b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
640b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h"
650b57cec5SDimitry Andric #include "llvm/IR/Module.h"
660b57cec5SDimitry Andric #include "llvm/IR/NoFolder.h"
670b57cec5SDimitry Andric #include "llvm/IR/Operator.h"
680b57cec5SDimitry Andric #include "llvm/IR/PatternMatch.h"
690b57cec5SDimitry Andric #include "llvm/IR/Type.h"
700b57cec5SDimitry Andric #include "llvm/IR/Use.h"
710b57cec5SDimitry Andric #include "llvm/IR/User.h"
720b57cec5SDimitry Andric #include "llvm/IR/Value.h"
730b57cec5SDimitry Andric #include "llvm/IR/ValueHandle.h"
740b57cec5SDimitry Andric #include "llvm/IR/Verifier.h"
75480093f4SDimitry Andric #include "llvm/InitializePasses.h"
760b57cec5SDimitry Andric #include "llvm/Pass.h"
770b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
780b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
790b57cec5SDimitry Andric #include "llvm/Support/Compiler.h"
800b57cec5SDimitry Andric #include "llvm/Support/DOTGraphTraits.h"
810b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
820b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
830b57cec5SDimitry Andric #include "llvm/Support/GraphWriter.h"
84af732203SDimitry Andric #include "llvm/Support/InstructionCost.h"
850b57cec5SDimitry Andric #include "llvm/Support/KnownBits.h"
860b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h"
870b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
885ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/InjectTLIMappings.h"
890b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
900b57cec5SDimitry Andric #include "llvm/Transforms/Vectorize.h"
910b57cec5SDimitry Andric #include <algorithm>
920b57cec5SDimitry Andric #include <cassert>
930b57cec5SDimitry Andric #include <cstdint>
940b57cec5SDimitry Andric #include <iterator>
950b57cec5SDimitry Andric #include <memory>
960b57cec5SDimitry Andric #include <set>
970b57cec5SDimitry Andric #include <string>
980b57cec5SDimitry Andric #include <tuple>
990b57cec5SDimitry Andric #include <utility>
1000b57cec5SDimitry Andric #include <vector>
1010b57cec5SDimitry Andric 
1020b57cec5SDimitry Andric using namespace llvm;
1030b57cec5SDimitry Andric using namespace llvm::PatternMatch;
1040b57cec5SDimitry Andric using namespace slpvectorizer;
1050b57cec5SDimitry Andric 
1060b57cec5SDimitry Andric #define SV_NAME "slp-vectorizer"
1070b57cec5SDimitry Andric #define DEBUG_TYPE "SLP"
1080b57cec5SDimitry Andric 
1090b57cec5SDimitry Andric STATISTIC(NumVectorInstructions, "Number of vector instructions generated");
1100b57cec5SDimitry Andric 
1115ffd83dbSDimitry Andric cl::opt<bool> RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden,
1120b57cec5SDimitry Andric                                   cl::desc("Run the SLP vectorization passes"));
1130b57cec5SDimitry Andric 
1140b57cec5SDimitry Andric static cl::opt<int>
1150b57cec5SDimitry Andric     SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
1160b57cec5SDimitry Andric                      cl::desc("Only vectorize if you gain more than this "
1170b57cec5SDimitry Andric                               "number "));
1180b57cec5SDimitry Andric 
1190b57cec5SDimitry Andric static cl::opt<bool>
1200b57cec5SDimitry Andric ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden,
1210b57cec5SDimitry Andric                    cl::desc("Attempt to vectorize horizontal reductions"));
1220b57cec5SDimitry Andric 
1230b57cec5SDimitry Andric static cl::opt<bool> ShouldStartVectorizeHorAtStore(
1240b57cec5SDimitry Andric     "slp-vectorize-hor-store", cl::init(false), cl::Hidden,
1250b57cec5SDimitry Andric     cl::desc(
1260b57cec5SDimitry Andric         "Attempt to vectorize horizontal reductions feeding into a store"));
1270b57cec5SDimitry Andric 
1280b57cec5SDimitry Andric static cl::opt<int>
1290b57cec5SDimitry Andric MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
1300b57cec5SDimitry Andric     cl::desc("Attempt to vectorize for this register size in bits"));
1310b57cec5SDimitry Andric 
132af732203SDimitry Andric static cl::opt<unsigned>
133af732203SDimitry Andric MaxVFOption("slp-max-vf", cl::init(0), cl::Hidden,
134af732203SDimitry Andric     cl::desc("Maximum SLP vectorization factor (0=unlimited)"));
135af732203SDimitry Andric 
136480093f4SDimitry Andric static cl::opt<int>
137480093f4SDimitry Andric MaxStoreLookup("slp-max-store-lookup", cl::init(32), cl::Hidden,
138480093f4SDimitry Andric     cl::desc("Maximum depth of the lookup for consecutive stores."));
139480093f4SDimitry Andric 
1400b57cec5SDimitry Andric /// Limits the size of scheduling regions in a block.
1410b57cec5SDimitry Andric /// It avoid long compile times for _very_ large blocks where vector
1420b57cec5SDimitry Andric /// instructions are spread over a wide range.
1430b57cec5SDimitry Andric /// This limit is way higher than needed by real-world functions.
1440b57cec5SDimitry Andric static cl::opt<int>
1450b57cec5SDimitry Andric ScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden,
1460b57cec5SDimitry Andric     cl::desc("Limit the size of the SLP scheduling region per block"));
1470b57cec5SDimitry Andric 
1480b57cec5SDimitry Andric static cl::opt<int> MinVectorRegSizeOption(
1490b57cec5SDimitry Andric     "slp-min-reg-size", cl::init(128), cl::Hidden,
1500b57cec5SDimitry Andric     cl::desc("Attempt to vectorize for this register size in bits"));
1510b57cec5SDimitry Andric 
1520b57cec5SDimitry Andric static cl::opt<unsigned> RecursionMaxDepth(
1530b57cec5SDimitry Andric     "slp-recursion-max-depth", cl::init(12), cl::Hidden,
1540b57cec5SDimitry Andric     cl::desc("Limit the recursion depth when building a vectorizable tree"));
1550b57cec5SDimitry Andric 
1560b57cec5SDimitry Andric static cl::opt<unsigned> MinTreeSize(
1570b57cec5SDimitry Andric     "slp-min-tree-size", cl::init(3), cl::Hidden,
1580b57cec5SDimitry Andric     cl::desc("Only vectorize small trees if they are fully vectorizable"));
1590b57cec5SDimitry Andric 
160480093f4SDimitry Andric // The maximum depth that the look-ahead score heuristic will explore.
161480093f4SDimitry Andric // The higher this value, the higher the compilation time overhead.
162480093f4SDimitry Andric static cl::opt<int> LookAheadMaxDepth(
163480093f4SDimitry Andric     "slp-max-look-ahead-depth", cl::init(2), cl::Hidden,
164480093f4SDimitry Andric     cl::desc("The maximum look-ahead depth for operand reordering scores"));
165480093f4SDimitry Andric 
166480093f4SDimitry Andric // The Look-ahead heuristic goes through the users of the bundle to calculate
167480093f4SDimitry Andric // the users cost in getExternalUsesCost(). To avoid compilation time increase
168480093f4SDimitry Andric // we limit the number of users visited to this value.
169480093f4SDimitry Andric static cl::opt<unsigned> LookAheadUsersBudget(
170480093f4SDimitry Andric     "slp-look-ahead-users-budget", cl::init(2), cl::Hidden,
171480093f4SDimitry Andric     cl::desc("The maximum number of users to visit while visiting the "
172480093f4SDimitry Andric              "predecessors. This prevents compilation time increase."));
173480093f4SDimitry Andric 
1740b57cec5SDimitry Andric static cl::opt<bool>
1750b57cec5SDimitry Andric     ViewSLPTree("view-slp-tree", cl::Hidden,
1760b57cec5SDimitry Andric                 cl::desc("Display the SLP trees with Graphviz"));
1770b57cec5SDimitry Andric 
1780b57cec5SDimitry Andric // Limit the number of alias checks. The limit is chosen so that
1790b57cec5SDimitry Andric // it has no negative effect on the llvm benchmarks.
1800b57cec5SDimitry Andric static const unsigned AliasedCheckLimit = 10;
1810b57cec5SDimitry Andric 
1820b57cec5SDimitry Andric // Another limit for the alias checks: The maximum distance between load/store
1830b57cec5SDimitry Andric // instructions where alias checks are done.
1840b57cec5SDimitry Andric // This limit is useful for very large basic blocks.
1850b57cec5SDimitry Andric static const unsigned MaxMemDepDistance = 160;
1860b57cec5SDimitry Andric 
1870b57cec5SDimitry Andric /// If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling
1880b57cec5SDimitry Andric /// regions to be handled.
1890b57cec5SDimitry Andric static const int MinScheduleRegionSize = 16;
1900b57cec5SDimitry Andric 
1910b57cec5SDimitry Andric /// Predicate for the element types that the SLP vectorizer supports.
1920b57cec5SDimitry Andric ///
1930b57cec5SDimitry Andric /// The most important thing to filter here are types which are invalid in LLVM
1940b57cec5SDimitry Andric /// vectors. We also filter target specific types which have absolutely no
1950b57cec5SDimitry Andric /// meaningful vectorization path such as x86_fp80 and ppc_f128. This just
1960b57cec5SDimitry Andric /// avoids spending time checking the cost model and realizing that they will
1970b57cec5SDimitry Andric /// be inevitably scalarized.
isValidElementType(Type * Ty)1980b57cec5SDimitry Andric static bool isValidElementType(Type *Ty) {
1990b57cec5SDimitry Andric   return VectorType::isValidElementType(Ty) && !Ty->isX86_FP80Ty() &&
2000b57cec5SDimitry Andric          !Ty->isPPC_FP128Ty();
2010b57cec5SDimitry Andric }
2020b57cec5SDimitry Andric 
2030b57cec5SDimitry Andric /// \returns true if all of the instructions in \p VL are in the same block or
2040b57cec5SDimitry Andric /// false otherwise.
allSameBlock(ArrayRef<Value * > VL)2050b57cec5SDimitry Andric static bool allSameBlock(ArrayRef<Value *> VL) {
2060b57cec5SDimitry Andric   Instruction *I0 = dyn_cast<Instruction>(VL[0]);
2070b57cec5SDimitry Andric   if (!I0)
2080b57cec5SDimitry Andric     return false;
2090b57cec5SDimitry Andric   BasicBlock *BB = I0->getParent();
210af732203SDimitry Andric   for (int I = 1, E = VL.size(); I < E; I++) {
211af732203SDimitry Andric     auto *II = dyn_cast<Instruction>(VL[I]);
212af732203SDimitry Andric     if (!II)
2130b57cec5SDimitry Andric       return false;
2140b57cec5SDimitry Andric 
215af732203SDimitry Andric     if (BB != II->getParent())
2160b57cec5SDimitry Andric       return false;
2170b57cec5SDimitry Andric   }
2180b57cec5SDimitry Andric   return true;
2190b57cec5SDimitry Andric }
2200b57cec5SDimitry Andric 
2215f7ddb14SDimitry Andric /// \returns True if the value is a constant (but not globals/constant
2225f7ddb14SDimitry Andric /// expressions).
isConstant(Value * V)2235f7ddb14SDimitry Andric static bool isConstant(Value *V) {
2245f7ddb14SDimitry Andric   return isa<Constant>(V) && !isa<ConstantExpr>(V) && !isa<GlobalValue>(V);
2255f7ddb14SDimitry Andric }
2265f7ddb14SDimitry Andric 
2278bcb0991SDimitry Andric /// \returns True if all of the values in \p VL are constants (but not
2288bcb0991SDimitry Andric /// globals/constant expressions).
allConstant(ArrayRef<Value * > VL)2290b57cec5SDimitry Andric static bool allConstant(ArrayRef<Value *> VL) {
2308bcb0991SDimitry Andric   // Constant expressions and globals can't be vectorized like normal integer/FP
2318bcb0991SDimitry Andric   // constants.
2325f7ddb14SDimitry Andric   return all_of(VL, isConstant);
2330b57cec5SDimitry Andric }
2340b57cec5SDimitry Andric 
2350b57cec5SDimitry Andric /// \returns True if all of the values in \p VL are identical.
isSplat(ArrayRef<Value * > VL)2360b57cec5SDimitry Andric static bool isSplat(ArrayRef<Value *> VL) {
2370b57cec5SDimitry Andric   for (unsigned i = 1, e = VL.size(); i < e; ++i)
2380b57cec5SDimitry Andric     if (VL[i] != VL[0])
2390b57cec5SDimitry Andric       return false;
2400b57cec5SDimitry Andric   return true;
2410b57cec5SDimitry Andric }
2420b57cec5SDimitry Andric 
243af732203SDimitry Andric /// \returns True if \p I is commutative, handles CmpInst and BinaryOperator.
isCommutative(Instruction * I)2440b57cec5SDimitry Andric static bool isCommutative(Instruction *I) {
245af732203SDimitry Andric   if (auto *Cmp = dyn_cast<CmpInst>(I))
246af732203SDimitry Andric     return Cmp->isCommutative();
247af732203SDimitry Andric   if (auto *BO = dyn_cast<BinaryOperator>(I))
248af732203SDimitry Andric     return BO->isCommutative();
249af732203SDimitry Andric   // TODO: This should check for generic Instruction::isCommutative(), but
250af732203SDimitry Andric   //       we need to confirm that the caller code correctly handles Intrinsics
251af732203SDimitry Andric   //       for example (does not have 2 operands).
252af732203SDimitry Andric   return false;
2530b57cec5SDimitry Andric }
2540b57cec5SDimitry Andric 
2550b57cec5SDimitry Andric /// Checks if the vector of instructions can be represented as a shuffle, like:
2560b57cec5SDimitry Andric /// %x0 = extractelement <4 x i8> %x, i32 0
2570b57cec5SDimitry Andric /// %x3 = extractelement <4 x i8> %x, i32 3
2580b57cec5SDimitry Andric /// %y1 = extractelement <4 x i8> %y, i32 1
2590b57cec5SDimitry Andric /// %y2 = extractelement <4 x i8> %y, i32 2
2600b57cec5SDimitry Andric /// %x0x0 = mul i8 %x0, %x0
2610b57cec5SDimitry Andric /// %x3x3 = mul i8 %x3, %x3
2620b57cec5SDimitry Andric /// %y1y1 = mul i8 %y1, %y1
2630b57cec5SDimitry Andric /// %y2y2 = mul i8 %y2, %y2
264af732203SDimitry Andric /// %ins1 = insertelement <4 x i8> poison, i8 %x0x0, i32 0
2650b57cec5SDimitry Andric /// %ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1
2660b57cec5SDimitry Andric /// %ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2
2670b57cec5SDimitry Andric /// %ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3
2680b57cec5SDimitry Andric /// ret <4 x i8> %ins4
2690b57cec5SDimitry Andric /// can be transformed into:
2700b57cec5SDimitry Andric /// %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> <i32 0, i32 3, i32 5,
2710b57cec5SDimitry Andric ///                                                         i32 6>
2720b57cec5SDimitry Andric /// %2 = mul <4 x i8> %1, %1
2730b57cec5SDimitry Andric /// ret <4 x i8> %2
2740b57cec5SDimitry Andric /// We convert this initially to something like:
2750b57cec5SDimitry Andric /// %x0 = extractelement <4 x i8> %x, i32 0
2760b57cec5SDimitry Andric /// %x3 = extractelement <4 x i8> %x, i32 3
2770b57cec5SDimitry Andric /// %y1 = extractelement <4 x i8> %y, i32 1
2780b57cec5SDimitry Andric /// %y2 = extractelement <4 x i8> %y, i32 2
279af732203SDimitry Andric /// %1 = insertelement <4 x i8> poison, i8 %x0, i32 0
2800b57cec5SDimitry Andric /// %2 = insertelement <4 x i8> %1, i8 %x3, i32 1
2810b57cec5SDimitry Andric /// %3 = insertelement <4 x i8> %2, i8 %y1, i32 2
2820b57cec5SDimitry Andric /// %4 = insertelement <4 x i8> %3, i8 %y2, i32 3
2830b57cec5SDimitry Andric /// %5 = mul <4 x i8> %4, %4
2840b57cec5SDimitry Andric /// %6 = extractelement <4 x i8> %5, i32 0
285af732203SDimitry Andric /// %ins1 = insertelement <4 x i8> poison, i8 %6, i32 0
2860b57cec5SDimitry Andric /// %7 = extractelement <4 x i8> %5, i32 1
2870b57cec5SDimitry Andric /// %ins2 = insertelement <4 x i8> %ins1, i8 %7, i32 1
2880b57cec5SDimitry Andric /// %8 = extractelement <4 x i8> %5, i32 2
2890b57cec5SDimitry Andric /// %ins3 = insertelement <4 x i8> %ins2, i8 %8, i32 2
2900b57cec5SDimitry Andric /// %9 = extractelement <4 x i8> %5, i32 3
2910b57cec5SDimitry Andric /// %ins4 = insertelement <4 x i8> %ins3, i8 %9, i32 3
2920b57cec5SDimitry Andric /// ret <4 x i8> %ins4
2930b57cec5SDimitry Andric /// InstCombiner transforms this into a shuffle and vector mul
2945f7ddb14SDimitry Andric /// Mask will return the Shuffle Mask equivalent to the extracted elements.
2950b57cec5SDimitry Andric /// TODO: Can we split off and reuse the shuffle mask detection from
2960b57cec5SDimitry Andric /// TargetTransformInfo::getInstructionThroughput?
2970b57cec5SDimitry Andric static Optional<TargetTransformInfo::ShuffleKind>
isShuffle(ArrayRef<Value * > VL,SmallVectorImpl<int> & Mask)2985f7ddb14SDimitry Andric isShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) {
2990b57cec5SDimitry Andric   auto *EI0 = cast<ExtractElementInst>(VL[0]);
300af732203SDimitry Andric   unsigned Size =
301af732203SDimitry Andric       cast<FixedVectorType>(EI0->getVectorOperandType())->getNumElements();
3020b57cec5SDimitry Andric   Value *Vec1 = nullptr;
3030b57cec5SDimitry Andric   Value *Vec2 = nullptr;
3040b57cec5SDimitry Andric   enum ShuffleMode { Unknown, Select, Permute };
3050b57cec5SDimitry Andric   ShuffleMode CommonShuffleMode = Unknown;
3060b57cec5SDimitry Andric   for (unsigned I = 0, E = VL.size(); I < E; ++I) {
3070b57cec5SDimitry Andric     auto *EI = cast<ExtractElementInst>(VL[I]);
3080b57cec5SDimitry Andric     auto *Vec = EI->getVectorOperand();
3090b57cec5SDimitry Andric     // All vector operands must have the same number of vector elements.
310af732203SDimitry Andric     if (cast<FixedVectorType>(Vec->getType())->getNumElements() != Size)
3110b57cec5SDimitry Andric       return None;
3120b57cec5SDimitry Andric     auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());
3130b57cec5SDimitry Andric     if (!Idx)
3140b57cec5SDimitry Andric       return None;
3150b57cec5SDimitry Andric     // Undefined behavior if Idx is negative or >= Size.
3165f7ddb14SDimitry Andric     if (Idx->getValue().uge(Size)) {
3175f7ddb14SDimitry Andric       Mask.push_back(UndefMaskElem);
3180b57cec5SDimitry Andric       continue;
3195f7ddb14SDimitry Andric     }
3200b57cec5SDimitry Andric     unsigned IntIdx = Idx->getValue().getZExtValue();
3215f7ddb14SDimitry Andric     Mask.push_back(IntIdx);
322af732203SDimitry Andric     // We can extractelement from undef or poison vector.
3230b57cec5SDimitry Andric     if (isa<UndefValue>(Vec))
3240b57cec5SDimitry Andric       continue;
3250b57cec5SDimitry Andric     // For correct shuffling we have to have at most 2 different vector operands
3260b57cec5SDimitry Andric     // in all extractelement instructions.
3270b57cec5SDimitry Andric     if (!Vec1 || Vec1 == Vec)
3280b57cec5SDimitry Andric       Vec1 = Vec;
3290b57cec5SDimitry Andric     else if (!Vec2 || Vec2 == Vec)
3300b57cec5SDimitry Andric       Vec2 = Vec;
3310b57cec5SDimitry Andric     else
3320b57cec5SDimitry Andric       return None;
3330b57cec5SDimitry Andric     if (CommonShuffleMode == Permute)
3340b57cec5SDimitry Andric       continue;
3350b57cec5SDimitry Andric     // If the extract index is not the same as the operation number, it is a
3360b57cec5SDimitry Andric     // permutation.
3370b57cec5SDimitry Andric     if (IntIdx != I) {
3380b57cec5SDimitry Andric       CommonShuffleMode = Permute;
3390b57cec5SDimitry Andric       continue;
3400b57cec5SDimitry Andric     }
3410b57cec5SDimitry Andric     CommonShuffleMode = Select;
3420b57cec5SDimitry Andric   }
3430b57cec5SDimitry Andric   // If we're not crossing lanes in different vectors, consider it as blending.
3440b57cec5SDimitry Andric   if (CommonShuffleMode == Select && Vec2)
3450b57cec5SDimitry Andric     return TargetTransformInfo::SK_Select;
3460b57cec5SDimitry Andric   // If Vec2 was never used, we have a permutation of a single vector, otherwise
3470b57cec5SDimitry Andric   // we have permutation of 2 vectors.
3480b57cec5SDimitry Andric   return Vec2 ? TargetTransformInfo::SK_PermuteTwoSrc
3490b57cec5SDimitry Andric               : TargetTransformInfo::SK_PermuteSingleSrc;
3500b57cec5SDimitry Andric }
3510b57cec5SDimitry Andric 
3520b57cec5SDimitry Andric namespace {
3530b57cec5SDimitry Andric 
3540b57cec5SDimitry Andric /// Main data required for vectorization of instructions.
3550b57cec5SDimitry Andric struct InstructionsState {
3560b57cec5SDimitry Andric   /// The very first instruction in the list with the main opcode.
3570b57cec5SDimitry Andric   Value *OpValue = nullptr;
3580b57cec5SDimitry Andric 
3590b57cec5SDimitry Andric   /// The main/alternate instruction.
3600b57cec5SDimitry Andric   Instruction *MainOp = nullptr;
3610b57cec5SDimitry Andric   Instruction *AltOp = nullptr;
3620b57cec5SDimitry Andric 
3630b57cec5SDimitry Andric   /// The main/alternate opcodes for the list of instructions.
getOpcode__anon75ab86280111::InstructionsState3640b57cec5SDimitry Andric   unsigned getOpcode() const {
3650b57cec5SDimitry Andric     return MainOp ? MainOp->getOpcode() : 0;
3660b57cec5SDimitry Andric   }
3670b57cec5SDimitry Andric 
getAltOpcode__anon75ab86280111::InstructionsState3680b57cec5SDimitry Andric   unsigned getAltOpcode() const {
3690b57cec5SDimitry Andric     return AltOp ? AltOp->getOpcode() : 0;
3700b57cec5SDimitry Andric   }
3710b57cec5SDimitry Andric 
3720b57cec5SDimitry Andric   /// Some of the instructions in the list have alternate opcodes.
isAltShuffle__anon75ab86280111::InstructionsState3730b57cec5SDimitry Andric   bool isAltShuffle() const { return getOpcode() != getAltOpcode(); }
3740b57cec5SDimitry Andric 
isOpcodeOrAlt__anon75ab86280111::InstructionsState3750b57cec5SDimitry Andric   bool isOpcodeOrAlt(Instruction *I) const {
3760b57cec5SDimitry Andric     unsigned CheckedOpcode = I->getOpcode();
3770b57cec5SDimitry Andric     return getOpcode() == CheckedOpcode || getAltOpcode() == CheckedOpcode;
3780b57cec5SDimitry Andric   }
3790b57cec5SDimitry Andric 
3800b57cec5SDimitry Andric   InstructionsState() = delete;
InstructionsState__anon75ab86280111::InstructionsState3810b57cec5SDimitry Andric   InstructionsState(Value *OpValue, Instruction *MainOp, Instruction *AltOp)
3820b57cec5SDimitry Andric       : OpValue(OpValue), MainOp(MainOp), AltOp(AltOp) {}
3830b57cec5SDimitry Andric };
3840b57cec5SDimitry Andric 
3850b57cec5SDimitry Andric } // end anonymous namespace
3860b57cec5SDimitry Andric 
3870b57cec5SDimitry Andric /// Chooses the correct key for scheduling data. If \p Op has the same (or
3880b57cec5SDimitry Andric /// alternate) opcode as \p OpValue, the key is \p Op. Otherwise the key is \p
3890b57cec5SDimitry Andric /// OpValue.
isOneOf(const InstructionsState & S,Value * Op)3900b57cec5SDimitry Andric static Value *isOneOf(const InstructionsState &S, Value *Op) {
3910b57cec5SDimitry Andric   auto *I = dyn_cast<Instruction>(Op);
3920b57cec5SDimitry Andric   if (I && S.isOpcodeOrAlt(I))
3930b57cec5SDimitry Andric     return Op;
3940b57cec5SDimitry Andric   return S.OpValue;
3950b57cec5SDimitry Andric }
3960b57cec5SDimitry Andric 
39755e4f9d5SDimitry Andric /// \returns true if \p Opcode is allowed as part of of the main/alternate
39855e4f9d5SDimitry Andric /// instruction for SLP vectorization.
39955e4f9d5SDimitry Andric ///
40055e4f9d5SDimitry Andric /// Example of unsupported opcode is SDIV that can potentially cause UB if the
40155e4f9d5SDimitry Andric /// "shuffled out" lane would result in division by zero.
isValidForAlternation(unsigned Opcode)40255e4f9d5SDimitry Andric static bool isValidForAlternation(unsigned Opcode) {
40355e4f9d5SDimitry Andric   if (Instruction::isIntDivRem(Opcode))
40455e4f9d5SDimitry Andric     return false;
40555e4f9d5SDimitry Andric 
40655e4f9d5SDimitry Andric   return true;
40755e4f9d5SDimitry Andric }
40855e4f9d5SDimitry Andric 
4090b57cec5SDimitry Andric /// \returns analysis of the Instructions in \p VL described in
4100b57cec5SDimitry Andric /// InstructionsState, the Opcode that we suppose the whole list
4110b57cec5SDimitry Andric /// could be vectorized even if its structure is diverse.
getSameOpcode(ArrayRef<Value * > VL,unsigned BaseIndex=0)4120b57cec5SDimitry Andric static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
4130b57cec5SDimitry Andric                                        unsigned BaseIndex = 0) {
4140b57cec5SDimitry Andric   // Make sure these are all Instructions.
4150b57cec5SDimitry Andric   if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
4160b57cec5SDimitry Andric     return InstructionsState(VL[BaseIndex], nullptr, nullptr);
4170b57cec5SDimitry Andric 
4180b57cec5SDimitry Andric   bool IsCastOp = isa<CastInst>(VL[BaseIndex]);
4190b57cec5SDimitry Andric   bool IsBinOp = isa<BinaryOperator>(VL[BaseIndex]);
4200b57cec5SDimitry Andric   unsigned Opcode = cast<Instruction>(VL[BaseIndex])->getOpcode();
4210b57cec5SDimitry Andric   unsigned AltOpcode = Opcode;
4220b57cec5SDimitry Andric   unsigned AltIndex = BaseIndex;
4230b57cec5SDimitry Andric 
4240b57cec5SDimitry Andric   // Check for one alternate opcode from another BinaryOperator.
4250b57cec5SDimitry Andric   // TODO - generalize to support all operators (types, calls etc.).
4260b57cec5SDimitry Andric   for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
4270b57cec5SDimitry Andric     unsigned InstOpcode = cast<Instruction>(VL[Cnt])->getOpcode();
4280b57cec5SDimitry Andric     if (IsBinOp && isa<BinaryOperator>(VL[Cnt])) {
4290b57cec5SDimitry Andric       if (InstOpcode == Opcode || InstOpcode == AltOpcode)
4300b57cec5SDimitry Andric         continue;
43155e4f9d5SDimitry Andric       if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
43255e4f9d5SDimitry Andric           isValidForAlternation(Opcode)) {
4330b57cec5SDimitry Andric         AltOpcode = InstOpcode;
4340b57cec5SDimitry Andric         AltIndex = Cnt;
4350b57cec5SDimitry Andric         continue;
4360b57cec5SDimitry Andric       }
4370b57cec5SDimitry Andric     } else if (IsCastOp && isa<CastInst>(VL[Cnt])) {
4380b57cec5SDimitry Andric       Type *Ty0 = cast<Instruction>(VL[BaseIndex])->getOperand(0)->getType();
4390b57cec5SDimitry Andric       Type *Ty1 = cast<Instruction>(VL[Cnt])->getOperand(0)->getType();
4400b57cec5SDimitry Andric       if (Ty0 == Ty1) {
4410b57cec5SDimitry Andric         if (InstOpcode == Opcode || InstOpcode == AltOpcode)
4420b57cec5SDimitry Andric           continue;
4430b57cec5SDimitry Andric         if (Opcode == AltOpcode) {
44455e4f9d5SDimitry Andric           assert(isValidForAlternation(Opcode) &&
44555e4f9d5SDimitry Andric                  isValidForAlternation(InstOpcode) &&
44655e4f9d5SDimitry Andric                  "Cast isn't safe for alternation, logic needs to be updated!");
4470b57cec5SDimitry Andric           AltOpcode = InstOpcode;
4480b57cec5SDimitry Andric           AltIndex = Cnt;
4490b57cec5SDimitry Andric           continue;
4500b57cec5SDimitry Andric         }
4510b57cec5SDimitry Andric       }
4520b57cec5SDimitry Andric     } else if (InstOpcode == Opcode || InstOpcode == AltOpcode)
4530b57cec5SDimitry Andric       continue;
4540b57cec5SDimitry Andric     return InstructionsState(VL[BaseIndex], nullptr, nullptr);
4550b57cec5SDimitry Andric   }
4560b57cec5SDimitry Andric 
4570b57cec5SDimitry Andric   return InstructionsState(VL[BaseIndex], cast<Instruction>(VL[BaseIndex]),
4580b57cec5SDimitry Andric                            cast<Instruction>(VL[AltIndex]));
4590b57cec5SDimitry Andric }
4600b57cec5SDimitry Andric 
4610b57cec5SDimitry Andric /// \returns true if all of the values in \p VL have the same type or false
4620b57cec5SDimitry Andric /// otherwise.
allSameType(ArrayRef<Value * > VL)4630b57cec5SDimitry Andric static bool allSameType(ArrayRef<Value *> VL) {
4640b57cec5SDimitry Andric   Type *Ty = VL[0]->getType();
4650b57cec5SDimitry Andric   for (int i = 1, e = VL.size(); i < e; i++)
4660b57cec5SDimitry Andric     if (VL[i]->getType() != Ty)
4670b57cec5SDimitry Andric       return false;
4680b57cec5SDimitry Andric 
4690b57cec5SDimitry Andric   return true;
4700b57cec5SDimitry Andric }
4710b57cec5SDimitry Andric 
4720b57cec5SDimitry Andric /// \returns True if Extract{Value,Element} instruction extracts element Idx.
getExtractIndex(Instruction * E)4730b57cec5SDimitry Andric static Optional<unsigned> getExtractIndex(Instruction *E) {
4740b57cec5SDimitry Andric   unsigned Opcode = E->getOpcode();
4750b57cec5SDimitry Andric   assert((Opcode == Instruction::ExtractElement ||
4760b57cec5SDimitry Andric           Opcode == Instruction::ExtractValue) &&
4770b57cec5SDimitry Andric          "Expected extractelement or extractvalue instruction.");
4780b57cec5SDimitry Andric   if (Opcode == Instruction::ExtractElement) {
4790b57cec5SDimitry Andric     auto *CI = dyn_cast<ConstantInt>(E->getOperand(1));
4800b57cec5SDimitry Andric     if (!CI)
4810b57cec5SDimitry Andric       return None;
4820b57cec5SDimitry Andric     return CI->getZExtValue();
4830b57cec5SDimitry Andric   }
4840b57cec5SDimitry Andric   ExtractValueInst *EI = cast<ExtractValueInst>(E);
4850b57cec5SDimitry Andric   if (EI->getNumIndices() != 1)
4860b57cec5SDimitry Andric     return None;
4870b57cec5SDimitry Andric   return *EI->idx_begin();
4880b57cec5SDimitry Andric }
4890b57cec5SDimitry Andric 
4900b57cec5SDimitry Andric /// \returns True if in-tree use also needs extract. This refers to
4910b57cec5SDimitry Andric /// possible scalar operand in vectorized instruction.
InTreeUserNeedToExtract(Value * Scalar,Instruction * UserInst,TargetLibraryInfo * TLI)4920b57cec5SDimitry Andric static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
4930b57cec5SDimitry Andric                                     TargetLibraryInfo *TLI) {
4940b57cec5SDimitry Andric   unsigned Opcode = UserInst->getOpcode();
4950b57cec5SDimitry Andric   switch (Opcode) {
4960b57cec5SDimitry Andric   case Instruction::Load: {
4970b57cec5SDimitry Andric     LoadInst *LI = cast<LoadInst>(UserInst);
4980b57cec5SDimitry Andric     return (LI->getPointerOperand() == Scalar);
4990b57cec5SDimitry Andric   }
5000b57cec5SDimitry Andric   case Instruction::Store: {
5010b57cec5SDimitry Andric     StoreInst *SI = cast<StoreInst>(UserInst);
5020b57cec5SDimitry Andric     return (SI->getPointerOperand() == Scalar);
5030b57cec5SDimitry Andric   }
5040b57cec5SDimitry Andric   case Instruction::Call: {
5050b57cec5SDimitry Andric     CallInst *CI = cast<CallInst>(UserInst);
5060b57cec5SDimitry Andric     Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
5070b57cec5SDimitry Andric     for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
5080b57cec5SDimitry Andric       if (hasVectorInstrinsicScalarOpd(ID, i))
5090b57cec5SDimitry Andric         return (CI->getArgOperand(i) == Scalar);
5100b57cec5SDimitry Andric     }
5110b57cec5SDimitry Andric     LLVM_FALLTHROUGH;
5120b57cec5SDimitry Andric   }
5130b57cec5SDimitry Andric   default:
5140b57cec5SDimitry Andric     return false;
5150b57cec5SDimitry Andric   }
5160b57cec5SDimitry Andric }
5170b57cec5SDimitry Andric 
5180b57cec5SDimitry Andric /// \returns the AA location that is being access by the instruction.
getLocation(Instruction * I,AAResults * AA)519af732203SDimitry Andric static MemoryLocation getLocation(Instruction *I, AAResults *AA) {
5200b57cec5SDimitry Andric   if (StoreInst *SI = dyn_cast<StoreInst>(I))
5210b57cec5SDimitry Andric     return MemoryLocation::get(SI);
5220b57cec5SDimitry Andric   if (LoadInst *LI = dyn_cast<LoadInst>(I))
5230b57cec5SDimitry Andric     return MemoryLocation::get(LI);
5240b57cec5SDimitry Andric   return MemoryLocation();
5250b57cec5SDimitry Andric }
5260b57cec5SDimitry Andric 
5270b57cec5SDimitry Andric /// \returns True if the instruction is not a volatile or atomic load/store.
isSimple(Instruction * I)5280b57cec5SDimitry Andric static bool isSimple(Instruction *I) {
5290b57cec5SDimitry Andric   if (LoadInst *LI = dyn_cast<LoadInst>(I))
5300b57cec5SDimitry Andric     return LI->isSimple();
5310b57cec5SDimitry Andric   if (StoreInst *SI = dyn_cast<StoreInst>(I))
5320b57cec5SDimitry Andric     return SI->isSimple();
5330b57cec5SDimitry Andric   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
5340b57cec5SDimitry Andric     return !MI->isVolatile();
5350b57cec5SDimitry Andric   return true;
5360b57cec5SDimitry Andric }
5370b57cec5SDimitry Andric 
5380b57cec5SDimitry Andric namespace llvm {
5390b57cec5SDimitry Andric 
inversePermutation(ArrayRef<unsigned> Indices,SmallVectorImpl<int> & Mask)540af732203SDimitry Andric static void inversePermutation(ArrayRef<unsigned> Indices,
541af732203SDimitry Andric                                SmallVectorImpl<int> &Mask) {
542af732203SDimitry Andric   Mask.clear();
543af732203SDimitry Andric   const unsigned E = Indices.size();
544af732203SDimitry Andric   Mask.resize(E, E + 1);
545af732203SDimitry Andric   for (unsigned I = 0; I < E; ++I)
546af732203SDimitry Andric     Mask[Indices[I]] = I;
547af732203SDimitry Andric }
548af732203SDimitry Andric 
5495f7ddb14SDimitry Andric /// \returns inserting index of InsertElement or InsertValue instruction,
5505f7ddb14SDimitry Andric /// using Offset as base offset for index.
getInsertIndex(Value * InsertInst,unsigned Offset)5515f7ddb14SDimitry Andric static Optional<int> getInsertIndex(Value *InsertInst, unsigned Offset) {
5525f7ddb14SDimitry Andric   int Index = Offset;
5535f7ddb14SDimitry Andric   if (auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
5545f7ddb14SDimitry Andric     if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) {
5555f7ddb14SDimitry Andric       auto *VT = cast<FixedVectorType>(IE->getType());
5565f7ddb14SDimitry Andric       if (CI->getValue().uge(VT->getNumElements()))
5575f7ddb14SDimitry Andric         return UndefMaskElem;
5585f7ddb14SDimitry Andric       Index *= VT->getNumElements();
5595f7ddb14SDimitry Andric       Index += CI->getZExtValue();
5605f7ddb14SDimitry Andric       return Index;
5615f7ddb14SDimitry Andric     }
5625f7ddb14SDimitry Andric     if (isa<UndefValue>(IE->getOperand(2)))
5635f7ddb14SDimitry Andric       return UndefMaskElem;
5645f7ddb14SDimitry Andric     return None;
5655f7ddb14SDimitry Andric   }
5665f7ddb14SDimitry Andric 
5675f7ddb14SDimitry Andric   auto *IV = cast<InsertValueInst>(InsertInst);
5685f7ddb14SDimitry Andric   Type *CurrentType = IV->getType();
5695f7ddb14SDimitry Andric   for (unsigned I : IV->indices()) {
5705f7ddb14SDimitry Andric     if (auto *ST = dyn_cast<StructType>(CurrentType)) {
5715f7ddb14SDimitry Andric       Index *= ST->getNumElements();
5725f7ddb14SDimitry Andric       CurrentType = ST->getElementType(I);
5735f7ddb14SDimitry Andric     } else if (auto *AT = dyn_cast<ArrayType>(CurrentType)) {
5745f7ddb14SDimitry Andric       Index *= AT->getNumElements();
5755f7ddb14SDimitry Andric       CurrentType = AT->getElementType();
5765f7ddb14SDimitry Andric     } else {
5775f7ddb14SDimitry Andric       return None;
5785f7ddb14SDimitry Andric     }
5795f7ddb14SDimitry Andric     Index += I;
5805f7ddb14SDimitry Andric   }
5815f7ddb14SDimitry Andric   return Index;
5825f7ddb14SDimitry Andric }
5835f7ddb14SDimitry Andric 
5840b57cec5SDimitry Andric namespace slpvectorizer {
5850b57cec5SDimitry Andric 
5860b57cec5SDimitry Andric /// Bottom Up SLP Vectorizer.
5870b57cec5SDimitry Andric class BoUpSLP {
5880b57cec5SDimitry Andric   struct TreeEntry;
5898bcb0991SDimitry Andric   struct ScheduleData;
5900b57cec5SDimitry Andric 
5910b57cec5SDimitry Andric public:
5920b57cec5SDimitry Andric   using ValueList = SmallVector<Value *, 8>;
5930b57cec5SDimitry Andric   using InstrList = SmallVector<Instruction *, 16>;
5940b57cec5SDimitry Andric   using ValueSet = SmallPtrSet<Value *, 16>;
5950b57cec5SDimitry Andric   using StoreList = SmallVector<StoreInst *, 8>;
5960b57cec5SDimitry Andric   using ExtraValueToDebugLocsMap =
5970b57cec5SDimitry Andric       MapVector<Value *, SmallVector<Instruction *, 2>>;
598af732203SDimitry Andric   using OrdersType = SmallVector<unsigned, 4>;
5990b57cec5SDimitry Andric 
BoUpSLP(Function * Func,ScalarEvolution * Se,TargetTransformInfo * Tti,TargetLibraryInfo * TLi,AAResults * Aa,LoopInfo * Li,DominatorTree * Dt,AssumptionCache * AC,DemandedBits * DB,const DataLayout * DL,OptimizationRemarkEmitter * ORE)6000b57cec5SDimitry Andric   BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
601af732203SDimitry Andric           TargetLibraryInfo *TLi, AAResults *Aa, LoopInfo *Li,
6020b57cec5SDimitry Andric           DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB,
6030b57cec5SDimitry Andric           const DataLayout *DL, OptimizationRemarkEmitter *ORE)
6040b57cec5SDimitry Andric       : F(Func), SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC),
6050b57cec5SDimitry Andric         DB(DB), DL(DL), ORE(ORE), Builder(Se->getContext()) {
6060b57cec5SDimitry Andric     CodeMetrics::collectEphemeralValues(F, AC, EphValues);
6070b57cec5SDimitry Andric     // Use the vector register size specified by the target unless overridden
6080b57cec5SDimitry Andric     // by a command-line option.
6090b57cec5SDimitry Andric     // TODO: It would be better to limit the vectorization factor based on
6100b57cec5SDimitry Andric     //       data type rather than just register size. For example, x86 AVX has
6110b57cec5SDimitry Andric     //       256-bit registers, but it does not support integer operations
6120b57cec5SDimitry Andric     //       at that width (that requires AVX2).
6130b57cec5SDimitry Andric     if (MaxVectorRegSizeOption.getNumOccurrences())
6140b57cec5SDimitry Andric       MaxVecRegSize = MaxVectorRegSizeOption;
6150b57cec5SDimitry Andric     else
6165f7ddb14SDimitry Andric       MaxVecRegSize =
6175f7ddb14SDimitry Andric           TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
6185f7ddb14SDimitry Andric               .getFixedSize();
6190b57cec5SDimitry Andric 
6200b57cec5SDimitry Andric     if (MinVectorRegSizeOption.getNumOccurrences())
6210b57cec5SDimitry Andric       MinVecRegSize = MinVectorRegSizeOption;
6220b57cec5SDimitry Andric     else
6230b57cec5SDimitry Andric       MinVecRegSize = TTI->getMinVectorRegisterBitWidth();
6240b57cec5SDimitry Andric   }
6250b57cec5SDimitry Andric 
6260b57cec5SDimitry Andric   /// Vectorize the tree that starts with the elements in \p VL.
6270b57cec5SDimitry Andric   /// Returns the vectorized root.
6280b57cec5SDimitry Andric   Value *vectorizeTree();
6290b57cec5SDimitry Andric 
6300b57cec5SDimitry Andric   /// Vectorize the tree but with the list of externally used values \p
  /// ExternallyUsedValues. Values in this MapVector can be replaced by the
6320b57cec5SDimitry Andric   /// generated extractvalue instructions.
6330b57cec5SDimitry Andric   Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues);
6340b57cec5SDimitry Andric 
6350b57cec5SDimitry Andric   /// \returns the cost incurred by unwanted spills and fills, caused by
6360b57cec5SDimitry Andric   /// holding live values over call sites.
637af732203SDimitry Andric   InstructionCost getSpillCost() const;
6380b57cec5SDimitry Andric 
6390b57cec5SDimitry Andric   /// \returns the vectorization cost of the subtree that starts at \p VL.
6400b57cec5SDimitry Andric   /// A negative number means that this is profitable.
6415f7ddb14SDimitry Andric   InstructionCost getTreeCost(ArrayRef<Value *> VectorizedVals = None);
6420b57cec5SDimitry Andric 
6430b57cec5SDimitry Andric   /// Construct a vectorizable tree that starts at \p Roots, ignoring users for
6440b57cec5SDimitry Andric   /// the purpose of scheduling and extraction in the \p UserIgnoreLst.
6450b57cec5SDimitry Andric   void buildTree(ArrayRef<Value *> Roots,
6460b57cec5SDimitry Andric                  ArrayRef<Value *> UserIgnoreLst = None);
6470b57cec5SDimitry Andric 
6480b57cec5SDimitry Andric   /// Construct a vectorizable tree that starts at \p Roots, ignoring users for
6490b57cec5SDimitry Andric   /// the purpose of scheduling and extraction in the \p UserIgnoreLst taking
650480093f4SDimitry Andric   /// into account (and updating it, if required) list of externally used
6510b57cec5SDimitry Andric   /// values stored in \p ExternallyUsedValues.
6520b57cec5SDimitry Andric   void buildTree(ArrayRef<Value *> Roots,
6530b57cec5SDimitry Andric                  ExtraValueToDebugLocsMap &ExternallyUsedValues,
6540b57cec5SDimitry Andric                  ArrayRef<Value *> UserIgnoreLst = None);
6550b57cec5SDimitry Andric 
  /// Clear the internal data structures that are created by 'buildTree', so
  /// the analysis can be re-run on a fresh bundle of scalars.
  void deleteTree() {
    VectorizableTree.clear();
    ScalarToTreeEntry.clear();
    MustGather.clear();
    ExternalUses.clear();
    // Reset the reordering votes collected for the previous tree.
    NumOpsWantToKeepOrder.clear();
    NumOpsWantToKeepOriginalOrder = 0;
    // Per-block scheduling regions are emptied but the map entries are kept
    // for reuse across buildTree invocations.
    for (auto &Iter : BlocksSchedules) {
      BlockScheduling *BS = Iter.second.get();
      BS->clear();
    }
    MinBWs.clear();
    InstrElementSize.clear();
  }
6710b57cec5SDimitry Andric 
getTreeSize() const6720b57cec5SDimitry Andric   unsigned getTreeSize() const { return VectorizableTree.size(); }
6730b57cec5SDimitry Andric 
6740b57cec5SDimitry Andric   /// Perform LICM and CSE on the newly generated gather sequences.
6750b57cec5SDimitry Andric   void optimizeGatherSequence();
6760b57cec5SDimitry Andric 
  /// \returns The best order of instructions for vectorization, chosen by
  /// majority vote among the orders recorded in NumOpsWantToKeepOrder, or
  /// None if keeping the original order won (or no orders were recorded).
  /// The returned ArrayRef points into NumOpsWantToKeepOrder and is only
  /// valid while that map is unmodified.
  Optional<ArrayRef<unsigned>> bestOrder() const {
    // Every recorded order must cover the root node's scalars exactly.
    assert(llvm::all_of(
               NumOpsWantToKeepOrder,
               [this](const decltype(NumOpsWantToKeepOrder)::value_type &D) {
                 return D.getFirst().size() ==
                        VectorizableTree[0]->Scalars.size();
               }) &&
           "All orders must have the same size as number of instructions in "
           "tree node.");
    // Pick the order with the highest vote count.
    auto I = std::max_element(
        NumOpsWantToKeepOrder.begin(), NumOpsWantToKeepOrder.end(),
        [](const decltype(NumOpsWantToKeepOrder)::value_type &D1,
           const decltype(NumOpsWantToKeepOrder)::value_type &D2) {
          return D1.second < D2.second;
        });
    // The original order acts as an implicit candidate with its own count.
    if (I == NumOpsWantToKeepOrder.end() ||
        I->getSecond() <= NumOpsWantToKeepOriginalOrder)
      return None;

    return makeArrayRef(I->getFirst());
  }
6990b57cec5SDimitry Andric 
  /// Builds the correct order for root instructions.
  /// If some leaves have the same instructions to be vectorized, we may
  /// incorrectly evaluate the best order for the root node (it is built for the
  /// vector of instructions without repeated instructions and, thus, has less
  /// elements than the root node). This function builds the correct order for
  /// the root node.
  /// For example, if the root node is \<a+b, a+c, a+d, f+e\>, then the leaves
  /// are \<a, a, a, f\> and \<b, c, d, e\>. When we try to vectorize the first
  /// leaf, it will be shrink to \<a, b\>. If instructions in this leaf should
  /// be reordered, the best order will be \<1, 0\>. We need to extend this
  /// order for the root node. For the root node this order should look like
  /// \<3, 0, 1, 2\>. This function extends the order for the reused
  /// instructions.
  /// \param Order [in,out] the (possibly shrunk) leaf order on entry; the
  /// extended root-sized order on exit.
  void findRootOrder(OrdersType &Order) {
    // If the leaf has the same number of instructions to vectorize as the root
    // - order must be set already.
    unsigned RootSize = VectorizableTree[0]->Scalars.size();
    if (Order.size() == RootSize)
      return;
    // Replace Order with its inverse permutation before walking up the tree.
    SmallVector<unsigned, 4> RealOrder(Order.size());
    std::swap(Order, RealOrder);
    SmallVector<int, 4> Mask;
    inversePermutation(RealOrder, Mask);
    Order.assign(Mask.begin(), Mask.end());
    // The leaf has less number of instructions - need to find the true order of
    // the root.
    // Scan the nodes starting from the leaf back to the root.
    const TreeEntry *PNode = VectorizableTree.back().get();
    SmallVector<const TreeEntry *, 4> Nodes(1, PNode);
    SmallPtrSet<const TreeEntry *, 4> Visited;
    // Worklist walk over UserTreeIndices edges; stops once Order reaches
    // root size. (Note: the loop variable below shadows the outer PNode.)
    while (!Nodes.empty() && Order.size() != RootSize) {
      const TreeEntry *PNode = Nodes.pop_back_val();
      if (!Visited.insert(PNode).second)
        continue;
      const TreeEntry &Node = *PNode;
      for (const EdgeInfo &EI : Node.UserTreeIndices)
        if (EI.UserTE)
          Nodes.push_back(EI.UserTE);
      // Only nodes with reuse shuffles change the order's size.
      if (Node.ReuseShuffleIndices.empty())
        continue;
      // Build the order for the parent node.
      OrdersType NewOrder(Node.ReuseShuffleIndices.size(), RootSize);
      SmallVector<unsigned, 4> OrderCounter(Order.size(), 0);
      // The algorithm of the order extension is:
      // 1. Calculate the number of the same instructions for the order.
      // 2. Calculate the index of the new order: total number of instructions
      // with order less than the order of the current instruction + reuse
      // number of the current instruction.
      // 3. The new order is just the index of the instruction in the original
      // vector of the instructions.
      for (unsigned I : Node.ReuseShuffleIndices)
        ++OrderCounter[Order[I]];
      SmallVector<unsigned, 4> CurrentCounter(Order.size(), 0);
      for (unsigned I = 0, E = Node.ReuseShuffleIndices.size(); I < E; ++I) {
        unsigned ReusedIdx = Node.ReuseShuffleIndices[I];
        unsigned OrderIdx = Order[ReusedIdx];
        // New position = count of entries ordered before this one plus how
        // many times this entry has already been reused.
        unsigned NewIdx = 0;
        for (unsigned J = 0; J < OrderIdx; ++J)
          NewIdx += OrderCounter[J];
        NewIdx += CurrentCounter[OrderIdx];
        ++CurrentCounter[OrderIdx];
        assert(NewOrder[NewIdx] == RootSize &&
               "The order index should not be written already.");
        NewOrder[NewIdx] = I;
      }
      std::swap(Order, NewOrder);
    }
    assert(Order.size() == RootSize &&
           "Root node is expected or the size of the order must be the same as "
           "the number of elements in the root node.");
    assert(llvm::all_of(Order,
                        [RootSize](unsigned Val) { return Val != RootSize; }) &&
           "All indices must be initialized");
  }
774af732203SDimitry Andric 
7750b57cec5SDimitry Andric   /// \return The vector element size in bits to use when vectorizing the
7760b57cec5SDimitry Andric   /// expression tree ending at \p V. If V is a store, the size is the width of
7770b57cec5SDimitry Andric   /// the stored value. Otherwise, the size is the width of the largest loaded
7780b57cec5SDimitry Andric   /// value reaching V. This method is used by the vectorizer to calculate
7790b57cec5SDimitry Andric   /// vectorization factors.
7805ffd83dbSDimitry Andric   unsigned getVectorElementSize(Value *V);
7810b57cec5SDimitry Andric 
7820b57cec5SDimitry Andric   /// Compute the minimum type sizes required to represent the entries in a
7830b57cec5SDimitry Andric   /// vectorizable tree.
7840b57cec5SDimitry Andric   void computeMinimumValueSizes();
7850b57cec5SDimitry Andric 
  /// \returns maximum vector register size (in bits) as set by TTI or
  /// overridden by cl::opt.
  unsigned getMaxVecRegSize() const {
    return MaxVecRegSize;
  }
7900b57cec5SDimitry Andric 
  /// \returns minimum vector register size (in bits) as set by TTI or
  /// overridden by cl::opt.
  unsigned getMinVecRegSize() const {
    return MinVecRegSize;
  }
7950b57cec5SDimitry Andric 
getMaximumVF(unsigned ElemWidth,unsigned Opcode) const796af732203SDimitry Andric   unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
797af732203SDimitry Andric     unsigned MaxVF = MaxVFOption.getNumOccurrences() ?
798af732203SDimitry Andric       MaxVFOption : TTI->getMaximumVF(ElemWidth, Opcode);
799af732203SDimitry Andric     return MaxVF ? MaxVF : UINT_MAX;
800af732203SDimitry Andric   }
801af732203SDimitry Andric 
802480093f4SDimitry Andric   /// Check if homogeneous aggregate is isomorphic to some VectorType.
803480093f4SDimitry Andric   /// Accepts homogeneous multidimensional aggregate of scalars/vectors like
804480093f4SDimitry Andric   /// {[4 x i16], [4 x i16]}, { <2 x float>, <2 x float> },
805480093f4SDimitry Andric   /// {{{i16, i16}, {i16, i16}}, {{i16, i16}, {i16, i16}}} and so on.
8060b57cec5SDimitry Andric   ///
8070b57cec5SDimitry Andric   /// \returns number of elements in vector if isomorphism exists, 0 otherwise.
8080b57cec5SDimitry Andric   unsigned canMapToVector(Type *T, const DataLayout &DL) const;
8090b57cec5SDimitry Andric 
8100b57cec5SDimitry Andric   /// \returns True if the VectorizableTree is both tiny and not fully
8110b57cec5SDimitry Andric   /// vectorizable. We do not vectorize such trees.
8120b57cec5SDimitry Andric   bool isTreeTinyAndNotFullyVectorizable() const;
8130b57cec5SDimitry Andric 
8148bcb0991SDimitry Andric   /// Assume that a legal-sized 'or'-reduction of shifted/zexted loaded values
8158bcb0991SDimitry Andric   /// can be load combined in the backend. Load combining may not be allowed in
8168bcb0991SDimitry Andric   /// the IR optimizer, so we do not want to alter the pattern. For example,
8178bcb0991SDimitry Andric   /// partially transforming a scalar bswap() pattern into vector code is
8188bcb0991SDimitry Andric   /// effectively impossible for the backend to undo.
8198bcb0991SDimitry Andric   /// TODO: If load combining is allowed in the IR optimizer, this analysis
8208bcb0991SDimitry Andric   ///       may not be necessary.
821af732203SDimitry Andric   bool isLoadCombineReductionCandidate(RecurKind RdxKind) const;
8228bcb0991SDimitry Andric 
8235ffd83dbSDimitry Andric   /// Assume that a vector of stores of bitwise-or/shifted/zexted loaded values
8245ffd83dbSDimitry Andric   /// can be load combined in the backend. Load combining may not be allowed in
8255ffd83dbSDimitry Andric   /// the IR optimizer, so we do not want to alter the pattern. For example,
8265ffd83dbSDimitry Andric   /// partially transforming a scalar bswap() pattern into vector code is
8275ffd83dbSDimitry Andric   /// effectively impossible for the backend to undo.
8285ffd83dbSDimitry Andric   /// TODO: If load combining is allowed in the IR optimizer, this analysis
8295ffd83dbSDimitry Andric   ///       may not be necessary.
8305ffd83dbSDimitry Andric   bool isLoadCombineCandidate() const;
8315ffd83dbSDimitry Andric 
getORE()8320b57cec5SDimitry Andric   OptimizationRemarkEmitter *getORE() { return ORE; }
8330b57cec5SDimitry Andric 
  /// This structure holds any data we need about the edges being traversed
  /// during buildTree_rec(). We keep track of:
  /// (i) the user TreeEntry index, and
  /// (ii) the index of the edge.
  struct EdgeInfo {
    EdgeInfo() = default;
    EdgeInfo(TreeEntry *UserTE, unsigned EdgeIdx)
        : UserTE(UserTE), EdgeIdx(EdgeIdx) {}
    /// The user TreeEntry. Null for the root of the tree (no user).
    TreeEntry *UserTE = nullptr;
    /// The operand index of the use. UINT_MAX when default-constructed
    /// (i.e. no valid edge yet).
    unsigned EdgeIdx = UINT_MAX;
#ifndef NDEBUG
    /// Stream-insertion support for debug output; forwards to dump().
    friend inline raw_ostream &operator<<(raw_ostream &OS,
                                          const BoUpSLP::EdgeInfo &EI) {
      EI.dump(OS);
      return OS;
    }
    /// Debug print in the form "{User:<idx> EdgeIdx:<idx>}".
    void dump(raw_ostream &OS) const {
      OS << "{User:" << (UserTE ? std::to_string(UserTE->Idx) : "null")
         << " EdgeIdx:" << EdgeIdx << "}";
    }
    LLVM_DUMP_METHOD void dump() const { dump(dbgs()); }
#endif
  };
8600b57cec5SDimitry Andric 
8610b57cec5SDimitry Andric   /// A helper data structure to hold the operands of a vector of instructions.
8620b57cec5SDimitry Andric   /// This supports a fixed vector length for all operand vectors.
8630b57cec5SDimitry Andric   class VLOperands {
8640b57cec5SDimitry Andric     /// For each operand we need (i) the value, and (ii) the opcode that it
8650b57cec5SDimitry Andric     /// would be attached to if the expression was in a left-linearized form.
8660b57cec5SDimitry Andric     /// This is required to avoid illegal operand reordering.
8670b57cec5SDimitry Andric     /// For example:
8680b57cec5SDimitry Andric     /// \verbatim
8690b57cec5SDimitry Andric     ///                         0 Op1
8700b57cec5SDimitry Andric     ///                         |/
8710b57cec5SDimitry Andric     /// Op1 Op2   Linearized    + Op2
8720b57cec5SDimitry Andric     ///   \ /     ---------->   |/
8730b57cec5SDimitry Andric     ///    -                    -
8740b57cec5SDimitry Andric     ///
8750b57cec5SDimitry Andric     /// Op1 - Op2            (0 + Op1) - Op2
8760b57cec5SDimitry Andric     /// \endverbatim
8770b57cec5SDimitry Andric     ///
8780b57cec5SDimitry Andric     /// Value Op1 is attached to a '+' operation, and Op2 to a '-'.
8790b57cec5SDimitry Andric     ///
8800b57cec5SDimitry Andric     /// Another way to think of this is to track all the operations across the
8810b57cec5SDimitry Andric     /// path from the operand all the way to the root of the tree and to
8820b57cec5SDimitry Andric     /// calculate the operation that corresponds to this path. For example, the
8830b57cec5SDimitry Andric     /// path from Op2 to the root crosses the RHS of the '-', therefore the
8840b57cec5SDimitry Andric     /// corresponding operation is a '-' (which matches the one in the
8850b57cec5SDimitry Andric     /// linearized tree, as shown above).
8860b57cec5SDimitry Andric     ///
8870b57cec5SDimitry Andric     /// For lack of a better term, we refer to this operation as Accumulated
8880b57cec5SDimitry Andric     /// Path Operation (APO).
    /// Per-lane operand record: the value itself plus the bookkeeping flags
    /// used by the operand reordering logic.
    struct OperandData {
      OperandData() = default;
      OperandData(Value *V, bool APO, bool IsUsed)
          : V(V), APO(APO), IsUsed(IsUsed) {}
      /// The operand value.
      Value *V = nullptr;
      /// TreeEntries only allow a single opcode, or an alternate sequence of
      /// them (e.g, +, -). Therefore, we can safely use a boolean value for the
      /// APO. It is set to 'true' if 'V' is attached to an inverse operation
      /// in the left-linearized form (e.g., Sub/Div), and 'false' otherwise
      /// (e.g., Add/Mul)
      bool APO = false;
      /// Helper data for the reordering function. Presumably marks an operand
      /// already consumed during the current reordering pass (it is reset by
      /// clearUsed()) — confirm against the reordering implementation.
      bool IsUsed = false;
    };
9040b57cec5SDimitry Andric 
9050b57cec5SDimitry Andric     /// During operand reordering, we are trying to select the operand at lane
9060b57cec5SDimitry Andric     /// that matches best with the operand at the neighboring lane. Our
9070b57cec5SDimitry Andric     /// selection is based on the type of value we are looking for. For example,
9080b57cec5SDimitry Andric     /// if the neighboring lane has a load, we need to look for a load that is
9090b57cec5SDimitry Andric     /// accessing a consecutive address. These strategies are summarized in the
9100b57cec5SDimitry Andric     /// 'ReorderingMode' enumerator.
9110b57cec5SDimitry Andric     enum class ReorderingMode {
9120b57cec5SDimitry Andric       Load,     ///< Matching loads to consecutive memory addresses
9130b57cec5SDimitry Andric       Opcode,   ///< Matching instructions based on opcode (same or alternate)
9140b57cec5SDimitry Andric       Constant, ///< Matching constants
9150b57cec5SDimitry Andric       Splat,    ///< Matching the same instruction multiple times (broadcast)
9160b57cec5SDimitry Andric       Failed,   ///< We failed to create a vectorizable group
9170b57cec5SDimitry Andric     };
9180b57cec5SDimitry Andric 
9190b57cec5SDimitry Andric     using OperandDataVec = SmallVector<OperandData, 2>;
9200b57cec5SDimitry Andric 
9210b57cec5SDimitry Andric     /// A vector of operand vectors.
9220b57cec5SDimitry Andric     SmallVector<OperandDataVec, 4> OpsVec;
9230b57cec5SDimitry Andric 
9240b57cec5SDimitry Andric     const DataLayout &DL;
9250b57cec5SDimitry Andric     ScalarEvolution &SE;
926480093f4SDimitry Andric     const BoUpSLP &R;
9270b57cec5SDimitry Andric 
    /// \returns the operand data at \p OpIdx and \p Lane.
    /// No bounds checking is performed; indices must be in range.
    OperandData &getData(unsigned OpIdx, unsigned Lane) {
      return OpsVec[OpIdx][Lane];
    }
9320b57cec5SDimitry Andric 
    /// \returns the operand data at \p OpIdx and \p Lane. Const version.
    /// No bounds checking is performed; indices must be in range.
    const OperandData &getData(unsigned OpIdx, unsigned Lane) const {
      return OpsVec[OpIdx][Lane];
    }
9370b57cec5SDimitry Andric 
9380b57cec5SDimitry Andric     /// Clears the used flag for all entries.
clearUsed()9390b57cec5SDimitry Andric     void clearUsed() {
9400b57cec5SDimitry Andric       for (unsigned OpIdx = 0, NumOperands = getNumOperands();
9410b57cec5SDimitry Andric            OpIdx != NumOperands; ++OpIdx)
9420b57cec5SDimitry Andric         for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
9430b57cec5SDimitry Andric              ++Lane)
9440b57cec5SDimitry Andric           OpsVec[OpIdx][Lane].IsUsed = false;
9450b57cec5SDimitry Andric     }
9460b57cec5SDimitry Andric 
    /// Swap the operand at \p OpIdx1 with the one at \p OpIdx2 within lane
    /// \p Lane.
    void swap(unsigned OpIdx1, unsigned OpIdx2, unsigned Lane) {
      std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]);
    }
9510b57cec5SDimitry Andric 
    // The hard-coded scores listed here are not very important. When computing
    // the scores of matching one sub-tree with another, we are basically
    // counting the number of values that are matching. So even if all scores
    // are set to 1, we would still get a decent matching result.
    // However, sometimes we have to break ties. For example we may have to
    // choose between matching loads vs matching opcodes. This is what these
    // scores are helping us with: they provide the order of preference.

    /// Loads from consecutive memory addresses, e.g. load(A[i]), load(A[i+1]).
    static const int ScoreConsecutiveLoads = 3;
    /// ExtractElementInst from same vector and consecutive indexes.
    static const int ScoreConsecutiveExtracts = 3;
    /// Constants.
    static const int ScoreConstants = 2;
    /// Instructions with the same opcode.
    static const int ScoreSameOpcode = 2;
    /// Instructions with alt opcodes (e.g, add + sub).
    static const int ScoreAltOpcodes = 1;
    /// Identical instructions (a.k.a. splat or broadcast).
    static const int ScoreSplat = 1;
    /// Matching with an undef is preferable to failing.
    static const int ScoreUndef = 1;
    /// Score for failing to find a decent match.
    static const int ScoreFail = 0;
    /// User external to the vectorized code.
    static const int ExternalUseCost = 1;
    /// The user is internal but in a different lane.
    static const int UserInDiffLaneCost = ExternalUseCost;
980480093f4SDimitry Andric 
    /// \returns the score of placing \p V1 and \p V2 in consecutive lanes.
    /// Only the two values themselves are inspected (not their operands),
    /// hence "shallow"; getScoreAtLevelRec() builds on this recursively.
    static int getShallowScore(Value *V1, Value *V2, const DataLayout &DL,
                               ScalarEvolution &SE) {
      auto *LI1 = dyn_cast<LoadInst>(V1);
      auto *LI2 = dyn_cast<LoadInst>(V2);
      if (LI1 && LI2) {
        // Loads in different blocks are never treated as consecutive.
        if (LI1->getParent() != LI2->getParent())
          return VLOperands::ScoreFail;

        // A load pair scores only if LI2 accesses the address immediately
        // after LI1 (element distance of exactly 1).
        Optional<int> Dist = getPointersDiff(
            LI1->getType(), LI1->getPointerOperand(), LI2->getType(),
            LI2->getPointerOperand(), DL, SE, /*StrictCheck=*/true);
        return (Dist && *Dist == 1) ? VLOperands::ScoreConsecutiveLoads
                                    : VLOperands::ScoreFail;
      }

      auto *C1 = dyn_cast<Constant>(V1);
      auto *C2 = dyn_cast<Constant>(V2);
      if (C1 && C2)
        return VLOperands::ScoreConstants;

      // Extracts from consecutive indexes of the same vector better score as
      // the extracts could be optimized away.
      Value *EV;
      ConstantInt *Ex1Idx, *Ex2Idx;
      if (match(V1, m_ExtractElt(m_Value(EV), m_ConstantInt(Ex1Idx))) &&
          match(V2, m_ExtractElt(m_Deferred(EV), m_ConstantInt(Ex2Idx))) &&
          Ex1Idx->getZExtValue() + 1 == Ex2Idx->getZExtValue())
        return VLOperands::ScoreConsecutiveExtracts;

      auto *I1 = dyn_cast<Instruction>(V1);
      auto *I2 = dyn_cast<Instruction>(V2);
      if (I1 && I2) {
        // The very same instruction in both lanes is a splat (broadcast).
        if (I1 == I2)
          return VLOperands::ScoreSplat;
        InstructionsState S = getSameOpcode({I1, I2});
        // Note: Only consider instructions with <= 2 operands to avoid
        // complexity explosion.
        if (S.getOpcode() && S.MainOp->getNumOperands() <= 2)
          return S.isAltShuffle() ? VLOperands::ScoreAltOpcodes
                                  : VLOperands::ScoreSameOpcode;
      }

      // Matching against an undef is preferable to failing outright.
      if (isa<UndefValue>(V2))
        return VLOperands::ScoreUndef;

      return VLOperands::ScoreFail;
    }
1029480093f4SDimitry Andric 
    /// Holds the values and their lane that are taking part in the look-ahead
    /// score calculation. This is used in the external uses cost calculation.
    SmallDenseMap<Value *, int> InLookAheadValues;

    /// \Returns the additional cost due to uses of \p LHS and \p RHS that are
    /// either external to the vectorized code, or require shuffling.
    int getExternalUsesCost(const std::pair<Value *, int> &LHS,
                            const std::pair<Value *, int> &RHS) {
      int Cost = 0;
      std::array<std::pair<Value *, int>, 2> Values = {{LHS, RHS}};
      for (int Idx = 0, IdxE = Values.size(); Idx != IdxE; ++Idx) {
        Value *V = Values[Idx].first;
        if (isa<Constant>(V)) {
          // Since this is a function pass, it doesn't make semantic sense to
          // walk the users of a subclass of Constant. The users could be in
          // another function, or even another module that happens to be in
          // the same LLVMContext.
          continue;
        }

        // Calculate the absolute lane, using the minimum relative lane of LHS
        // and RHS as base and Idx as the offset.
        int Ln = std::min(LHS.second, RHS.second) + Idx;
        assert(Ln >= 0 && "Bad lane calculation");
        // Cap the number of users visited per value to bound compile time.
        unsigned UsersBudget = LookAheadUsersBudget;
        for (User *U : V->users()) {
          if (const TreeEntry *UserTE = R.getTreeEntry(U)) {
            // The user is in the VectorizableTree. Check if we need to insert.
            auto It = llvm::find(UserTE->Scalars, U);
            assert(It != UserTE->Scalars.end() && "U is in UserTE");
            int UserLn = std::distance(UserTE->Scalars.begin(), It);
            assert(UserLn >= 0 && "Bad lane");
            // A user in a different lane implies a shuffle, which has a cost.
            if (UserLn != Ln)
              Cost += UserInDiffLaneCost;
          } else {
            // Check if the user is in the look-ahead code.
            auto It2 = InLookAheadValues.find(U);
            if (It2 != InLookAheadValues.end()) {
              // The user is in the look-ahead code. Check the lane.
              if (It2->second != Ln)
                Cost += UserInDiffLaneCost;
            } else {
              // The user is neither in SLP tree nor in the look-ahead code.
              Cost += ExternalUseCost;
            }
          }
          // Limit the number of visited uses to cap compilation time.
          if (--UsersBudget == 0)
            break;
        }
      }
      return Cost;
    }
1083480093f4SDimitry Andric 
    /// Go through the operands of \p LHS and \p RHS recursively until \p
    /// MaxLevel, and return the cumulative score. For example:
    /// \verbatim
    ///  A[0]  B[0]  A[1]  B[1]  C[0] D[0]  B[1] A[1]
    ///     \ /         \ /         \ /        \ /
    ///      +           +           +          +
    ///     G1          G2          G3         G4
    /// \endverbatim
    /// The getScoreAtLevelRec(G1, G2) function will try to match the nodes at
    /// each level recursively, accumulating the score. It starts from matching
    /// the additions at level 0, then moves on to the loads (level 1). The
    /// score of G1 and G2 is higher than G1 and G3, because {A[0],A[1]} and
    /// {B[0],B[1]} match with VLOperands::ScoreConsecutiveLoads, while
    /// {A[0],C[0]} has a score of VLOperands::ScoreFail.
    /// Please note that the order of the operands does not matter, as we
    /// evaluate the score of all profitable combinations of operands. In
    /// other words the score of G1 and G4 is the same as G1 and G2. This
    /// heuristic is based on ideas described in:
    ///   Look-ahead SLP: Auto-vectorization in the presence of commutative
    ///   operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
    ///   Luís F. W. Góes
    int getScoreAtLevelRec(const std::pair<Value *, int> &LHS,
                           const std::pair<Value *, int> &RHS, int CurrLevel,
                           int MaxLevel) {

      Value *V1 = LHS.first;
      Value *V2 = RHS.first;
      // Get the shallow score of V1 and V2, discounted by the external-use
      // penalty but never below ScoreFail.
      int ShallowScoreAtThisLevel =
          std::max((int)ScoreFail, getShallowScore(V1, V2, DL, SE) -
                                       getExternalUsesCost(LHS, RHS));
      int Lane1 = LHS.second;
      int Lane2 = RHS.second;

      // If reached MaxLevel,
      //  or if V1 and V2 are not instructions,
      //  or if they are SPLAT,
      //  or if they are not consecutive, early return the current cost.
      // A load pair that already scored (i.e. was found consecutive) also
      // stops the recursion here.
      auto *I1 = dyn_cast<Instruction>(V1);
      auto *I2 = dyn_cast<Instruction>(V2);
      if (CurrLevel == MaxLevel || !(I1 && I2) || I1 == I2 ||
          ShallowScoreAtThisLevel == VLOperands::ScoreFail ||
          (isa<LoadInst>(I1) && isa<LoadInst>(I2) && ShallowScoreAtThisLevel))
        return ShallowScoreAtThisLevel;
      assert(I1 && I2 && "Should have early exited.");

      // Keep track of in-tree values for determining the external-use cost.
      InLookAheadValues[V1] = Lane1;
      InLookAheadValues[V2] = Lane2;

      // Contains the I2 operand indexes that got matched with I1 operands.
      SmallSet<unsigned, 4> Op2Used;

      // Recursion towards the operands of I1 and I2. We are trying all possible
      // operand pairs, and keeping track of the best score.
      for (unsigned OpIdx1 = 0, NumOperands1 = I1->getNumOperands();
           OpIdx1 != NumOperands1; ++OpIdx1) {
        // Try to pair op1I with the best operand of I2.
        int MaxTmpScore = 0;
        unsigned MaxOpIdx2 = 0;
        bool FoundBest = false;
        // If I2 is commutative try all combinations.
        unsigned FromIdx = isCommutative(I2) ? 0 : OpIdx1;
        unsigned ToIdx = isCommutative(I2)
                             ? I2->getNumOperands()
                             : std::min(I2->getNumOperands(), OpIdx1 + 1);
        assert(FromIdx <= ToIdx && "Bad index");
        for (unsigned OpIdx2 = FromIdx; OpIdx2 != ToIdx; ++OpIdx2) {
          // Skip operands already paired with OpIdx1.
          if (Op2Used.count(OpIdx2))
            continue;
          // Recursively calculate the cost at each level
          int TmpScore = getScoreAtLevelRec({I1->getOperand(OpIdx1), Lane1},
                                            {I2->getOperand(OpIdx2), Lane2},
                                            CurrLevel + 1, MaxLevel);
          // Look for the best score.
          if (TmpScore > VLOperands::ScoreFail && TmpScore > MaxTmpScore) {
            MaxTmpScore = TmpScore;
            MaxOpIdx2 = OpIdx2;
            FoundBest = true;
          }
        }
        if (FoundBest) {
          // Pair {OpIdx1, MaxOpIdx2} was found to be best. Never revisit it.
          Op2Used.insert(MaxOpIdx2);
          ShallowScoreAtThisLevel += MaxTmpScore;
        }
      }
      return ShallowScoreAtThisLevel;
    }
1174480093f4SDimitry Andric 
    /// \Returns the look-ahead score, which tells us how much the sub-trees
    /// rooted at \p LHS and \p RHS match, the more they match the higher the
    /// score. This helps break ties in an informed way when we cannot decide on
    /// the order of the operands by just considering the immediate
    /// predecessors.
    int getLookAheadScore(const std::pair<Value *, int> &LHS,
                          const std::pair<Value *, int> &RHS) {
      // Reset the bookkeeping consumed by getExternalUsesCost() before each
      // top-level score query.
      InLookAheadValues.clear();
      return getScoreAtLevelRec(LHS, RHS, 1, LookAheadMaxDepth);
    }
1185480093f4SDimitry Andric 
    // Search all operands in Ops[*][Lane] for the one that matches best
    // Ops[OpIdx][LastLane] and return its operand index.
    // If no good match can be found, return None.
    Optional<unsigned>
    getBestOperand(unsigned OpIdx, int Lane, int LastLane,
                   ArrayRef<ReorderingMode> ReorderingModes) {
      unsigned NumOperands = getNumOperands();

      // The operand of the previous lane at OpIdx.
      Value *OpLastLane = getData(OpIdx, LastLane).V;

      // Our strategy mode for OpIdx.
      ReorderingMode RMode = ReorderingModes[OpIdx];

      // The linearized opcode of the operand at OpIdx, Lane.
      bool OpIdxAPO = getData(OpIdx, Lane).APO;

      // The best operand index and its score.
      // Sometimes we have more than one option (e.g., Opcode and Undefs), so we
      // are using the score to differentiate between the two.
      struct BestOpData {
        Optional<unsigned> Idx = None;
        unsigned Score = 0;
      } BestOp;

      // Iterate through all unused operands and look for the best.
      for (unsigned Idx = 0; Idx != NumOperands; ++Idx) {
        // Get the operand at Idx and Lane.
        OperandData &OpData = getData(Idx, Lane);
        Value *Op = OpData.V;
        bool OpAPO = OpData.APO;

        // Skip already selected operands.
        if (OpData.IsUsed)
          continue;

        // Skip if we are trying to move the operand to a position with a
        // different opcode in the linearized tree form. This would break the
        // semantics.
        if (OpAPO != OpIdxAPO)
          continue;

        // Look for an operand that matches the current mode.
        switch (RMode) {
        case ReorderingMode::Load:
        case ReorderingMode::Constant:
        case ReorderingMode::Opcode: {
          // Score the candidate with the look-ahead heuristic; the candidate
          // pair is oriented by the direction we are scanning the lanes.
          bool LeftToRight = Lane > LastLane;
          Value *OpLeft = (LeftToRight) ? OpLastLane : Op;
          Value *OpRight = (LeftToRight) ? Op : OpLastLane;
          unsigned Score =
              getLookAheadScore({OpLeft, LastLane}, {OpRight, Lane});
          if (Score > BestOp.Score) {
            BestOp.Idx = Idx;
            BestOp.Score = Score;
          }
          break;
        }
        case ReorderingMode::Splat:
          // Splat mode does not use scores: any exact repeat of the previous
          // lane's operand is as good as another.
          if (Op == OpLastLane)
            BestOp.Idx = Idx;
          break;
        case ReorderingMode::Failed:
          return None;
        }
      }

      if (BestOp.Idx) {
        // Mark the winner as used so it is not picked again for this lane.
        getData(BestOp.Idx.getValue(), Lane).IsUsed = true;
        return BestOp.Idx;
      }
      // If we could not find a good match return None.
      return None;
    }
12600b57cec5SDimitry Andric 
12610b57cec5SDimitry Andric     /// Helper for reorderOperandVecs. \Returns the lane that we should start
12620b57cec5SDimitry Andric     /// reordering from. This is the one which has the least number of operands
12630b57cec5SDimitry Andric     /// that can freely move about.
getBestLaneToStartReordering() const12640b57cec5SDimitry Andric     unsigned getBestLaneToStartReordering() const {
12650b57cec5SDimitry Andric       unsigned BestLane = 0;
12660b57cec5SDimitry Andric       unsigned Min = UINT_MAX;
12670b57cec5SDimitry Andric       for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
12680b57cec5SDimitry Andric            ++Lane) {
12690b57cec5SDimitry Andric         unsigned NumFreeOps = getMaxNumOperandsThatCanBeReordered(Lane);
12700b57cec5SDimitry Andric         if (NumFreeOps < Min) {
12710b57cec5SDimitry Andric           Min = NumFreeOps;
12720b57cec5SDimitry Andric           BestLane = Lane;
12730b57cec5SDimitry Andric         }
12740b57cec5SDimitry Andric       }
12750b57cec5SDimitry Andric       return BestLane;
12760b57cec5SDimitry Andric     }
12770b57cec5SDimitry Andric 
12780b57cec5SDimitry Andric     /// \Returns the maximum number of operands that are allowed to be reordered
12790b57cec5SDimitry Andric     /// for \p Lane. This is used as a heuristic for selecting the first lane to
12800b57cec5SDimitry Andric     /// start operand reordering.
getMaxNumOperandsThatCanBeReordered(unsigned Lane) const12810b57cec5SDimitry Andric     unsigned getMaxNumOperandsThatCanBeReordered(unsigned Lane) const {
12820b57cec5SDimitry Andric       unsigned CntTrue = 0;
12830b57cec5SDimitry Andric       unsigned NumOperands = getNumOperands();
12840b57cec5SDimitry Andric       // Operands with the same APO can be reordered. We therefore need to count
12850b57cec5SDimitry Andric       // how many of them we have for each APO, like this: Cnt[APO] = x.
12860b57cec5SDimitry Andric       // Since we only have two APOs, namely true and false, we can avoid using
12870b57cec5SDimitry Andric       // a map. Instead we can simply count the number of operands that
12880b57cec5SDimitry Andric       // correspond to one of them (in this case the 'true' APO), and calculate
12890b57cec5SDimitry Andric       // the other by subtracting it from the total number of operands.
12900b57cec5SDimitry Andric       for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
12910b57cec5SDimitry Andric         if (getData(OpIdx, Lane).APO)
12920b57cec5SDimitry Andric           ++CntTrue;
12930b57cec5SDimitry Andric       unsigned CntFalse = NumOperands - CntTrue;
12940b57cec5SDimitry Andric       return std::max(CntTrue, CntFalse);
12950b57cec5SDimitry Andric     }
12960b57cec5SDimitry Andric 
    /// Go through the instructions in VL and append their operands.
    /// Each VL[Lane] must be an Instruction with the same operand count; its
    /// operands are recorded into OpsVec[OpIdx][Lane] along with their APO.
    void appendOperandsOfVL(ArrayRef<Value *> VL) {
      assert(!VL.empty() && "Bad VL");
      assert((empty() || VL.size() == getNumLanes()) &&
             "Expected same number of lanes");
      assert(isa<Instruction>(VL[0]) && "Expected instruction");
      unsigned NumOperands = cast<Instruction>(VL[0])->getNumOperands();
      OpsVec.resize(NumOperands);
      unsigned NumLanes = VL.size();
      for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
        OpsVec[OpIdx].resize(NumLanes);
        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
          assert(isa<Instruction>(VL[Lane]) && "Expected instruction");
          // Our tree has just 3 nodes: the root and two operands.
          // It is therefore trivial to get the APO. We only need to check the
          // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
          // RHS operand. The LHS operand of both add and sub is never attached
          // to an inverse operation in the linearized form, therefore its APO
          // is false. The RHS is true only if VL[Lane] is an inverse operation.

          // Since operand reordering is performed on groups of commutative
          // operations or alternating sequences (e.g., +, -), we can safely
          // tell the inverse operations by checking commutativity.
          bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
          bool APO = (OpIdx == 0) ? false : IsInverseOperation;
          OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
                                 APO, false};
        }
      }
    }
13270b57cec5SDimitry Andric 
    /// \returns the number of operands.
    unsigned getNumOperands() const { return OpsVec.size(); }

    /// \returns the number of lanes. Requires at least one operand vector
    /// (reads OpsVec[0]).
    unsigned getNumLanes() const { return OpsVec[0].size(); }

    /// \returns the operand value at \p OpIdx and \p Lane.
    Value *getValue(unsigned OpIdx, unsigned Lane) const {
      return getData(OpIdx, Lane).V;
    }

    /// \returns true if the data structure is empty.
    bool empty() const { return OpsVec.empty(); }

    /// Clears the data.
    void clear() { OpsVec.clear(); }
13440b57cec5SDimitry Andric 
13450b57cec5SDimitry Andric     /// \Returns true if there are enough operands identical to \p Op to fill
13460b57cec5SDimitry Andric     /// the whole vector.
13470b57cec5SDimitry Andric     /// Note: This modifies the 'IsUsed' flag, so a cleanUsed() must follow.
shouldBroadcast(Value * Op,unsigned OpIdx,unsigned Lane)13480b57cec5SDimitry Andric     bool shouldBroadcast(Value *Op, unsigned OpIdx, unsigned Lane) {
13490b57cec5SDimitry Andric       bool OpAPO = getData(OpIdx, Lane).APO;
13500b57cec5SDimitry Andric       for (unsigned Ln = 0, Lns = getNumLanes(); Ln != Lns; ++Ln) {
13510b57cec5SDimitry Andric         if (Ln == Lane)
13520b57cec5SDimitry Andric           continue;
13530b57cec5SDimitry Andric         // This is set to true if we found a candidate for broadcast at Lane.
13540b57cec5SDimitry Andric         bool FoundCandidate = false;
13550b57cec5SDimitry Andric         for (unsigned OpI = 0, OpE = getNumOperands(); OpI != OpE; ++OpI) {
13560b57cec5SDimitry Andric           OperandData &Data = getData(OpI, Ln);
13570b57cec5SDimitry Andric           if (Data.APO != OpAPO || Data.IsUsed)
13580b57cec5SDimitry Andric             continue;
13590b57cec5SDimitry Andric           if (Data.V == Op) {
13600b57cec5SDimitry Andric             FoundCandidate = true;
13610b57cec5SDimitry Andric             Data.IsUsed = true;
13620b57cec5SDimitry Andric             break;
13630b57cec5SDimitry Andric           }
13640b57cec5SDimitry Andric         }
13650b57cec5SDimitry Andric         if (!FoundCandidate)
13660b57cec5SDimitry Andric           return false;
13670b57cec5SDimitry Andric       }
13680b57cec5SDimitry Andric       return true;
13690b57cec5SDimitry Andric     }
13700b57cec5SDimitry Andric 
  public:
    /// Initialize with all the operands of the instruction vector \p RootVL.
    /// \p DL and \p SE are used for load-consecutiveness queries and \p R for
    /// vectorizable-tree lookups during operand scoring.
    VLOperands(ArrayRef<Value *> RootVL, const DataLayout &DL,
               ScalarEvolution &SE, const BoUpSLP &R)
        : DL(DL), SE(SE), R(R) {
      // Append all the operands of RootVL.
      appendOperandsOfVL(RootVL);
    }
13790b57cec5SDimitry Andric 
13800b57cec5SDimitry Andric     /// \Returns a value vector with the operands across all lanes for the
13810b57cec5SDimitry Andric     /// opearnd at \p OpIdx.
getVL(unsigned OpIdx) const13820b57cec5SDimitry Andric     ValueList getVL(unsigned OpIdx) const {
13830b57cec5SDimitry Andric       ValueList OpVL(OpsVec[OpIdx].size());
13840b57cec5SDimitry Andric       assert(OpsVec[OpIdx].size() == getNumLanes() &&
13850b57cec5SDimitry Andric              "Expected same num of lanes across all operands");
13860b57cec5SDimitry Andric       for (unsigned Lane = 0, Lanes = getNumLanes(); Lane != Lanes; ++Lane)
13870b57cec5SDimitry Andric         OpVL[Lane] = OpsVec[OpIdx][Lane].V;
13880b57cec5SDimitry Andric       return OpVL;
13890b57cec5SDimitry Andric     }
13900b57cec5SDimitry Andric 
    // Performs operand reordering for 2 or more operands.
    // The original operands are in OrigOps[OpIdx][Lane].
    // The reordered operands are returned in 'SortedOps[OpIdx][Lane]'.
    void reorder() {
      unsigned NumOperands = getNumOperands();
      unsigned NumLanes = getNumLanes();
      // Each operand has its own mode. We are using this mode to help us select
      // the instructions for each lane, so that they match best with the ones
      // we have selected so far.
      SmallVector<ReorderingMode, 2> ReorderingModes(NumOperands);

      // This is a greedy single-pass algorithm. We are going over each lane
      // once and deciding on the best order right away with no back-tracking.
      // However, in order to increase its effectiveness, we start with the lane
      // that has operands that can move the least. For example, given the
      // following lanes:
      //  Lane 0 : A[0] = B[0] + C[0]   // Visited 3rd
      //  Lane 1 : A[1] = C[1] - B[1]   // Visited 1st
      //  Lane 2 : A[2] = B[2] + C[2]   // Visited 2nd
      //  Lane 3 : A[3] = C[3] - B[3]   // Visited 4th
      // we will start at Lane 1, since the operands of the subtraction cannot
      // be reordered. Then we will visit the rest of the lanes in a circular
      // fashion. That is, Lanes 2, then Lane 0, and finally Lane 3.

      // Find the first lane that we will start our search from.
      unsigned FirstLane = getBestLaneToStartReordering();

      // Initialize the modes: pick a strategy per operand index based on what
      // the starting lane's operand looks like.
      for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
        Value *OpLane0 = getValue(OpIdx, FirstLane);
        // Keep track if we have instructions with all the same opcode on one
        // side.
        if (isa<LoadInst>(OpLane0))
          ReorderingModes[OpIdx] = ReorderingMode::Load;
        else if (isa<Instruction>(OpLane0)) {
          // Check if OpLane0 should be broadcast.
          if (shouldBroadcast(OpLane0, OpIdx, FirstLane))
            ReorderingModes[OpIdx] = ReorderingMode::Splat;
          else
            ReorderingModes[OpIdx] = ReorderingMode::Opcode;
        }
        else if (isa<Constant>(OpLane0))
          ReorderingModes[OpIdx] = ReorderingMode::Constant;
        else if (isa<Argument>(OpLane0))
          // Our best hope is a Splat. It may save some cost in some cases.
          ReorderingModes[OpIdx] = ReorderingMode::Splat;
        else
          // NOTE: This should be unreachable.
          ReorderingModes[OpIdx] = ReorderingMode::Failed;
      }

      // If the initial strategy fails for any of the operand indexes, then we
      // perform reordering again in a second pass. This helps avoid assigning
      // high priority to the failed strategy, and should improve reordering for
      // the non-failed operand indexes.
      for (int Pass = 0; Pass != 2; ++Pass) {
        // Skip the second pass if the first pass did not fail.
        bool StrategyFailed = false;
        // Mark all operand data as free to use.
        clearUsed();
        // We keep the original operand order for the FirstLane, so reorder the
        // rest of the lanes. We are visiting the nodes in a circular fashion,
        // using FirstLane as the center point and increasing the radius
        // distance.
        for (unsigned Distance = 1; Distance != NumLanes; ++Distance) {
          // Visit the lane on the right and then the lane on the left.
          for (int Direction : {+1, -1}) {
            int Lane = FirstLane + Direction * Distance;
            if (Lane < 0 || Lane >= (int)NumLanes)
              continue;
            // LastLane is the already-processed neighbor we try to match.
            int LastLane = Lane - Direction;
            assert(LastLane >= 0 && LastLane < (int)NumLanes &&
                   "Out of bounds");
            // Look for a good match for each operand.
            for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
              // Search for the operand that matches SortedOps[OpIdx][Lane-1].
              Optional<unsigned> BestIdx =
                  getBestOperand(OpIdx, Lane, LastLane, ReorderingModes);
              // By not selecting a value, we allow the operands that follow to
              // select a better matching value. We will get a non-null value in
              // the next run of getBestOperand().
              if (BestIdx) {
                // Swap the current operand with the one returned by
                // getBestOperand().
                swap(OpIdx, BestIdx.getValue(), Lane);
              } else {
                // We failed to find a best operand, set mode to 'Failed'.
                ReorderingModes[OpIdx] = ReorderingMode::Failed;
                // Enable the second pass.
                StrategyFailed = true;
              }
            }
          }
        }
        // Skip second pass if the strategy did not fail.
        if (!StrategyFailed)
          break;
      }
    }
14900b57cec5SDimitry Andric 
14910b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    /// \returns a human-readable name for \p RMode (debug output only).
    LLVM_DUMP_METHOD static StringRef getModeStr(ReorderingMode RMode) {
      switch (RMode) {
      case ReorderingMode::Load:
        return "Load";
      case ReorderingMode::Opcode:
        return "Opcode";
      case ReorderingMode::Constant:
        return "Constant";
      case ReorderingMode::Splat:
        return "Splat";
      case ReorderingMode::Failed:
        return "Failed";
      }
      llvm_unreachable("Unimplemented Reordering Type");
    }
15070b57cec5SDimitry Andric 
    /// Print \p RMode to \p OS and return the stream for chaining.
    LLVM_DUMP_METHOD static raw_ostream &printMode(ReorderingMode RMode,
                                                   raw_ostream &OS) {
      return OS << getModeStr(RMode);
    }
15120b57cec5SDimitry Andric 
    /// Debug print of \p RMode to the debug stream.
    LLVM_DUMP_METHOD static void dumpMode(ReorderingMode RMode) {
      printMode(RMode, dbgs());
    }
15170b57cec5SDimitry Andric 
    /// Stream-insertion support for ReorderingMode; delegates to printMode().
    friend raw_ostream &operator<<(raw_ostream &OS, ReorderingMode RMode) {
      return printMode(RMode, OS);
    }
15210b57cec5SDimitry Andric 
print(raw_ostream & OS) const15220b57cec5SDimitry Andric     LLVM_DUMP_METHOD raw_ostream &print(raw_ostream &OS) const {
15230b57cec5SDimitry Andric       const unsigned Indent = 2;
15240b57cec5SDimitry Andric       unsigned Cnt = 0;
15250b57cec5SDimitry Andric       for (const OperandDataVec &OpDataVec : OpsVec) {
15260b57cec5SDimitry Andric         OS << "Operand " << Cnt++ << "\n";
15270b57cec5SDimitry Andric         for (const OperandData &OpData : OpDataVec) {
15280b57cec5SDimitry Andric           OS.indent(Indent) << "{";
15290b57cec5SDimitry Andric           if (Value *V = OpData.V)
15300b57cec5SDimitry Andric             OS << *V;
15310b57cec5SDimitry Andric           else
15320b57cec5SDimitry Andric             OS << "null";
15330b57cec5SDimitry Andric           OS << ", APO:" << OpData.APO << "}\n";
15340b57cec5SDimitry Andric         }
15350b57cec5SDimitry Andric         OS << "\n";
15360b57cec5SDimitry Andric       }
15370b57cec5SDimitry Andric       return OS;
15380b57cec5SDimitry Andric     }
15390b57cec5SDimitry Andric 
    /// Debug print to the debug stream.
    LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
15420b57cec5SDimitry Andric #endif
15430b57cec5SDimitry Andric   };
15440b57cec5SDimitry Andric 
  /// Checks if the instruction is marked for deletion.
  bool isDeleted(Instruction *I) const { return DeletedInstructions.count(I); }

  /// Marks the operand values in \p AV for later deletion by replacing them
  /// with Undefs.
  void eraseInstructions(ArrayRef<Value *> AV);

  ~BoUpSLP();
15528bcb0991SDimitry Andric 
private:
  /// Checks if all users of \p I are part of the vectorization tree.
  bool areAllUsersVectorized(Instruction *I,
                             ArrayRef<Value *> VectorizedVals) const;

  /// \returns the cost of the vectorizable entry.
  InstructionCost getEntryCost(const TreeEntry *E,
                               ArrayRef<Value *> VectorizedVals);

  /// This is the recursive part of buildTree.
  void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth,
                     const EdgeInfo &EI);

  /// \returns true if the ExtractElement/ExtractValue instructions in \p VL can
  /// be vectorized to use the original vector (or aggregate "bitcast" to a
  /// vector) and sets \p CurrentOrder to the identity permutation; otherwise
  /// returns false, setting \p CurrentOrder to either an empty vector or a
  /// non-identity permutation that allows to reuse extract instructions.
  bool canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
                       SmallVectorImpl<unsigned> &CurrentOrder) const;

  /// Vectorize a single entry in the tree.
  Value *vectorizeTree(TreeEntry *E);

  /// Vectorize a single entry in the tree, starting in \p VL.
  Value *vectorizeTree(ArrayRef<Value *> VL);

  /// \returns the scalarization cost for this type. Scalarization in this
  /// context means the creation of vectors from a group of scalars.
  InstructionCost
  getGatherCost(FixedVectorType *Ty,
                const DenseSet<unsigned> &ShuffledIndices) const;

  /// Checks if the gathered \p VL can be represented as shuffle(s) of previous
  /// tree entries.
  /// \returns ShuffleKind, if gathered values can be represented as shuffles of
  /// previous tree entries. \p Mask is filled with the shuffle mask.
  Optional<TargetTransformInfo::ShuffleKind>
  isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl<int> &Mask,
                        SmallVectorImpl<const TreeEntry *> &Entries);

  /// \returns the scalarization cost for this list of values. Assuming that
  /// this subtree gets vectorized, we may need to extract the values from the
  /// roots. This method calculates the cost of extracting the values.
  InstructionCost getGatherCost(ArrayRef<Value *> VL) const;

  /// Set the Builder insert point to one after the last instruction in
  /// the bundle.
  void setInsertPointAfterBundle(const TreeEntry *E);

  /// \returns a vector from a collection of scalars in \p VL.
  Value *gather(ArrayRef<Value *> VL);

  /// \returns whether the VectorizableTree is fully vectorizable and will
  /// be beneficial even when the tree height is tiny.
  bool isFullyVectorizableTinyTree() const;

  /// Reorder commutative or alt operands to get better probability of
  /// generating vectorized code.
  static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
                                             SmallVectorImpl<Value *> &Left,
                                             SmallVectorImpl<Value *> &Right,
                                             const DataLayout &DL,
                                             ScalarEvolution &SE,
                                             const BoUpSLP &R);
  /// A node of the SLP vectorization tree: a bundle of scalars together with
  /// its vectorization state, operands, and bookkeeping indices.
  struct TreeEntry {
    using VecTreeTy = SmallVector<std::unique_ptr<TreeEntry>, 8>;

    /// Construct an entry that lives in \p Container (the VectorizableTree).
    TreeEntry(VecTreeTy &Container) : Container(Container) {}

    /// \returns true if the scalars in VL are equal to this entry.
    bool isSame(ArrayRef<Value *> VL) const {
      if (VL.size() == Scalars.size())
        return std::equal(VL.begin(), VL.end(), Scalars.begin());
      // Otherwise compare against the scalars remapped through the reuse
      // shuffle indices.
      return VL.size() == ReuseShuffleIndices.size() &&
             std::equal(
                 VL.begin(), VL.end(), ReuseShuffleIndices.begin(),
                 [this](Value *V, int Idx) { return V == Scalars[Idx]; });
    }

    /// A vector of scalars.
    ValueList Scalars;

    /// The Scalars are vectorized into this value. It is initialized to Null.
    Value *VectorizedValue = nullptr;

    /// Do we need to gather this sequence or vectorize it
    /// (either with vector instruction or with scatter/gather
    /// intrinsics for store/load)?
    enum EntryState { Vectorize, ScatterVectorize, NeedToGather };
    EntryState State;

    /// Does this sequence require some shuffling?
    SmallVector<int, 4> ReuseShuffleIndices;

    /// Does this entry require reordering?
    SmallVector<unsigned, 4> ReorderIndices;

    /// Points back to the VectorizableTree.
    ///
    /// Only used for Graphviz right now.  Unfortunately GraphTrait::NodeRef has
    /// to be a pointer and needs to be able to initialize the child iterator.
    /// Thus we need a reference back to the container to translate the indices
    /// to entries.
    VecTreeTy &Container;

    /// The TreeEntry index containing the user of this entry.  We can actually
    /// have multiple users so the data structure is not truly a tree.
    SmallVector<EdgeInfo, 1> UserTreeIndices;

    /// The index of this treeEntry in VectorizableTree.
    int Idx = -1;

  private:
    /// The operands of each instruction in each lane Operands[op_index][lane].
    /// Note: This helps avoid the replication of the code that performs the
    /// reordering of operands during buildTree_rec() and vectorizeTree().
    SmallVector<ValueList, 2> Operands;

    /// The main/alternate instruction.
    Instruction *MainOp = nullptr;
    Instruction *AltOp = nullptr;

  public:
    /// Set this bundle's \p OpIdx'th operand to \p OpVL.
    void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL) {
      if (Operands.size() < OpIdx + 1)
        Operands.resize(OpIdx + 1);
      assert(Operands[OpIdx].empty() && "Already resized?");
      Operands[OpIdx].resize(Scalars.size());
      for (unsigned Lane = 0, E = Scalars.size(); Lane != E; ++Lane)
        Operands[OpIdx][Lane] = OpVL[Lane];
    }

    /// Set the operands of this bundle in their original order.
    void setOperandsInOrder() {
      assert(Operands.empty() && "Already initialized?");
      auto *I0 = cast<Instruction>(Scalars[0]);
      Operands.resize(I0->getNumOperands());
      unsigned NumLanes = Scalars.size();
      for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
           OpIdx != NumOperands; ++OpIdx) {
        Operands[OpIdx].resize(NumLanes);
        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
          auto *I = cast<Instruction>(Scalars[Lane]);
          assert(I->getNumOperands() == NumOperands &&
                 "Expected same number of operands");
          Operands[OpIdx][Lane] = I->getOperand(OpIdx);
        }
      }
    }

    /// \returns the \p OpIdx operand of this TreeEntry.
    ValueList &getOperand(unsigned OpIdx) {
      assert(OpIdx < Operands.size() && "Off bounds");
      return Operands[OpIdx];
    }

    /// \returns the number of operands.
    unsigned getNumOperands() const { return Operands.size(); }

    /// \return the single \p OpIdx operand.
    Value *getSingleOperand(unsigned OpIdx) const {
      assert(OpIdx < Operands.size() && "Off bounds");
      assert(!Operands[OpIdx].empty() && "No operand available");
      return Operands[OpIdx][0];
    }

    /// Some of the instructions in the list have alternate opcodes.
    bool isAltShuffle() const {
      return getOpcode() != getAltOpcode();
    }

    /// \returns true if \p I's opcode matches either the main or the
    /// alternate opcode of this entry.
    bool isOpcodeOrAlt(Instruction *I) const {
      unsigned CheckedOpcode = I->getOpcode();
      return (getOpcode() == CheckedOpcode ||
              getAltOpcode() == CheckedOpcode);
    }

    /// Chooses the correct key for scheduling data. If \p Op has the same (or
    /// alternate) opcode as \p OpValue, the key is \p Op. Otherwise the key is
    /// \p OpValue.
    Value *isOneOf(Value *Op) const {
      auto *I = dyn_cast<Instruction>(Op);
      if (I && isOpcodeOrAlt(I))
        return Op;
      return MainOp;
    }

    /// Record the main/alternate instructions from \p S.
    void setOperations(const InstructionsState &S) {
      MainOp = S.MainOp;
      AltOp = S.AltOp;
    }

    Instruction *getMainOp() const {
      return MainOp;
    }

    Instruction *getAltOp() const {
      return AltOp;
    }

    /// The main/alternate opcodes for the list of instructions.
    unsigned getOpcode() const {
      return MainOp ? MainOp->getOpcode() : 0;
    }

    unsigned getAltOpcode() const {
      return AltOp ? AltOp->getOpcode() : 0;
    }

    /// Update operations state of this entry if reorder occurred.
    bool updateStateIfReorder() {
      if (ReorderIndices.empty())
        return false;
      // Recompute the main/alternate instructions starting from the lane the
      // reorder moved to the front.
      InstructionsState S = getSameOpcode(Scalars, ReorderIndices.front());
      setOperations(S);
      return true;
    }
    /// When ReuseShuffleIndices is empty it just returns position of \p V
    /// within vector of Scalars. Otherwise, try to remap on its reuse index.
    int findLaneForValue(Value *V) const {
      unsigned FoundLane = std::distance(Scalars.begin(), find(Scalars, V));
      assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
      if (!ReuseShuffleIndices.empty()) {
        FoundLane = std::distance(ReuseShuffleIndices.begin(),
                                  find(ReuseShuffleIndices, FoundLane));
      }
      return FoundLane;
    }

#ifndef NDEBUG
    /// Debug printer.
    LLVM_DUMP_METHOD void dump() const {
      dbgs() << Idx << ".\n";
      for (unsigned OpI = 0, OpE = Operands.size(); OpI != OpE; ++OpI) {
        dbgs() << "Operand " << OpI << ":\n";
        for (const Value *V : Operands[OpI])
          dbgs().indent(2) << *V << "\n";
      }
      dbgs() << "Scalars: \n";
      for (Value *V : Scalars)
        dbgs().indent(2) << *V << "\n";
      dbgs() << "State: ";
      switch (State) {
      case Vectorize:
        dbgs() << "Vectorize\n";
        break;
      case ScatterVectorize:
        dbgs() << "ScatterVectorize\n";
        break;
      case NeedToGather:
        dbgs() << "NeedToGather\n";
        break;
      }
      dbgs() << "MainOp: ";
      if (MainOp)
        dbgs() << *MainOp << "\n";
      else
        dbgs() << "NULL\n";
      dbgs() << "AltOp: ";
      if (AltOp)
        dbgs() << *AltOp << "\n";
      else
        dbgs() << "NULL\n";
      dbgs() << "VectorizedValue: ";
      if (VectorizedValue)
        dbgs() << *VectorizedValue << "\n";
      else
        dbgs() << "NULL\n";
      dbgs() << "ReuseShuffleIndices: ";
      if (ReuseShuffleIndices.empty())
        dbgs() << "Empty";
      else
        for (unsigned ReuseIdx : ReuseShuffleIndices)
          dbgs() << ReuseIdx << ", ";
      dbgs() << "\n";
      dbgs() << "ReorderIndices: ";
      for (unsigned ReorderIdx : ReorderIndices)
        dbgs() << ReorderIdx << ", ";
      dbgs() << "\n";
      dbgs() << "UserTreeIndices: ";
      for (const auto &EInfo : UserTreeIndices)
        dbgs() << EInfo << ", ";
      dbgs() << "\n";
    }
#endif
  };
18400b57cec5SDimitry Andric 
1841af732203SDimitry Andric #ifndef NDEBUG
dumpTreeCosts(const TreeEntry * E,InstructionCost ReuseShuffleCost,InstructionCost VecCost,InstructionCost ScalarCost) const18425f7ddb14SDimitry Andric   void dumpTreeCosts(const TreeEntry *E, InstructionCost ReuseShuffleCost,
1843af732203SDimitry Andric                      InstructionCost VecCost,
1844af732203SDimitry Andric                      InstructionCost ScalarCost) const {
1845af732203SDimitry Andric     dbgs() << "SLP: Calculated costs for Tree:\n"; E->dump();
1846af732203SDimitry Andric     dbgs() << "SLP: Costs:\n";
1847af732203SDimitry Andric     dbgs() << "SLP:     ReuseShuffleCost = " << ReuseShuffleCost << "\n";
1848af732203SDimitry Andric     dbgs() << "SLP:     VectorCost = " << VecCost << "\n";
1849af732203SDimitry Andric     dbgs() << "SLP:     ScalarCost = " << ScalarCost << "\n";
1850af732203SDimitry Andric     dbgs() << "SLP:     ReuseShuffleCost + VecCost - ScalarCost = " <<
1851af732203SDimitry Andric                ReuseShuffleCost + VecCost - ScalarCost << "\n";
1852af732203SDimitry Andric   }
1853af732203SDimitry Andric #endif
1854af732203SDimitry Andric 
18550b57cec5SDimitry Andric   /// Create a new VectorizableTree entry.
newTreeEntry(ArrayRef<Value * > VL,Optional<ScheduleData * > Bundle,const InstructionsState & S,const EdgeInfo & UserTreeIdx,ArrayRef<unsigned> ReuseShuffleIndices=None,ArrayRef<unsigned> ReorderIndices=None)18568bcb0991SDimitry Andric   TreeEntry *newTreeEntry(ArrayRef<Value *> VL, Optional<ScheduleData *> Bundle,
18578bcb0991SDimitry Andric                           const InstructionsState &S,
18580b57cec5SDimitry Andric                           const EdgeInfo &UserTreeIdx,
18590b57cec5SDimitry Andric                           ArrayRef<unsigned> ReuseShuffleIndices = None,
18600b57cec5SDimitry Andric                           ArrayRef<unsigned> ReorderIndices = None) {
1861af732203SDimitry Andric     TreeEntry::EntryState EntryState =
1862af732203SDimitry Andric         Bundle ? TreeEntry::Vectorize : TreeEntry::NeedToGather;
1863af732203SDimitry Andric     return newTreeEntry(VL, EntryState, Bundle, S, UserTreeIdx,
1864af732203SDimitry Andric                         ReuseShuffleIndices, ReorderIndices);
1865af732203SDimitry Andric   }
1866af732203SDimitry Andric 
  /// Create a new VectorizableTree entry in state \p EntryState for the
  /// scalars \p VL.
  /// \p Bundle must have a value iff \p EntryState is not NeedToGather
  /// (asserted below). For non-gather entries every scalar is registered in
  /// ScalarToTreeEntry and each scheduler bundle member is linked back to the
  /// new entry; gather scalars are recorded in MustGather instead.
  /// \returns the newly created entry (owned by VectorizableTree).
  TreeEntry *newTreeEntry(ArrayRef<Value *> VL,
                          TreeEntry::EntryState EntryState,
                          Optional<ScheduleData *> Bundle,
                          const InstructionsState &S,
                          const EdgeInfo &UserTreeIdx,
                          ArrayRef<unsigned> ReuseShuffleIndices = None,
                          ArrayRef<unsigned> ReorderIndices = None) {
    assert(((!Bundle && EntryState == TreeEntry::NeedToGather) ||
            (Bundle && EntryState != TreeEntry::NeedToGather)) &&
           "Need to vectorize gather entry?");
    // TreeEntry is constructed with a reference back to the container so it
    // can refer to other entries by index.
    VectorizableTree.push_back(std::make_unique<TreeEntry>(VectorizableTree));
    TreeEntry *Last = VectorizableTree.back().get();
    Last->Idx = VectorizableTree.size() - 1;
    Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
    Last->State = EntryState;
    Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
                                     ReuseShuffleIndices.end());
    Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end());
    Last->setOperations(S);
    if (Last->State != TreeEntry::NeedToGather) {
      // Register every scalar; a scalar may belong to at most one vectorized
      // entry.
      for (Value *V : VL) {
        assert(!getTreeEntry(V) && "Scalar already in tree!");
        ScalarToTreeEntry[V] = Last;
      }
      // Update the scheduler bundle to point to this TreeEntry.
      unsigned Lane = 0;
      for (ScheduleData *BundleMember = Bundle.getValue(); BundleMember;
           BundleMember = BundleMember->NextInBundle) {
        BundleMember->TE = Last;
        BundleMember->Lane = Lane;
        ++Lane;
      }
      // The Optional is guaranteed set here (see assert above), but its
      // contained pointer may still be null for bundles that need no
      // scheduling; only a real bundle must cover all lanes.
      assert((!Bundle.getValue() || Lane == VL.size()) &&
             "Bundle and VL out of sync");
    } else {
      MustGather.insert(VL.begin(), VL.end());
    }

    if (UserTreeIdx.UserTE)
      Last->UserTreeIndices.push_back(UserTreeIdx);

    return Last;
  }
19100b57cec5SDimitry Andric 
19110b57cec5SDimitry Andric   /// -- Vectorization State --
19120b57cec5SDimitry Andric   /// Holds all of the tree entries.
19130b57cec5SDimitry Andric   TreeEntry::VecTreeTy VectorizableTree;
19140b57cec5SDimitry Andric 
19150b57cec5SDimitry Andric #ifndef NDEBUG
19160b57cec5SDimitry Andric   /// Debug printer.
dumpVectorizableTree() const19170b57cec5SDimitry Andric   LLVM_DUMP_METHOD void dumpVectorizableTree() const {
19180b57cec5SDimitry Andric     for (unsigned Id = 0, IdE = VectorizableTree.size(); Id != IdE; ++Id) {
19190b57cec5SDimitry Andric       VectorizableTree[Id]->dump();
19200b57cec5SDimitry Andric       dbgs() << "\n";
19210b57cec5SDimitry Andric     }
19220b57cec5SDimitry Andric   }
19230b57cec5SDimitry Andric #endif
19240b57cec5SDimitry Andric 
getTreeEntry(Value * V)1925af732203SDimitry Andric   TreeEntry *getTreeEntry(Value *V) { return ScalarToTreeEntry.lookup(V); }
19260b57cec5SDimitry Andric 
getTreeEntry(Value * V) const19270b57cec5SDimitry Andric   const TreeEntry *getTreeEntry(Value *V) const {
1928af732203SDimitry Andric     return ScalarToTreeEntry.lookup(V);
19290b57cec5SDimitry Andric   }
19300b57cec5SDimitry Andric 
19310b57cec5SDimitry Andric   /// Maps a specific scalar to its tree entry.
19328bcb0991SDimitry Andric   SmallDenseMap<Value*, TreeEntry *> ScalarToTreeEntry;
19330b57cec5SDimitry Andric 
19345ffd83dbSDimitry Andric   /// Maps a value to the proposed vectorizable size.
19355ffd83dbSDimitry Andric   SmallDenseMap<Value *, unsigned> InstrElementSize;
19365ffd83dbSDimitry Andric 
19370b57cec5SDimitry Andric   /// A list of scalars that we found that we need to keep as scalars.
19380b57cec5SDimitry Andric   ValueSet MustGather;
19390b57cec5SDimitry Andric 
  /// This POD struct describes one external user in the vectorized tree:
  /// a use of a vectorized scalar by an instruction outside the tree, for
  /// which an extractelement must eventually be generated.
  struct ExternalUser {
    ExternalUser(Value *S, llvm::User *U, int L)
        : Scalar(S), User(U), Lane(L) {}

    // Which scalar in our function.
    Value *Scalar;

    // Which user that uses the scalar. May be null, meaning the scalar is
    // used after vectorization by an unknown user.
    llvm::User *User;

    // Which lane does the scalar belong to.
    int Lane;
  };
19540b57cec5SDimitry Andric   using UserList = SmallVector<ExternalUser, 16>;
19550b57cec5SDimitry Andric 
19560b57cec5SDimitry Andric   /// Checks if two instructions may access the same memory.
19570b57cec5SDimitry Andric   ///
19580b57cec5SDimitry Andric   /// \p Loc1 is the location of \p Inst1. It is passed explicitly because it
19590b57cec5SDimitry Andric   /// is invariant in the calling loop.
isAliased(const MemoryLocation & Loc1,Instruction * Inst1,Instruction * Inst2)19600b57cec5SDimitry Andric   bool isAliased(const MemoryLocation &Loc1, Instruction *Inst1,
19610b57cec5SDimitry Andric                  Instruction *Inst2) {
19620b57cec5SDimitry Andric     // First check if the result is already in the cache.
19630b57cec5SDimitry Andric     AliasCacheKey key = std::make_pair(Inst1, Inst2);
19640b57cec5SDimitry Andric     Optional<bool> &result = AliasCache[key];
19650b57cec5SDimitry Andric     if (result.hasValue()) {
19660b57cec5SDimitry Andric       return result.getValue();
19670b57cec5SDimitry Andric     }
19680b57cec5SDimitry Andric     MemoryLocation Loc2 = getLocation(Inst2, AA);
19690b57cec5SDimitry Andric     bool aliased = true;
19700b57cec5SDimitry Andric     if (Loc1.Ptr && Loc2.Ptr && isSimple(Inst1) && isSimple(Inst2)) {
19710b57cec5SDimitry Andric       // Do the alias check.
19725f7ddb14SDimitry Andric       aliased = !AA->isNoAlias(Loc1, Loc2);
19730b57cec5SDimitry Andric     }
19740b57cec5SDimitry Andric     // Store the result in the cache.
19750b57cec5SDimitry Andric     result = aliased;
19760b57cec5SDimitry Andric     return aliased;
19770b57cec5SDimitry Andric   }
19780b57cec5SDimitry Andric 
19790b57cec5SDimitry Andric   using AliasCacheKey = std::pair<Instruction *, Instruction *>;
19800b57cec5SDimitry Andric 
19810b57cec5SDimitry Andric   /// Cache for alias results.
19820b57cec5SDimitry Andric   /// TODO: consider moving this to the AliasAnalysis itself.
19830b57cec5SDimitry Andric   DenseMap<AliasCacheKey, Optional<bool>> AliasCache;
19840b57cec5SDimitry Andric 
19850b57cec5SDimitry Andric   /// Removes an instruction from its block and eventually deletes it.
19860b57cec5SDimitry Andric   /// It's like Instruction::eraseFromParent() except that the actual deletion
19870b57cec5SDimitry Andric   /// is delayed until BoUpSLP is destructed.
19880b57cec5SDimitry Andric   /// This is required to ensure that there are no incorrect collisions in the
19890b57cec5SDimitry Andric   /// AliasCache, which can happen if a new instruction is allocated at the
19900b57cec5SDimitry Andric   /// same address as a previously deleted instruction.
eraseInstruction(Instruction * I,bool ReplaceOpsWithUndef=false)19918bcb0991SDimitry Andric   void eraseInstruction(Instruction *I, bool ReplaceOpsWithUndef = false) {
19928bcb0991SDimitry Andric     auto It = DeletedInstructions.try_emplace(I, ReplaceOpsWithUndef).first;
19938bcb0991SDimitry Andric     It->getSecond() = It->getSecond() && ReplaceOpsWithUndef;
19940b57cec5SDimitry Andric   }
19950b57cec5SDimitry Andric 
19960b57cec5SDimitry Andric   /// Temporary store for deleted instructions. Instructions will be deleted
19970b57cec5SDimitry Andric   /// eventually when the BoUpSLP is destructed.
19988bcb0991SDimitry Andric   DenseMap<Instruction *, bool> DeletedInstructions;
19990b57cec5SDimitry Andric 
20000b57cec5SDimitry Andric   /// A list of values that need to extracted out of the tree.
20010b57cec5SDimitry Andric   /// This list holds pairs of (Internal Scalar : External User). External User
20020b57cec5SDimitry Andric   /// can be nullptr, it means that this Internal Scalar will be used later,
20030b57cec5SDimitry Andric   /// after vectorization.
20040b57cec5SDimitry Andric   UserList ExternalUses;
20050b57cec5SDimitry Andric 
20060b57cec5SDimitry Andric   /// Values used only by @llvm.assume calls.
20070b57cec5SDimitry Andric   SmallPtrSet<const Value *, 32> EphValues;
20080b57cec5SDimitry Andric 
20090b57cec5SDimitry Andric   /// Holds all of the instructions that we gathered.
20100b57cec5SDimitry Andric   SetVector<Instruction *> GatherSeq;
20110b57cec5SDimitry Andric 
20120b57cec5SDimitry Andric   /// A list of blocks that we are going to CSE.
20130b57cec5SDimitry Andric   SetVector<BasicBlock *> CSEBlocks;
20140b57cec5SDimitry Andric 
  /// Contains all scheduling relevant data for an instruction.
  /// A ScheduleData either represents a single instruction or a member of an
  /// instruction bundle (= a group of instructions which is combined into a
  /// vector instruction).
  struct ScheduleData {
    // The initial value for the dependency counters. It means that the
    // dependencies are not calculated yet.
    enum { InvalidDeps = -1 };

    ScheduleData() = default;

    /// (Re-)initializes this node for scheduling region
    /// \p BlockSchedulingRegionID with opcode value \p OpVal.
    /// Note that Inst is deliberately left untouched: a ScheduleData object
    /// stays attached to its instruction and is re-used across regions.
    void init(int BlockSchedulingRegionID, Value *OpVal) {
      FirstInBundle = this;
      NextInBundle = nullptr;
      NextLoadStore = nullptr;
      IsScheduled = false;
      SchedulingRegionID = BlockSchedulingRegionID;
      UnscheduledDepsInBundle = UnscheduledDeps;
      clearDependencies();
      OpValue = OpVal;
      TE = nullptr;
      Lane = -1;
    }

    /// Returns true if the dependency information has been calculated.
    bool hasValidDependencies() const { return Dependencies != InvalidDeps; }

    /// Returns true for single instructions and for bundle representatives
    /// (= the head of a bundle).
    bool isSchedulingEntity() const { return FirstInBundle == this; }

    /// Returns true if it represents an instruction bundle and not only a
    /// single instruction.
    bool isPartOfBundle() const {
      return NextInBundle != nullptr || FirstInBundle != this;
    }

    /// Returns true if it is ready for scheduling, i.e. it has no more
    /// unscheduled depending instructions/bundles.
    bool isReady() const {
      assert(isSchedulingEntity() &&
             "can't consider non-scheduling entity for ready list");
      return UnscheduledDepsInBundle == 0 && !IsScheduled;
    }

    /// Modifies the number of unscheduled dependencies, also updating it for
    /// the whole bundle.
    /// \returns the bundle-wide unscheduled dependency count after the update
    /// (callers test it against 0 to detect readiness).
    int incrementUnscheduledDeps(int Incr) {
      UnscheduledDeps += Incr;
      return FirstInBundle->UnscheduledDepsInBundle += Incr;
    }

    /// Sets the number of unscheduled dependencies to the number of
    /// dependencies.
    void resetUnscheduledDeps() {
      incrementUnscheduledDeps(Dependencies - UnscheduledDeps);
    }

    /// Clears all dependency information.
    void clearDependencies() {
      Dependencies = InvalidDeps;
      resetUnscheduledDeps();
      MemoryDependencies.clear();
    }

    /// Prints this node to \p os. Non-head bundle members are prefixed with
    /// "/ ", bundle heads print the whole bundle as "[head;member;...]", and
    /// stand-alone instructions are printed bare.
    void dump(raw_ostream &os) const {
      if (!isSchedulingEntity()) {
        os << "/ " << *Inst;
      } else if (NextInBundle) {
        os << '[' << *Inst;
        ScheduleData *SD = NextInBundle;
        while (SD) {
          os << ';' << *SD->Inst;
          SD = SD->NextInBundle;
        }
        os << ']';
      } else {
        os << *Inst;
      }
    }

    /// The instruction this node describes.
    Instruction *Inst = nullptr;

    /// Points to the head in an instruction bundle (and always to this for
    /// single instructions).
    ScheduleData *FirstInBundle = nullptr;

    /// Single linked list of all instructions in a bundle. Null if it is a
    /// single instruction.
    ScheduleData *NextInBundle = nullptr;

    /// Single linked list of all memory instructions (e.g. load, store, call)
    /// in the block - until the end of the scheduling region.
    ScheduleData *NextLoadStore = nullptr;

    /// The dependent memory instructions.
    /// This list is derived on demand in calculateDependencies().
    SmallVector<ScheduleData *, 4> MemoryDependencies;

    /// This ScheduleData is in the current scheduling region if this matches
    /// the current SchedulingRegionID of BlockScheduling.
    int SchedulingRegionID = 0;

    /// Used for getting a "good" final ordering of instructions.
    int SchedulingPriority = 0;

    /// The number of dependencies. Constitutes of the number of users of the
    /// instruction plus the number of dependent memory instructions (if any).
    /// This value is calculated on demand.
    /// If InvalidDeps, the number of dependencies is not calculated yet.
    int Dependencies = InvalidDeps;

    /// The number of dependencies minus the number of dependencies of scheduled
    /// instructions. As soon as this is zero, the instruction/bundle gets ready
    /// for scheduling.
    /// Note that this is negative as long as Dependencies is not calculated.
    int UnscheduledDeps = InvalidDeps;

    /// The sum of UnscheduledDeps in a bundle. Equals to UnscheduledDeps for
    /// single instructions.
    int UnscheduledDepsInBundle = InvalidDeps;

    /// True if this instruction is scheduled (or considered as scheduled in the
    /// dry-run).
    bool IsScheduled = false;

    /// Opcode of the current instruction in the schedule data.
    Value *OpValue = nullptr;

    /// The TreeEntry that this instruction corresponds to.
    TreeEntry *TE = nullptr;

    /// The lane of this node in the TreeEntry.
    int Lane = -1;
  };
21500b57cec5SDimitry Andric 
#ifndef NDEBUG
  /// Streams a ScheduleData into debug output (used by LLVM_DEBUG traces).
  friend inline raw_ostream &operator<<(raw_ostream &os,
                                        const BoUpSLP::ScheduleData &SD) {
    SD.dump(os);
    return os;
  }
#endif
21580b57cec5SDimitry Andric 
21590b57cec5SDimitry Andric   friend struct GraphTraits<BoUpSLP *>;
21600b57cec5SDimitry Andric   friend struct DOTGraphTraits<BoUpSLP *>;
21610b57cec5SDimitry Andric 
21620b57cec5SDimitry Andric   /// Contains all scheduling data for a basic block.
21630b57cec5SDimitry Andric   struct BlockScheduling {
    /// Creates the scheduling state for basic block \p BB.
    /// NOTE(review): ChunkSize/ChunkPos look like bookkeeping for batched
    /// allocation of ScheduleData objects (the allocator is not visible
    /// here) — confirm against the chunk allocation code before relying on
    /// this.
    BlockScheduling(BasicBlock *BB)
        : BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize) {}
21660b57cec5SDimitry Andric 
    /// Resets the per-region state so a fresh scheduling region can be built
    /// in this block.
    void clear() {
      ReadyInsts.clear();
      ScheduleStart = nullptr;
      ScheduleEnd = nullptr;
      FirstLoadStoreInRegion = nullptr;
      LastLoadStoreInRegion = nullptr;

      // Reduce the maximum schedule region size by the size of the
      // previous scheduling run (bounds total scheduling work), but never
      // drop the budget below the minimum region size.
      ScheduleRegionSizeLimit -= ScheduleRegionSize;
      if (ScheduleRegionSizeLimit < MinScheduleRegionSize)
        ScheduleRegionSizeLimit = MinScheduleRegionSize;
      ScheduleRegionSize = 0;

      // Make a new scheduling region, i.e. all existing ScheduleData is not
      // in the new region yet. Membership is tested by comparing against
      // SchedulingRegionID, so bumping it invalidates all old nodes without
      // touching them individually.
      ++SchedulingRegionID;
    }
21850b57cec5SDimitry Andric 
getScheduleDatallvm::slpvectorizer::BoUpSLP::BlockScheduling21860b57cec5SDimitry Andric     ScheduleData *getScheduleData(Value *V) {
21870b57cec5SDimitry Andric       ScheduleData *SD = ScheduleDataMap[V];
21880b57cec5SDimitry Andric       if (SD && SD->SchedulingRegionID == SchedulingRegionID)
21890b57cec5SDimitry Andric         return SD;
21900b57cec5SDimitry Andric       return nullptr;
21910b57cec5SDimitry Andric     }
21920b57cec5SDimitry Andric 
getScheduleDatallvm::slpvectorizer::BoUpSLP::BlockScheduling21930b57cec5SDimitry Andric     ScheduleData *getScheduleData(Value *V, Value *Key) {
21940b57cec5SDimitry Andric       if (V == Key)
21950b57cec5SDimitry Andric         return getScheduleData(V);
21960b57cec5SDimitry Andric       auto I = ExtraScheduleDataMap.find(V);
21970b57cec5SDimitry Andric       if (I != ExtraScheduleDataMap.end()) {
21980b57cec5SDimitry Andric         ScheduleData *SD = I->second[Key];
21990b57cec5SDimitry Andric         if (SD && SD->SchedulingRegionID == SchedulingRegionID)
22000b57cec5SDimitry Andric           return SD;
22010b57cec5SDimitry Andric       }
22020b57cec5SDimitry Andric       return nullptr;
22030b57cec5SDimitry Andric     }
22040b57cec5SDimitry Andric 
    /// Returns true if \p SD belongs to the scheduling region currently being
    /// built (regions are distinguished solely by SchedulingRegionID).
    bool isInSchedulingRegion(ScheduleData *SD) const {
      return SD->SchedulingRegionID == SchedulingRegionID;
    }
22080b57cec5SDimitry Andric 
    /// Marks an instruction as scheduled and puts all dependent ready
    /// instructions into the ready-list.
    /// \tparam ReadyListType any container exposing insert(ScheduleData *).
    template <typename ReadyListType>
    void schedule(ScheduleData *SD, ReadyListType &ReadyList) {
      SD->IsScheduled = true;
      LLVM_DEBUG(dbgs() << "SLP:   schedule " << *SD << "\n");

      // Walk every member of the (possibly single-element) bundle.
      ScheduleData *BundleMember = SD;
      while (BundleMember) {
        // Only process entries whose OpValue matches the instruction itself;
        // extra per-opcode copies are skipped (their dependencies are handled
        // via the primary entry).
        if (BundleMember->Inst != BundleMember->OpValue) {
          BundleMember = BundleMember->NextInBundle;
          continue;
        }
        // Handle the def-use chain dependencies.

        // Decrement the unscheduled counter and insert to ready list if ready.
        auto &&DecrUnsched = [this, &ReadyList](Instruction *I) {
          doForAllOpcodes(I, [&ReadyList](ScheduleData *OpDef) {
            if (OpDef && OpDef->hasValidDependencies() &&
                OpDef->incrementUnscheduledDeps(-1) == 0) {
              // There are no more unscheduled dependencies after
              // decrementing, so we can put the dependent instruction
              // into the ready list.
              ScheduleData *DepBundle = OpDef->FirstInBundle;
              assert(!DepBundle->IsScheduled &&
                     "already scheduled bundle gets ready");
              ReadyList.insert(DepBundle);
              LLVM_DEBUG(dbgs()
                         << "SLP:    gets ready (def): " << *DepBundle << "\n");
            }
          });
        };

        // If BundleMember is a vector bundle, its operands may have been
        // reordered during buildTree(). We therefore need to get its operands
        // through the TreeEntry.
        if (TreeEntry *TE = BundleMember->TE) {
          int Lane = BundleMember->Lane;
          assert(Lane >= 0 && "Lane not set");

          // Since vectorization tree is being built recursively this assertion
          // ensures that the tree entry has all operands set before reaching
          // this code. Couple of exceptions known at the moment are extracts
          // where their second (immediate) operand is not added. Since
          // immediates do not affect scheduler behavior this is considered
          // okay.
          auto *In = TE->getMainOp();
          assert(In &&
                 (isa<ExtractValueInst>(In) || isa<ExtractElementInst>(In) ||
                  In->getNumOperands() == TE->getNumOperands()) &&
                 "Missed TreeEntry operands?");
          (void)In; // fake use to avoid build failure when assertions disabled

          for (unsigned OpIdx = 0, NumOperands = TE->getNumOperands();
               OpIdx != NumOperands; ++OpIdx)
            if (auto *I = dyn_cast<Instruction>(TE->getOperand(OpIdx)[Lane]))
              DecrUnsched(I);
        } else {
          // If BundleMember is a stand-alone instruction, no operand reordering
          // has taken place, so we directly access its operands.
          for (Use &U : BundleMember->Inst->operands())
            if (auto *I = dyn_cast<Instruction>(U.get()))
              DecrUnsched(I);
        }
        // Handle the memory dependencies.
        for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) {
          if (MemoryDepSD->incrementUnscheduledDeps(-1) == 0) {
            // There are no more unscheduled dependencies after decrementing,
            // so we can put the dependent instruction into the ready list.
            ScheduleData *DepBundle = MemoryDepSD->FirstInBundle;
            assert(!DepBundle->IsScheduled &&
                   "already scheduled bundle gets ready");
            ReadyList.insert(DepBundle);
            LLVM_DEBUG(dbgs()
                       << "SLP:    gets ready (mem): " << *DepBundle << "\n");
          }
        }
        BundleMember = BundleMember->NextInBundle;
      }
    }
22890b57cec5SDimitry Andric 
doForAllOpcodesllvm::slpvectorizer::BoUpSLP::BlockScheduling22900b57cec5SDimitry Andric     void doForAllOpcodes(Value *V,
22910b57cec5SDimitry Andric                          function_ref<void(ScheduleData *SD)> Action) {
22920b57cec5SDimitry Andric       if (ScheduleData *SD = getScheduleData(V))
22930b57cec5SDimitry Andric         Action(SD);
22940b57cec5SDimitry Andric       auto I = ExtraScheduleDataMap.find(V);
22950b57cec5SDimitry Andric       if (I != ExtraScheduleDataMap.end())
22960b57cec5SDimitry Andric         for (auto &P : I->second)
22970b57cec5SDimitry Andric           if (P.second->SchedulingRegionID == SchedulingRegionID)
22980b57cec5SDimitry Andric             Action(P.second);
22990b57cec5SDimitry Andric     }
23000b57cec5SDimitry Andric 
23010b57cec5SDimitry Andric     /// Put all instructions into the ReadyList which are ready for scheduling.
23020b57cec5SDimitry Andric     template <typename ReadyListType>
initialFillReadyListllvm::slpvectorizer::BoUpSLP::BlockScheduling23030b57cec5SDimitry Andric     void initialFillReadyList(ReadyListType &ReadyList) {
23040b57cec5SDimitry Andric       for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
23050b57cec5SDimitry Andric         doForAllOpcodes(I, [&](ScheduleData *SD) {
23060b57cec5SDimitry Andric           if (SD->isSchedulingEntity() && SD->isReady()) {
23070b57cec5SDimitry Andric             ReadyList.insert(SD);
23080b57cec5SDimitry Andric             LLVM_DEBUG(dbgs()
23090b57cec5SDimitry Andric                        << "SLP:    initially in ready list: " << *I << "\n");
23100b57cec5SDimitry Andric           }
23110b57cec5SDimitry Andric         });
23120b57cec5SDimitry Andric       }
23130b57cec5SDimitry Andric     }
23140b57cec5SDimitry Andric 
    /// Checks if a bundle of instructions can be scheduled, i.e. has no
    /// cyclic dependencies. This is only a dry-run, no instructions are
    /// actually moved at this stage.
    /// \returns the scheduling bundle. The returned Optional value is non-None
    /// if \p VL is allowed to be scheduled.
    Optional<ScheduleData *>
    tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
                      const InstructionsState &S);

    /// Un-bundles a group of instructions.
    void cancelScheduling(ArrayRef<Value *> VL, Value *OpValue);

    /// Allocates schedule data chunk.
    ScheduleData *allocateScheduleDataChunks();

    /// Extends the scheduling region so that V is inside the region.
    /// \returns true if the region size is within the limit.
    bool extendSchedulingRegion(Value *V, const InstructionsState &S);

    /// Initialize the ScheduleData structures for new instructions in the
    /// scheduling region.
    void initScheduleData(Instruction *FromI, Instruction *ToI,
                          ScheduleData *PrevLoadStore,
                          ScheduleData *NextLoadStore);

    /// Updates the dependency information of a bundle and of all instructions/
    /// bundles which depend on the original bundle.
    void calculateDependencies(ScheduleData *SD, bool InsertInReadyList,
                               BoUpSLP *SLP);

    /// Sets all instruction in the scheduling region to un-scheduled.
    void resetSchedule();

    /// The basic block this scheduler operates on (the block whose
    /// BlockScheduling entry this is in BlocksSchedules).
    BasicBlock *BB;

    /// Simple memory allocation for ScheduleData.
    std::vector<std::unique_ptr<ScheduleData[]>> ScheduleDataChunks;

    /// The size of a ScheduleData array in ScheduleDataChunks.
    int ChunkSize;

    /// The allocator position in the current chunk, which is the last entry
    /// of ScheduleDataChunks.
    int ChunkPos;

    /// Attaches ScheduleData to Instruction.
    /// Note that the mapping survives during all vectorization iterations, i.e.
    /// ScheduleData structures are recycled.
    DenseMap<Value *, ScheduleData *> ScheduleDataMap;

    /// Attaches ScheduleData to Instruction with the leading key.
    DenseMap<Value *, SmallDenseMap<Value *, ScheduleData *>>
        ExtraScheduleDataMap;

    /// Minimal ready-list adaptor: gives SmallVector the insert() interface
    /// expected by the templated scheduling helpers (e.g.
    /// initialFillReadyList).
    struct ReadyList : SmallVector<ScheduleData *, 8> {
      void insert(ScheduleData *SD) { push_back(SD); }
    };

    /// The ready-list for scheduling (only used for the dry-run).
    ReadyList ReadyInsts;

    /// The first instruction of the scheduling region.
    Instruction *ScheduleStart = nullptr;

    /// The first instruction _after_ the scheduling region.
    Instruction *ScheduleEnd = nullptr;

    /// The first memory accessing instruction in the scheduling region
    /// (can be null).
    ScheduleData *FirstLoadStoreInRegion = nullptr;

    /// The last memory accessing instruction in the scheduling region
    /// (can be null).
    ScheduleData *LastLoadStoreInRegion = nullptr;

    /// The current size of the scheduling region.
    int ScheduleRegionSize = 0;

    /// The maximum size allowed for the scheduling region.
    int ScheduleRegionSizeLimit = ScheduleRegionSizeBudget;

    /// The ID of the scheduling region. For a new vectorization iteration this
    /// is incremented which "removes" all ScheduleData from the region.
    // Make sure that the initial SchedulingRegionID is greater than the
    // initial SchedulingRegionID in ScheduleData (which is 0).
    int SchedulingRegionID = 1;
  };
24020b57cec5SDimitry Andric 
  /// Attaches the BlockScheduling structures to basic blocks.
  /// Lazily populated in buildTree_rec the first time a bundle from a given
  /// block is scheduled.
  MapVector<BasicBlock *, std::unique_ptr<BlockScheduling>> BlocksSchedules;

  /// Performs the "real" scheduling. Done before vectorization is actually
  /// performed in a basic block.
  void scheduleBlock(BlockScheduling *BS);

  /// List of users to ignore during scheduling and that don't need extracting.
  /// Values contained here are also forced to remain scalar (see the
  /// MustGather check in buildTree_rec).
  ArrayRef<Value *> UserIgnoreList;
24120b57cec5SDimitry Andric 
24130b57cec5SDimitry Andric   /// A DenseMapInfo implementation for holding DenseMaps and DenseSets of
24140b57cec5SDimitry Andric   /// sorted SmallVectors of unsigned.
24150b57cec5SDimitry Andric   struct OrdersTypeDenseMapInfo {
getEmptyKeyllvm::slpvectorizer::BoUpSLP::OrdersTypeDenseMapInfo24160b57cec5SDimitry Andric     static OrdersType getEmptyKey() {
24170b57cec5SDimitry Andric       OrdersType V;
24180b57cec5SDimitry Andric       V.push_back(~1U);
24190b57cec5SDimitry Andric       return V;
24200b57cec5SDimitry Andric     }
24210b57cec5SDimitry Andric 
getTombstoneKeyllvm::slpvectorizer::BoUpSLP::OrdersTypeDenseMapInfo24220b57cec5SDimitry Andric     static OrdersType getTombstoneKey() {
24230b57cec5SDimitry Andric       OrdersType V;
24240b57cec5SDimitry Andric       V.push_back(~2U);
24250b57cec5SDimitry Andric       return V;
24260b57cec5SDimitry Andric     }
24270b57cec5SDimitry Andric 
getHashValuellvm::slpvectorizer::BoUpSLP::OrdersTypeDenseMapInfo24280b57cec5SDimitry Andric     static unsigned getHashValue(const OrdersType &V) {
24290b57cec5SDimitry Andric       return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
24300b57cec5SDimitry Andric     }
24310b57cec5SDimitry Andric 
isEqualllvm::slpvectorizer::BoUpSLP::OrdersTypeDenseMapInfo24320b57cec5SDimitry Andric     static bool isEqual(const OrdersType &LHS, const OrdersType &RHS) {
24330b57cec5SDimitry Andric       return LHS == RHS;
24340b57cec5SDimitry Andric     }
24350b57cec5SDimitry Andric   };
24360b57cec5SDimitry Andric 
  /// Contains orders of operations along with the number of bundles that have
  /// operations in this order. It stores only those orders that require
  /// reordering, if reordering is not required it is counted using \a
  /// NumOpsWantToKeepOriginalOrder.
  DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo> NumOpsWantToKeepOrder;
  /// Number of bundles that do not require reordering.
  unsigned NumOpsWantToKeepOriginalOrder = 0;

  // Analysis and block reference. These are raw, non-owning pointers to
  // analyses cached by the pass; they are presumed to outlive this object
  // (set by the pass entry point -- TODO confirm against the caller).
  Function *F;
  ScalarEvolution *SE;
  TargetTransformInfo *TTI;
  TargetLibraryInfo *TLI;
  AAResults *AA;
  LoopInfo *LI;
  DominatorTree *DT;
  AssumptionCache *AC;
  DemandedBits *DB;
  const DataLayout *DL;
  OptimizationRemarkEmitter *ORE;

  unsigned MaxVecRegSize; // This is set by TTI or overridden by cl::opt.
  unsigned MinVecRegSize; // Set by cl::opt (default: 128).

  /// Instruction builder to construct the vectorized tree.
  IRBuilder<> Builder;

  /// A map of scalar integer values to the smallest bit width with which they
  /// can legally be represented. The values map to (width, signed) pairs,
  /// where "width" indicates the minimum bit width and "signed" is True if the
  /// value must be signed-extended, rather than zero-extended, back to its
  /// original width.
  MapVector<Value *, std::pair<uint64_t, bool>> MinBWs;
};
24710b57cec5SDimitry Andric 
24720b57cec5SDimitry Andric } // end namespace slpvectorizer
24730b57cec5SDimitry Andric 
24740b57cec5SDimitry Andric template <> struct GraphTraits<BoUpSLP *> {
24750b57cec5SDimitry Andric   using TreeEntry = BoUpSLP::TreeEntry;
24760b57cec5SDimitry Andric 
24770b57cec5SDimitry Andric   /// NodeRef has to be a pointer per the GraphWriter.
24780b57cec5SDimitry Andric   using NodeRef = TreeEntry *;
24790b57cec5SDimitry Andric 
24800b57cec5SDimitry Andric   using ContainerTy = BoUpSLP::TreeEntry::VecTreeTy;
24810b57cec5SDimitry Andric 
24820b57cec5SDimitry Andric   /// Add the VectorizableTree to the index iterator to be able to return
24830b57cec5SDimitry Andric   /// TreeEntry pointers.
24840b57cec5SDimitry Andric   struct ChildIteratorType
24850b57cec5SDimitry Andric       : public iterator_adaptor_base<
24860b57cec5SDimitry Andric             ChildIteratorType, SmallVector<BoUpSLP::EdgeInfo, 1>::iterator> {
24870b57cec5SDimitry Andric     ContainerTy &VectorizableTree;
24880b57cec5SDimitry Andric 
ChildIteratorTypellvm::GraphTraits::ChildIteratorType24890b57cec5SDimitry Andric     ChildIteratorType(SmallVector<BoUpSLP::EdgeInfo, 1>::iterator W,
24900b57cec5SDimitry Andric                       ContainerTy &VT)
24910b57cec5SDimitry Andric         : ChildIteratorType::iterator_adaptor_base(W), VectorizableTree(VT) {}
24920b57cec5SDimitry Andric 
operator *llvm::GraphTraits::ChildIteratorType24930b57cec5SDimitry Andric     NodeRef operator*() { return I->UserTE; }
24940b57cec5SDimitry Andric   };
24950b57cec5SDimitry Andric 
getEntryNodellvm::GraphTraits24960b57cec5SDimitry Andric   static NodeRef getEntryNode(BoUpSLP &R) {
24970b57cec5SDimitry Andric     return R.VectorizableTree[0].get();
24980b57cec5SDimitry Andric   }
24990b57cec5SDimitry Andric 
child_beginllvm::GraphTraits25000b57cec5SDimitry Andric   static ChildIteratorType child_begin(NodeRef N) {
25010b57cec5SDimitry Andric     return {N->UserTreeIndices.begin(), N->Container};
25020b57cec5SDimitry Andric   }
25030b57cec5SDimitry Andric 
child_endllvm::GraphTraits25040b57cec5SDimitry Andric   static ChildIteratorType child_end(NodeRef N) {
25050b57cec5SDimitry Andric     return {N->UserTreeIndices.end(), N->Container};
25060b57cec5SDimitry Andric   }
25070b57cec5SDimitry Andric 
25080b57cec5SDimitry Andric   /// For the node iterator we just need to turn the TreeEntry iterator into a
25090b57cec5SDimitry Andric   /// TreeEntry* iterator so that it dereferences to NodeRef.
25100b57cec5SDimitry Andric   class nodes_iterator {
25110b57cec5SDimitry Andric     using ItTy = ContainerTy::iterator;
25120b57cec5SDimitry Andric     ItTy It;
25130b57cec5SDimitry Andric 
25140b57cec5SDimitry Andric   public:
nodes_iterator(const ItTy & It2)25150b57cec5SDimitry Andric     nodes_iterator(const ItTy &It2) : It(It2) {}
operator *()25160b57cec5SDimitry Andric     NodeRef operator*() { return It->get(); }
operator ++()25170b57cec5SDimitry Andric     nodes_iterator operator++() {
25180b57cec5SDimitry Andric       ++It;
25190b57cec5SDimitry Andric       return *this;
25200b57cec5SDimitry Andric     }
operator !=(const nodes_iterator & N2) const25210b57cec5SDimitry Andric     bool operator!=(const nodes_iterator &N2) const { return N2.It != It; }
25220b57cec5SDimitry Andric   };
25230b57cec5SDimitry Andric 
nodes_beginllvm::GraphTraits25240b57cec5SDimitry Andric   static nodes_iterator nodes_begin(BoUpSLP *R) {
25250b57cec5SDimitry Andric     return nodes_iterator(R->VectorizableTree.begin());
25260b57cec5SDimitry Andric   }
25270b57cec5SDimitry Andric 
nodes_endllvm::GraphTraits25280b57cec5SDimitry Andric   static nodes_iterator nodes_end(BoUpSLP *R) {
25290b57cec5SDimitry Andric     return nodes_iterator(R->VectorizableTree.end());
25300b57cec5SDimitry Andric   }
25310b57cec5SDimitry Andric 
sizellvm::GraphTraits25320b57cec5SDimitry Andric   static unsigned size(BoUpSLP *R) { return R->VectorizableTree.size(); }
25330b57cec5SDimitry Andric };
25340b57cec5SDimitry Andric 
25350b57cec5SDimitry Andric template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
25360b57cec5SDimitry Andric   using TreeEntry = BoUpSLP::TreeEntry;
25370b57cec5SDimitry Andric 
DOTGraphTraitsllvm::DOTGraphTraits25380b57cec5SDimitry Andric   DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
25390b57cec5SDimitry Andric 
getNodeLabelllvm::DOTGraphTraits25400b57cec5SDimitry Andric   std::string getNodeLabel(const TreeEntry *Entry, const BoUpSLP *R) {
25410b57cec5SDimitry Andric     std::string Str;
25420b57cec5SDimitry Andric     raw_string_ostream OS(Str);
25430b57cec5SDimitry Andric     if (isSplat(Entry->Scalars)) {
25440b57cec5SDimitry Andric       OS << "<splat> " << *Entry->Scalars[0];
25450b57cec5SDimitry Andric       return Str;
25460b57cec5SDimitry Andric     }
25470b57cec5SDimitry Andric     for (auto V : Entry->Scalars) {
25480b57cec5SDimitry Andric       OS << *V;
2549af732203SDimitry Andric       if (llvm::any_of(R->ExternalUses, [&](const BoUpSLP::ExternalUser &EU) {
2550af732203SDimitry Andric             return EU.Scalar == V;
2551af732203SDimitry Andric           }))
25520b57cec5SDimitry Andric         OS << " <extract>";
25530b57cec5SDimitry Andric       OS << "\n";
25540b57cec5SDimitry Andric     }
25550b57cec5SDimitry Andric     return Str;
25560b57cec5SDimitry Andric   }
25570b57cec5SDimitry Andric 
getNodeAttributesllvm::DOTGraphTraits25580b57cec5SDimitry Andric   static std::string getNodeAttributes(const TreeEntry *Entry,
25590b57cec5SDimitry Andric                                        const BoUpSLP *) {
2560480093f4SDimitry Andric     if (Entry->State == TreeEntry::NeedToGather)
25610b57cec5SDimitry Andric       return "color=red";
25620b57cec5SDimitry Andric     return "";
25630b57cec5SDimitry Andric   }
25640b57cec5SDimitry Andric };
25650b57cec5SDimitry Andric 
25660b57cec5SDimitry Andric } // end namespace llvm
25670b57cec5SDimitry Andric 
// Destructor: erase all instructions queued in DeletedInstructions.
// Deletion runs in two passes -- first sever every def-use link, then erase.
// The order matters: erasing an instruction while another queued instruction
// still references it would trip the use_empty() assertion below.
BoUpSLP::~BoUpSLP() {
  for (const auto &Pair : DeletedInstructions) {
    // Replace operands of ignored instructions with Undefs in case if they were
    // marked for deletion.
    if (Pair.getSecond()) {
      Value *Undef = UndefValue::get(Pair.getFirst()->getType());
      Pair.getFirst()->replaceAllUsesWith(Undef);
    }
    Pair.getFirst()->dropAllReferences();
  }
  // Second pass: all cross-references are gone, so erasure is now safe.
  for (const auto &Pair : DeletedInstructions) {
    assert(Pair.getFirst()->use_empty() &&
           "trying to erase instruction with users.");
    Pair.getFirst()->eraseFromParent();
  }
#ifdef EXPENSIVE_CHECKS
  // If we could guarantee that this call is not extremely slow, we could
  // remove the ifdef limitation (see PR47712).
  assert(!verifyFunction(*F, &dbgs()));
#endif
}
25898bcb0991SDimitry Andric 
eraseInstructions(ArrayRef<Value * > AV)25908bcb0991SDimitry Andric void BoUpSLP::eraseInstructions(ArrayRef<Value *> AV) {
25918bcb0991SDimitry Andric   for (auto *V : AV) {
25928bcb0991SDimitry Andric     if (auto *I = dyn_cast<Instruction>(V))
2593af732203SDimitry Andric       eraseInstruction(I, /*ReplaceOpsWithUndef=*/true);
25948bcb0991SDimitry Andric   };
25958bcb0991SDimitry Andric }
25968bcb0991SDimitry Andric 
buildTree(ArrayRef<Value * > Roots,ArrayRef<Value * > UserIgnoreLst)25970b57cec5SDimitry Andric void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
25980b57cec5SDimitry Andric                         ArrayRef<Value *> UserIgnoreLst) {
25990b57cec5SDimitry Andric   ExtraValueToDebugLocsMap ExternallyUsedValues;
26000b57cec5SDimitry Andric   buildTree(Roots, ExternallyUsedValues, UserIgnoreLst);
26010b57cec5SDimitry Andric }
26020b57cec5SDimitry Andric 
/// Builds the vectorizable tree rooted at \p Roots, then walks every
/// vectorized entry and records in ExternalUses each scalar that must be
/// extracted from the resulting vector -- either because it is listed in
/// \p ExternallyUsedValues or because it has a user outside the tree.
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
                        ExtraValueToDebugLocsMap &ExternallyUsedValues,
                        ArrayRef<Value *> UserIgnoreLst) {
  // Start from a clean slate; any tree from a previous attempt is discarded.
  deleteTree();
  UserIgnoreList = UserIgnoreLst;
  // Mixed-type roots cannot form a bundle; leave the tree empty.
  if (!allSameType(Roots))
    return;
  buildTree_rec(Roots, 0, EdgeInfo());

  // Collect the values that we need to extract from the tree.
  for (auto &TEPtr : VectorizableTree) {
    TreeEntry *Entry = TEPtr.get();

    // No need to handle users of gathered values.
    if (Entry->State == TreeEntry::NeedToGather)
      continue;

    // For each lane:
    for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
      Value *Scalar = Entry->Scalars[Lane];
      int FoundLane = Entry->findLaneForValue(Scalar);

      // Check if the scalar is externally used as an extra arg.
      auto ExtI = ExternallyUsedValues.find(Scalar);
      if (ExtI != ExternallyUsedValues.end()) {
        LLVM_DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane "
                          << Lane << " from " << *Scalar << ".\n");
        // A null user denotes an extract requested by the caller rather than
        // by a concrete in-function user instruction.
        ExternalUses.emplace_back(Scalar, nullptr, FoundLane);
      }
      for (User *U : Scalar->users()) {
        LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");

        // Non-instruction users (e.g. constants/metadata) need no extract.
        Instruction *UserInst = dyn_cast<Instruction>(U);
        if (!UserInst)
          continue;

        // Skip in-tree scalars that become vectors
        if (TreeEntry *UseEntry = getTreeEntry(U)) {
          Value *UseScalar = UseEntry->Scalars[0];
          // Some in-tree scalars will remain as scalar in vectorized
          // instructions. If that is the case, the one in Lane 0 will
          // be used.
          if (UseScalar != U ||
              UseEntry->State == TreeEntry::ScatterVectorize ||
              !InTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
            LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
                              << ".\n");
            assert(UseEntry->State != TreeEntry::NeedToGather && "Bad state");
            continue;
          }
        }

        // Ignore users in the user ignore list.
        if (is_contained(UserIgnoreList, UserInst))
          continue;

        LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane "
                          << Lane << " from " << *Scalar << ".\n");
        ExternalUses.push_back(ExternalUser(Scalar, U, FoundLane));
      }
    }
  }
}
26660b57cec5SDimitry Andric 
buildTree_rec(ArrayRef<Value * > VL,unsigned Depth,const EdgeInfo & UserTreeIdx)26670b57cec5SDimitry Andric void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
26680b57cec5SDimitry Andric                             const EdgeInfo &UserTreeIdx) {
26690b57cec5SDimitry Andric   assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
26700b57cec5SDimitry Andric 
26710b57cec5SDimitry Andric   InstructionsState S = getSameOpcode(VL);
26720b57cec5SDimitry Andric   if (Depth == RecursionMaxDepth) {
26730b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
26748bcb0991SDimitry Andric     newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
26750b57cec5SDimitry Andric     return;
26760b57cec5SDimitry Andric   }
26770b57cec5SDimitry Andric 
26785f7ddb14SDimitry Andric   // Don't handle scalable vectors
26795f7ddb14SDimitry Andric   if (S.getOpcode() == Instruction::ExtractElement &&
26805f7ddb14SDimitry Andric       isa<ScalableVectorType>(
26815f7ddb14SDimitry Andric           cast<ExtractElementInst>(S.OpValue)->getVectorOperandType())) {
26825f7ddb14SDimitry Andric     LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
26835f7ddb14SDimitry Andric     newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
26845f7ddb14SDimitry Andric     return;
26855f7ddb14SDimitry Andric   }
26865f7ddb14SDimitry Andric 
26870b57cec5SDimitry Andric   // Don't handle vectors.
26885f7ddb14SDimitry Andric   if (S.OpValue->getType()->isVectorTy() &&
26895f7ddb14SDimitry Andric       !isa<InsertElementInst>(S.OpValue)) {
26900b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
26918bcb0991SDimitry Andric     newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
26920b57cec5SDimitry Andric     return;
26930b57cec5SDimitry Andric   }
26940b57cec5SDimitry Andric 
26950b57cec5SDimitry Andric   if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
26960b57cec5SDimitry Andric     if (SI->getValueOperand()->getType()->isVectorTy()) {
26970b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
26988bcb0991SDimitry Andric       newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
26990b57cec5SDimitry Andric       return;
27000b57cec5SDimitry Andric     }
27010b57cec5SDimitry Andric 
27020b57cec5SDimitry Andric   // If all of the operands are identical or constant we have a simple solution.
27030b57cec5SDimitry Andric   if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode()) {
27040b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
27058bcb0991SDimitry Andric     newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
27060b57cec5SDimitry Andric     return;
27070b57cec5SDimitry Andric   }
27080b57cec5SDimitry Andric 
27090b57cec5SDimitry Andric   // We now know that this is a vector of instructions of the same type from
27100b57cec5SDimitry Andric   // the same block.
27110b57cec5SDimitry Andric 
27120b57cec5SDimitry Andric   // Don't vectorize ephemeral values.
27138bcb0991SDimitry Andric   for (Value *V : VL) {
27148bcb0991SDimitry Andric     if (EphValues.count(V)) {
27158bcb0991SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
27160b57cec5SDimitry Andric                         << ") is ephemeral.\n");
27178bcb0991SDimitry Andric       newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
27180b57cec5SDimitry Andric       return;
27190b57cec5SDimitry Andric     }
27200b57cec5SDimitry Andric   }
27210b57cec5SDimitry Andric 
27220b57cec5SDimitry Andric   // Check if this is a duplicate of another entry.
27230b57cec5SDimitry Andric   if (TreeEntry *E = getTreeEntry(S.OpValue)) {
27240b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
27250b57cec5SDimitry Andric     if (!E->isSame(VL)) {
27260b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
27278bcb0991SDimitry Andric       newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
27280b57cec5SDimitry Andric       return;
27290b57cec5SDimitry Andric     }
27300b57cec5SDimitry Andric     // Record the reuse of the tree node.  FIXME, currently this is only used to
27310b57cec5SDimitry Andric     // properly draw the graph rather than for the actual vectorization.
27320b57cec5SDimitry Andric     E->UserTreeIndices.push_back(UserTreeIdx);
27330b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
27340b57cec5SDimitry Andric                       << ".\n");
27350b57cec5SDimitry Andric     return;
27360b57cec5SDimitry Andric   }
27370b57cec5SDimitry Andric 
27380b57cec5SDimitry Andric   // Check that none of the instructions in the bundle are already in the tree.
27398bcb0991SDimitry Andric   for (Value *V : VL) {
27408bcb0991SDimitry Andric     auto *I = dyn_cast<Instruction>(V);
27410b57cec5SDimitry Andric     if (!I)
27420b57cec5SDimitry Andric       continue;
27430b57cec5SDimitry Andric     if (getTreeEntry(I)) {
27448bcb0991SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
27450b57cec5SDimitry Andric                         << ") is already in tree.\n");
27468bcb0991SDimitry Andric       newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
27470b57cec5SDimitry Andric       return;
27480b57cec5SDimitry Andric     }
27490b57cec5SDimitry Andric   }
27500b57cec5SDimitry Andric 
27510b57cec5SDimitry Andric   // If any of the scalars is marked as a value that needs to stay scalar, then
27520b57cec5SDimitry Andric   // we need to gather the scalars.
27530b57cec5SDimitry Andric   // The reduction nodes (stored in UserIgnoreList) also should stay scalar.
27548bcb0991SDimitry Andric   for (Value *V : VL) {
27558bcb0991SDimitry Andric     if (MustGather.count(V) || is_contained(UserIgnoreList, V)) {
27560b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
27578bcb0991SDimitry Andric       newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
27580b57cec5SDimitry Andric       return;
27590b57cec5SDimitry Andric     }
27600b57cec5SDimitry Andric   }
27610b57cec5SDimitry Andric 
27620b57cec5SDimitry Andric   // Check that all of the users of the scalars that we want to vectorize are
27630b57cec5SDimitry Andric   // schedulable.
27640b57cec5SDimitry Andric   auto *VL0 = cast<Instruction>(S.OpValue);
27650b57cec5SDimitry Andric   BasicBlock *BB = VL0->getParent();
27660b57cec5SDimitry Andric 
27670b57cec5SDimitry Andric   if (!DT->isReachableFromEntry(BB)) {
27680b57cec5SDimitry Andric     // Don't go into unreachable blocks. They may contain instructions with
27690b57cec5SDimitry Andric     // dependency cycles which confuse the final scheduling.
27700b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
27718bcb0991SDimitry Andric     newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
27720b57cec5SDimitry Andric     return;
27730b57cec5SDimitry Andric   }
27740b57cec5SDimitry Andric 
27750b57cec5SDimitry Andric   // Check that every instruction appears once in this bundle.
27760b57cec5SDimitry Andric   SmallVector<unsigned, 4> ReuseShuffleIndicies;
27770b57cec5SDimitry Andric   SmallVector<Value *, 4> UniqueValues;
27780b57cec5SDimitry Andric   DenseMap<Value *, unsigned> UniquePositions;
27790b57cec5SDimitry Andric   for (Value *V : VL) {
27800b57cec5SDimitry Andric     auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
27810b57cec5SDimitry Andric     ReuseShuffleIndicies.emplace_back(Res.first->second);
27820b57cec5SDimitry Andric     if (Res.second)
27830b57cec5SDimitry Andric       UniqueValues.emplace_back(V);
27840b57cec5SDimitry Andric   }
27858bcb0991SDimitry Andric   size_t NumUniqueScalarValues = UniqueValues.size();
27868bcb0991SDimitry Andric   if (NumUniqueScalarValues == VL.size()) {
27870b57cec5SDimitry Andric     ReuseShuffleIndicies.clear();
27880b57cec5SDimitry Andric   } else {
27890b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
27908bcb0991SDimitry Andric     if (NumUniqueScalarValues <= 1 ||
27918bcb0991SDimitry Andric         !llvm::isPowerOf2_32(NumUniqueScalarValues)) {
27920b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
27938bcb0991SDimitry Andric       newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
27940b57cec5SDimitry Andric       return;
27950b57cec5SDimitry Andric     }
27960b57cec5SDimitry Andric     VL = UniqueValues;
27970b57cec5SDimitry Andric   }
27980b57cec5SDimitry Andric 
27990b57cec5SDimitry Andric   auto &BSRef = BlocksSchedules[BB];
28000b57cec5SDimitry Andric   if (!BSRef)
28018bcb0991SDimitry Andric     BSRef = std::make_unique<BlockScheduling>(BB);
28020b57cec5SDimitry Andric 
28030b57cec5SDimitry Andric   BlockScheduling &BS = *BSRef.get();
28040b57cec5SDimitry Andric 
28058bcb0991SDimitry Andric   Optional<ScheduleData *> Bundle = BS.tryScheduleBundle(VL, this, S);
28068bcb0991SDimitry Andric   if (!Bundle) {
28070b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
28080b57cec5SDimitry Andric     assert((!BS.getScheduleData(VL0) ||
28090b57cec5SDimitry Andric             !BS.getScheduleData(VL0)->isPartOfBundle()) &&
28100b57cec5SDimitry Andric            "tryScheduleBundle should cancelScheduling on failure");
28118bcb0991SDimitry Andric     newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
28128bcb0991SDimitry Andric                  ReuseShuffleIndicies);
28130b57cec5SDimitry Andric     return;
28140b57cec5SDimitry Andric   }
28150b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
28160b57cec5SDimitry Andric 
28170b57cec5SDimitry Andric   unsigned ShuffleOrOp = S.isAltShuffle() ?
28180b57cec5SDimitry Andric                 (unsigned) Instruction::ShuffleVector : S.getOpcode();
28190b57cec5SDimitry Andric   switch (ShuffleOrOp) {
28200b57cec5SDimitry Andric     case Instruction::PHI: {
28218bcb0991SDimitry Andric       auto *PH = cast<PHINode>(VL0);
28220b57cec5SDimitry Andric 
28230b57cec5SDimitry Andric       // Check for terminator values (e.g. invoke).
2824af732203SDimitry Andric       for (Value *V : VL)
2825af732203SDimitry Andric         for (unsigned I = 0, E = PH->getNumIncomingValues(); I < E; ++I) {
28260b57cec5SDimitry Andric           Instruction *Term = dyn_cast<Instruction>(
2827af732203SDimitry Andric               cast<PHINode>(V)->getIncomingValueForBlock(
2828af732203SDimitry Andric                   PH->getIncomingBlock(I)));
28290b57cec5SDimitry Andric           if (Term && Term->isTerminator()) {
28300b57cec5SDimitry Andric             LLVM_DEBUG(dbgs()
28310b57cec5SDimitry Andric                        << "SLP: Need to swizzle PHINodes (terminator use).\n");
28320b57cec5SDimitry Andric             BS.cancelScheduling(VL, VL0);
28338bcb0991SDimitry Andric             newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
28348bcb0991SDimitry Andric                          ReuseShuffleIndicies);
28350b57cec5SDimitry Andric             return;
28360b57cec5SDimitry Andric           }
28370b57cec5SDimitry Andric         }
28380b57cec5SDimitry Andric 
28398bcb0991SDimitry Andric       TreeEntry *TE =
28408bcb0991SDimitry Andric           newTreeEntry(VL, Bundle, S, UserTreeIdx, ReuseShuffleIndicies);
28410b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
28420b57cec5SDimitry Andric 
28438bcb0991SDimitry Andric       // Keeps the reordered operands to avoid code duplication.
28448bcb0991SDimitry Andric       SmallVector<ValueList, 2> OperandsVec;
2845af732203SDimitry Andric       for (unsigned I = 0, E = PH->getNumIncomingValues(); I < E; ++I) {
28465f7ddb14SDimitry Andric         if (!DT->isReachableFromEntry(PH->getIncomingBlock(I))) {
28475f7ddb14SDimitry Andric           ValueList Operands(VL.size(), PoisonValue::get(PH->getType()));
28485f7ddb14SDimitry Andric           TE->setOperand(I, Operands);
28495f7ddb14SDimitry Andric           OperandsVec.push_back(Operands);
28505f7ddb14SDimitry Andric           continue;
28515f7ddb14SDimitry Andric         }
28520b57cec5SDimitry Andric         ValueList Operands;
28530b57cec5SDimitry Andric         // Prepare the operand vector.
2854af732203SDimitry Andric         for (Value *V : VL)
2855af732203SDimitry Andric           Operands.push_back(cast<PHINode>(V)->getIncomingValueForBlock(
2856af732203SDimitry Andric               PH->getIncomingBlock(I)));
2857af732203SDimitry Andric         TE->setOperand(I, Operands);
28588bcb0991SDimitry Andric         OperandsVec.push_back(Operands);
28590b57cec5SDimitry Andric       }
28608bcb0991SDimitry Andric       for (unsigned OpIdx = 0, OpE = OperandsVec.size(); OpIdx != OpE; ++OpIdx)
28618bcb0991SDimitry Andric         buildTree_rec(OperandsVec[OpIdx], Depth + 1, {TE, OpIdx});
28620b57cec5SDimitry Andric       return;
28630b57cec5SDimitry Andric     }
28640b57cec5SDimitry Andric     case Instruction::ExtractValue:
28650b57cec5SDimitry Andric     case Instruction::ExtractElement: {
28660b57cec5SDimitry Andric       OrdersType CurrentOrder;
28670b57cec5SDimitry Andric       bool Reuse = canReuseExtract(VL, VL0, CurrentOrder);
28680b57cec5SDimitry Andric       if (Reuse) {
28690b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n");
28700b57cec5SDimitry Andric         ++NumOpsWantToKeepOriginalOrder;
28718bcb0991SDimitry Andric         newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
28720b57cec5SDimitry Andric                      ReuseShuffleIndicies);
28730b57cec5SDimitry Andric         // This is a special case, as it does not gather, but at the same time
28740b57cec5SDimitry Andric         // we are not extending buildTree_rec() towards the operands.
28750b57cec5SDimitry Andric         ValueList Op0;
28760b57cec5SDimitry Andric         Op0.assign(VL.size(), VL0->getOperand(0));
28778bcb0991SDimitry Andric         VectorizableTree.back()->setOperand(0, Op0);
28780b57cec5SDimitry Andric         return;
28790b57cec5SDimitry Andric       }
28800b57cec5SDimitry Andric       if (!CurrentOrder.empty()) {
28810b57cec5SDimitry Andric         LLVM_DEBUG({
28820b57cec5SDimitry Andric           dbgs() << "SLP: Reusing or shuffling of reordered extract sequence "
28830b57cec5SDimitry Andric                     "with order";
28840b57cec5SDimitry Andric           for (unsigned Idx : CurrentOrder)
28850b57cec5SDimitry Andric             dbgs() << " " << Idx;
28860b57cec5SDimitry Andric           dbgs() << "\n";
28870b57cec5SDimitry Andric         });
28880b57cec5SDimitry Andric         // Insert new order with initial value 0, if it does not exist,
28890b57cec5SDimitry Andric         // otherwise return the iterator to the existing one.
28908bcb0991SDimitry Andric         newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
2891af732203SDimitry Andric                      ReuseShuffleIndicies, CurrentOrder);
2892af732203SDimitry Andric         findRootOrder(CurrentOrder);
2893af732203SDimitry Andric         ++NumOpsWantToKeepOrder[CurrentOrder];
28940b57cec5SDimitry Andric         // This is a special case, as it does not gather, but at the same time
28950b57cec5SDimitry Andric         // we are not extending buildTree_rec() towards the operands.
28960b57cec5SDimitry Andric         ValueList Op0;
28970b57cec5SDimitry Andric         Op0.assign(VL.size(), VL0->getOperand(0));
28988bcb0991SDimitry Andric         VectorizableTree.back()->setOperand(0, Op0);
28990b57cec5SDimitry Andric         return;
29000b57cec5SDimitry Andric       }
29010b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
29028bcb0991SDimitry Andric       newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
29038bcb0991SDimitry Andric                    ReuseShuffleIndicies);
29040b57cec5SDimitry Andric       BS.cancelScheduling(VL, VL0);
29050b57cec5SDimitry Andric       return;
29060b57cec5SDimitry Andric     }
29075f7ddb14SDimitry Andric     case Instruction::InsertElement: {
29085f7ddb14SDimitry Andric       assert(ReuseShuffleIndicies.empty() && "All inserts should be unique");
29095f7ddb14SDimitry Andric 
29105f7ddb14SDimitry Andric       // Check that we have a buildvector and not a shuffle of 2 or more
29115f7ddb14SDimitry Andric       // different vectors.
29125f7ddb14SDimitry Andric       ValueSet SourceVectors;
29135f7ddb14SDimitry Andric       for (Value *V : VL)
29145f7ddb14SDimitry Andric         SourceVectors.insert(cast<Instruction>(V)->getOperand(0));
29155f7ddb14SDimitry Andric 
29165f7ddb14SDimitry Andric       if (count_if(VL, [&SourceVectors](Value *V) {
29175f7ddb14SDimitry Andric             return !SourceVectors.contains(V);
29185f7ddb14SDimitry Andric           }) >= 2) {
29195f7ddb14SDimitry Andric         // Found 2nd source vector - cancel.
29205f7ddb14SDimitry Andric         LLVM_DEBUG(dbgs() << "SLP: Gather of insertelement vectors with "
29215f7ddb14SDimitry Andric                              "different source vectors.\n");
29225f7ddb14SDimitry Andric         newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
29235f7ddb14SDimitry Andric                      ReuseShuffleIndicies);
29245f7ddb14SDimitry Andric         BS.cancelScheduling(VL, VL0);
29255f7ddb14SDimitry Andric         return;
29265f7ddb14SDimitry Andric       }
29275f7ddb14SDimitry Andric 
29285f7ddb14SDimitry Andric       TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx);
29295f7ddb14SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
29305f7ddb14SDimitry Andric 
29315f7ddb14SDimitry Andric       constexpr int NumOps = 2;
29325f7ddb14SDimitry Andric       ValueList VectorOperands[NumOps];
29335f7ddb14SDimitry Andric       for (int I = 0; I < NumOps; ++I) {
29345f7ddb14SDimitry Andric         for (Value *V : VL)
29355f7ddb14SDimitry Andric           VectorOperands[I].push_back(cast<Instruction>(V)->getOperand(I));
29365f7ddb14SDimitry Andric 
29375f7ddb14SDimitry Andric         TE->setOperand(I, VectorOperands[I]);
29385f7ddb14SDimitry Andric       }
29395f7ddb14SDimitry Andric       buildTree_rec(VectorOperands[NumOps - 1], Depth + 1, {TE, 0});
29405f7ddb14SDimitry Andric       return;
29415f7ddb14SDimitry Andric     }
29420b57cec5SDimitry Andric     case Instruction::Load: {
29430b57cec5SDimitry Andric       // Check that a vectorized load would load the same memory as a scalar
29440b57cec5SDimitry Andric       // load. For example, we don't want to vectorize loads that are smaller
29450b57cec5SDimitry Andric       // than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
29460b57cec5SDimitry Andric       // treats loading/storing it as an i8 struct. If we vectorize loads/stores
29470b57cec5SDimitry Andric       // from such a struct, we read/write packed bits disagreeing with the
29480b57cec5SDimitry Andric       // unvectorized version.
29490b57cec5SDimitry Andric       Type *ScalarTy = VL0->getType();
29500b57cec5SDimitry Andric 
29510b57cec5SDimitry Andric       if (DL->getTypeSizeInBits(ScalarTy) !=
29520b57cec5SDimitry Andric           DL->getTypeAllocSizeInBits(ScalarTy)) {
29530b57cec5SDimitry Andric         BS.cancelScheduling(VL, VL0);
29548bcb0991SDimitry Andric         newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
29558bcb0991SDimitry Andric                      ReuseShuffleIndicies);
29560b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
29570b57cec5SDimitry Andric         return;
29580b57cec5SDimitry Andric       }
29590b57cec5SDimitry Andric 
29600b57cec5SDimitry Andric       // Make sure all loads in the bundle are simple - we can't vectorize
29610b57cec5SDimitry Andric       // atomic or volatile loads.
29620b57cec5SDimitry Andric       SmallVector<Value *, 4> PointerOps(VL.size());
29630b57cec5SDimitry Andric       auto POIter = PointerOps.begin();
29640b57cec5SDimitry Andric       for (Value *V : VL) {
29650b57cec5SDimitry Andric         auto *L = cast<LoadInst>(V);
29660b57cec5SDimitry Andric         if (!L->isSimple()) {
29670b57cec5SDimitry Andric           BS.cancelScheduling(VL, VL0);
29688bcb0991SDimitry Andric           newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
29698bcb0991SDimitry Andric                        ReuseShuffleIndicies);
29700b57cec5SDimitry Andric           LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
29710b57cec5SDimitry Andric           return;
29720b57cec5SDimitry Andric         }
29730b57cec5SDimitry Andric         *POIter = L->getPointerOperand();
29740b57cec5SDimitry Andric         ++POIter;
29750b57cec5SDimitry Andric       }
29760b57cec5SDimitry Andric 
29770b57cec5SDimitry Andric       OrdersType CurrentOrder;
29780b57cec5SDimitry Andric       // Check the order of pointer operands.
29795f7ddb14SDimitry Andric       if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
29800b57cec5SDimitry Andric         Value *Ptr0;
29810b57cec5SDimitry Andric         Value *PtrN;
29820b57cec5SDimitry Andric         if (CurrentOrder.empty()) {
29830b57cec5SDimitry Andric           Ptr0 = PointerOps.front();
29840b57cec5SDimitry Andric           PtrN = PointerOps.back();
29850b57cec5SDimitry Andric         } else {
29860b57cec5SDimitry Andric           Ptr0 = PointerOps[CurrentOrder.front()];
29870b57cec5SDimitry Andric           PtrN = PointerOps[CurrentOrder.back()];
29880b57cec5SDimitry Andric         }
29895f7ddb14SDimitry Andric         Optional<int> Diff = getPointersDiff(
29905f7ddb14SDimitry Andric             ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
29910b57cec5SDimitry Andric         // Check that the sorted loads are consecutive.
29925f7ddb14SDimitry Andric         if (static_cast<unsigned>(*Diff) == VL.size() - 1) {
29930b57cec5SDimitry Andric           if (CurrentOrder.empty()) {
29940b57cec5SDimitry Andric             // Original loads are consecutive and does not require reordering.
29950b57cec5SDimitry Andric             ++NumOpsWantToKeepOriginalOrder;
29968bcb0991SDimitry Andric             TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S,
29978bcb0991SDimitry Andric                                          UserTreeIdx, ReuseShuffleIndicies);
29988bcb0991SDimitry Andric             TE->setOperandsInOrder();
29990b57cec5SDimitry Andric             LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
30000b57cec5SDimitry Andric           } else {
30010b57cec5SDimitry Andric             // Need to reorder.
30028bcb0991SDimitry Andric             TreeEntry *TE =
30038bcb0991SDimitry Andric                 newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
3004af732203SDimitry Andric                              ReuseShuffleIndicies, CurrentOrder);
30058bcb0991SDimitry Andric             TE->setOperandsInOrder();
30060b57cec5SDimitry Andric             LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
3007af732203SDimitry Andric             findRootOrder(CurrentOrder);
3008af732203SDimitry Andric             ++NumOpsWantToKeepOrder[CurrentOrder];
30090b57cec5SDimitry Andric           }
30100b57cec5SDimitry Andric           return;
30110b57cec5SDimitry Andric         }
30125f7ddb14SDimitry Andric         Align CommonAlignment = cast<LoadInst>(VL0)->getAlign();
30135f7ddb14SDimitry Andric         for (Value *V : VL)
30145f7ddb14SDimitry Andric           CommonAlignment =
30155f7ddb14SDimitry Andric               commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
30165f7ddb14SDimitry Andric         if (TTI->isLegalMaskedGather(FixedVectorType::get(ScalarTy, VL.size()),
30175f7ddb14SDimitry Andric                                      CommonAlignment)) {
3018af732203SDimitry Andric           // Vectorizing non-consecutive loads with `llvm.masked.gather`.
30195f7ddb14SDimitry Andric           TreeEntry *TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle,
30205f7ddb14SDimitry Andric                                        S, UserTreeIdx, ReuseShuffleIndicies);
3021af732203SDimitry Andric           TE->setOperandsInOrder();
3022af732203SDimitry Andric           buildTree_rec(PointerOps, Depth + 1, {TE, 0});
30235f7ddb14SDimitry Andric           LLVM_DEBUG(dbgs()
30245f7ddb14SDimitry Andric                      << "SLP: added a vector of non-consecutive loads.\n");
3025af732203SDimitry Andric           return;
30260b57cec5SDimitry Andric         }
30275f7ddb14SDimitry Andric       }
30280b57cec5SDimitry Andric 
30290b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
30300b57cec5SDimitry Andric       BS.cancelScheduling(VL, VL0);
30318bcb0991SDimitry Andric       newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
30328bcb0991SDimitry Andric                    ReuseShuffleIndicies);
30330b57cec5SDimitry Andric       return;
30340b57cec5SDimitry Andric     }
30350b57cec5SDimitry Andric     case Instruction::ZExt:
30360b57cec5SDimitry Andric     case Instruction::SExt:
30370b57cec5SDimitry Andric     case Instruction::FPToUI:
30380b57cec5SDimitry Andric     case Instruction::FPToSI:
30390b57cec5SDimitry Andric     case Instruction::FPExt:
30400b57cec5SDimitry Andric     case Instruction::PtrToInt:
30410b57cec5SDimitry Andric     case Instruction::IntToPtr:
30420b57cec5SDimitry Andric     case Instruction::SIToFP:
30430b57cec5SDimitry Andric     case Instruction::UIToFP:
30440b57cec5SDimitry Andric     case Instruction::Trunc:
30450b57cec5SDimitry Andric     case Instruction::FPTrunc:
30460b57cec5SDimitry Andric     case Instruction::BitCast: {
30470b57cec5SDimitry Andric       Type *SrcTy = VL0->getOperand(0)->getType();
30488bcb0991SDimitry Andric       for (Value *V : VL) {
30498bcb0991SDimitry Andric         Type *Ty = cast<Instruction>(V)->getOperand(0)->getType();
30500b57cec5SDimitry Andric         if (Ty != SrcTy || !isValidElementType(Ty)) {
30510b57cec5SDimitry Andric           BS.cancelScheduling(VL, VL0);
30528bcb0991SDimitry Andric           newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
30538bcb0991SDimitry Andric                        ReuseShuffleIndicies);
30540b57cec5SDimitry Andric           LLVM_DEBUG(dbgs()
30550b57cec5SDimitry Andric                      << "SLP: Gathering casts with different src types.\n");
30560b57cec5SDimitry Andric           return;
30570b57cec5SDimitry Andric         }
30580b57cec5SDimitry Andric       }
30598bcb0991SDimitry Andric       TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
30608bcb0991SDimitry Andric                                    ReuseShuffleIndicies);
30610b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
30620b57cec5SDimitry Andric 
30638bcb0991SDimitry Andric       TE->setOperandsInOrder();
30640b57cec5SDimitry Andric       for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
30650b57cec5SDimitry Andric         ValueList Operands;
30660b57cec5SDimitry Andric         // Prepare the operand vector.
30678bcb0991SDimitry Andric         for (Value *V : VL)
30688bcb0991SDimitry Andric           Operands.push_back(cast<Instruction>(V)->getOperand(i));
30690b57cec5SDimitry Andric 
30700b57cec5SDimitry Andric         buildTree_rec(Operands, Depth + 1, {TE, i});
30710b57cec5SDimitry Andric       }
30720b57cec5SDimitry Andric       return;
30730b57cec5SDimitry Andric     }
30740b57cec5SDimitry Andric     case Instruction::ICmp:
30750b57cec5SDimitry Andric     case Instruction::FCmp: {
30760b57cec5SDimitry Andric       // Check that all of the compares have the same predicate.
30770b57cec5SDimitry Andric       CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
30780b57cec5SDimitry Andric       CmpInst::Predicate SwapP0 = CmpInst::getSwappedPredicate(P0);
30790b57cec5SDimitry Andric       Type *ComparedTy = VL0->getOperand(0)->getType();
30808bcb0991SDimitry Andric       for (Value *V : VL) {
30818bcb0991SDimitry Andric         CmpInst *Cmp = cast<CmpInst>(V);
30820b57cec5SDimitry Andric         if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) ||
30830b57cec5SDimitry Andric             Cmp->getOperand(0)->getType() != ComparedTy) {
30840b57cec5SDimitry Andric           BS.cancelScheduling(VL, VL0);
30858bcb0991SDimitry Andric           newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
30868bcb0991SDimitry Andric                        ReuseShuffleIndicies);
30870b57cec5SDimitry Andric           LLVM_DEBUG(dbgs()
30880b57cec5SDimitry Andric                      << "SLP: Gathering cmp with different predicate.\n");
30890b57cec5SDimitry Andric           return;
30900b57cec5SDimitry Andric         }
30910b57cec5SDimitry Andric       }
30920b57cec5SDimitry Andric 
30938bcb0991SDimitry Andric       TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
30948bcb0991SDimitry Andric                                    ReuseShuffleIndicies);
30950b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
30960b57cec5SDimitry Andric 
30970b57cec5SDimitry Andric       ValueList Left, Right;
30980b57cec5SDimitry Andric       if (cast<CmpInst>(VL0)->isCommutative()) {
30990b57cec5SDimitry Andric         // Commutative predicate - collect + sort operands of the instructions
31000b57cec5SDimitry Andric         // so that each side is more likely to have the same opcode.
31010b57cec5SDimitry Andric         assert(P0 == SwapP0 && "Commutative Predicate mismatch");
3102480093f4SDimitry Andric         reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
31030b57cec5SDimitry Andric       } else {
31040b57cec5SDimitry Andric         // Collect operands - commute if it uses the swapped predicate.
31050b57cec5SDimitry Andric         for (Value *V : VL) {
31060b57cec5SDimitry Andric           auto *Cmp = cast<CmpInst>(V);
31070b57cec5SDimitry Andric           Value *LHS = Cmp->getOperand(0);
31080b57cec5SDimitry Andric           Value *RHS = Cmp->getOperand(1);
31090b57cec5SDimitry Andric           if (Cmp->getPredicate() != P0)
31100b57cec5SDimitry Andric             std::swap(LHS, RHS);
31110b57cec5SDimitry Andric           Left.push_back(LHS);
31120b57cec5SDimitry Andric           Right.push_back(RHS);
31130b57cec5SDimitry Andric         }
31140b57cec5SDimitry Andric       }
31158bcb0991SDimitry Andric       TE->setOperand(0, Left);
31168bcb0991SDimitry Andric       TE->setOperand(1, Right);
31170b57cec5SDimitry Andric       buildTree_rec(Left, Depth + 1, {TE, 0});
31180b57cec5SDimitry Andric       buildTree_rec(Right, Depth + 1, {TE, 1});
31190b57cec5SDimitry Andric       return;
31200b57cec5SDimitry Andric     }
31210b57cec5SDimitry Andric     case Instruction::Select:
31220b57cec5SDimitry Andric     case Instruction::FNeg:
31230b57cec5SDimitry Andric     case Instruction::Add:
31240b57cec5SDimitry Andric     case Instruction::FAdd:
31250b57cec5SDimitry Andric     case Instruction::Sub:
31260b57cec5SDimitry Andric     case Instruction::FSub:
31270b57cec5SDimitry Andric     case Instruction::Mul:
31280b57cec5SDimitry Andric     case Instruction::FMul:
31290b57cec5SDimitry Andric     case Instruction::UDiv:
31300b57cec5SDimitry Andric     case Instruction::SDiv:
31310b57cec5SDimitry Andric     case Instruction::FDiv:
31320b57cec5SDimitry Andric     case Instruction::URem:
31330b57cec5SDimitry Andric     case Instruction::SRem:
31340b57cec5SDimitry Andric     case Instruction::FRem:
31350b57cec5SDimitry Andric     case Instruction::Shl:
31360b57cec5SDimitry Andric     case Instruction::LShr:
31370b57cec5SDimitry Andric     case Instruction::AShr:
31380b57cec5SDimitry Andric     case Instruction::And:
31390b57cec5SDimitry Andric     case Instruction::Or:
31400b57cec5SDimitry Andric     case Instruction::Xor: {
31418bcb0991SDimitry Andric       TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
31428bcb0991SDimitry Andric                                    ReuseShuffleIndicies);
31430b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
31440b57cec5SDimitry Andric 
31450b57cec5SDimitry Andric       // Sort operands of the instructions so that each side is more likely to
31460b57cec5SDimitry Andric       // have the same opcode.
31470b57cec5SDimitry Andric       if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
31480b57cec5SDimitry Andric         ValueList Left, Right;
3149480093f4SDimitry Andric         reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
31508bcb0991SDimitry Andric         TE->setOperand(0, Left);
31518bcb0991SDimitry Andric         TE->setOperand(1, Right);
31520b57cec5SDimitry Andric         buildTree_rec(Left, Depth + 1, {TE, 0});
31530b57cec5SDimitry Andric         buildTree_rec(Right, Depth + 1, {TE, 1});
31540b57cec5SDimitry Andric         return;
31550b57cec5SDimitry Andric       }
31560b57cec5SDimitry Andric 
31578bcb0991SDimitry Andric       TE->setOperandsInOrder();
31580b57cec5SDimitry Andric       for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
31590b57cec5SDimitry Andric         ValueList Operands;
31600b57cec5SDimitry Andric         // Prepare the operand vector.
3161af732203SDimitry Andric         for (Value *V : VL)
3162af732203SDimitry Andric           Operands.push_back(cast<Instruction>(V)->getOperand(i));
31630b57cec5SDimitry Andric 
31640b57cec5SDimitry Andric         buildTree_rec(Operands, Depth + 1, {TE, i});
31650b57cec5SDimitry Andric       }
31660b57cec5SDimitry Andric       return;
31670b57cec5SDimitry Andric     }
31680b57cec5SDimitry Andric     case Instruction::GetElementPtr: {
31690b57cec5SDimitry Andric       // We don't combine GEPs with complicated (nested) indexing.
31708bcb0991SDimitry Andric       for (Value *V : VL) {
31718bcb0991SDimitry Andric         if (cast<Instruction>(V)->getNumOperands() != 2) {
31720b57cec5SDimitry Andric           LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
31730b57cec5SDimitry Andric           BS.cancelScheduling(VL, VL0);
31748bcb0991SDimitry Andric           newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
31758bcb0991SDimitry Andric                        ReuseShuffleIndicies);
31760b57cec5SDimitry Andric           return;
31770b57cec5SDimitry Andric         }
31780b57cec5SDimitry Andric       }
31790b57cec5SDimitry Andric 
31800b57cec5SDimitry Andric       // We can't combine several GEPs into one vector if they operate on
31810b57cec5SDimitry Andric       // different types.
31820b57cec5SDimitry Andric       Type *Ty0 = VL0->getOperand(0)->getType();
31838bcb0991SDimitry Andric       for (Value *V : VL) {
31848bcb0991SDimitry Andric         Type *CurTy = cast<Instruction>(V)->getOperand(0)->getType();
31850b57cec5SDimitry Andric         if (Ty0 != CurTy) {
31860b57cec5SDimitry Andric           LLVM_DEBUG(dbgs()
31870b57cec5SDimitry Andric                      << "SLP: not-vectorizable GEP (different types).\n");
31880b57cec5SDimitry Andric           BS.cancelScheduling(VL, VL0);
31898bcb0991SDimitry Andric           newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
31908bcb0991SDimitry Andric                        ReuseShuffleIndicies);
31910b57cec5SDimitry Andric           return;
31920b57cec5SDimitry Andric         }
31930b57cec5SDimitry Andric       }
31940b57cec5SDimitry Andric 
31950b57cec5SDimitry Andric       // We don't combine GEPs with non-constant indexes.
3196480093f4SDimitry Andric       Type *Ty1 = VL0->getOperand(1)->getType();
31978bcb0991SDimitry Andric       for (Value *V : VL) {
31988bcb0991SDimitry Andric         auto Op = cast<Instruction>(V)->getOperand(1);
3199480093f4SDimitry Andric         if (!isa<ConstantInt>(Op) ||
3200480093f4SDimitry Andric             (Op->getType() != Ty1 &&
3201480093f4SDimitry Andric              Op->getType()->getScalarSizeInBits() >
3202480093f4SDimitry Andric                  DL->getIndexSizeInBits(
3203480093f4SDimitry Andric                      V->getType()->getPointerAddressSpace()))) {
32040b57cec5SDimitry Andric           LLVM_DEBUG(dbgs()
32050b57cec5SDimitry Andric                      << "SLP: not-vectorizable GEP (non-constant indexes).\n");
32060b57cec5SDimitry Andric           BS.cancelScheduling(VL, VL0);
32078bcb0991SDimitry Andric           newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
32088bcb0991SDimitry Andric                        ReuseShuffleIndicies);
32090b57cec5SDimitry Andric           return;
32100b57cec5SDimitry Andric         }
32110b57cec5SDimitry Andric       }
32120b57cec5SDimitry Andric 
32138bcb0991SDimitry Andric       TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
32148bcb0991SDimitry Andric                                    ReuseShuffleIndicies);
32150b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
32168bcb0991SDimitry Andric       TE->setOperandsInOrder();
32170b57cec5SDimitry Andric       for (unsigned i = 0, e = 2; i < e; ++i) {
32180b57cec5SDimitry Andric         ValueList Operands;
32190b57cec5SDimitry Andric         // Prepare the operand vector.
32208bcb0991SDimitry Andric         for (Value *V : VL)
32218bcb0991SDimitry Andric           Operands.push_back(cast<Instruction>(V)->getOperand(i));
32220b57cec5SDimitry Andric 
32230b57cec5SDimitry Andric         buildTree_rec(Operands, Depth + 1, {TE, i});
32240b57cec5SDimitry Andric       }
32250b57cec5SDimitry Andric       return;
32260b57cec5SDimitry Andric     }
32270b57cec5SDimitry Andric     case Instruction::Store: {
32288bcb0991SDimitry Andric       // Check if the stores are consecutive or if we need to swizzle them.
3229480093f4SDimitry Andric       llvm::Type *ScalarTy = cast<StoreInst>(VL0)->getValueOperand()->getType();
3230af732203SDimitry Andric       // Avoid types that are padded when being allocated as scalars, while
3231af732203SDimitry Andric       // being packed together in a vector (such as i1).
3232af732203SDimitry Andric       if (DL->getTypeSizeInBits(ScalarTy) !=
3233af732203SDimitry Andric           DL->getTypeAllocSizeInBits(ScalarTy)) {
3234af732203SDimitry Andric         BS.cancelScheduling(VL, VL0);
3235af732203SDimitry Andric         newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
3236af732203SDimitry Andric                      ReuseShuffleIndicies);
3237af732203SDimitry Andric         LLVM_DEBUG(dbgs() << "SLP: Gathering stores of non-packed type.\n");
3238af732203SDimitry Andric         return;
3239af732203SDimitry Andric       }
3240480093f4SDimitry Andric       // Make sure all stores in the bundle are simple - we can't vectorize
3241480093f4SDimitry Andric       // atomic or volatile stores.
3242480093f4SDimitry Andric       SmallVector<Value *, 4> PointerOps(VL.size());
3243480093f4SDimitry Andric       ValueList Operands(VL.size());
3244480093f4SDimitry Andric       auto POIter = PointerOps.begin();
3245480093f4SDimitry Andric       auto OIter = Operands.begin();
3246480093f4SDimitry Andric       for (Value *V : VL) {
3247480093f4SDimitry Andric         auto *SI = cast<StoreInst>(V);
3248480093f4SDimitry Andric         if (!SI->isSimple()) {
3249480093f4SDimitry Andric           BS.cancelScheduling(VL, VL0);
3250480093f4SDimitry Andric           newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
3251480093f4SDimitry Andric                        ReuseShuffleIndicies);
3252480093f4SDimitry Andric           LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple stores.\n");
3253480093f4SDimitry Andric           return;
3254480093f4SDimitry Andric         }
3255480093f4SDimitry Andric         *POIter = SI->getPointerOperand();
3256480093f4SDimitry Andric         *OIter = SI->getValueOperand();
3257480093f4SDimitry Andric         ++POIter;
3258480093f4SDimitry Andric         ++OIter;
3259480093f4SDimitry Andric       }
3260480093f4SDimitry Andric 
3261480093f4SDimitry Andric       OrdersType CurrentOrder;
3262480093f4SDimitry Andric       // Check the order of pointer operands.
32635f7ddb14SDimitry Andric       if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
3264480093f4SDimitry Andric         Value *Ptr0;
3265480093f4SDimitry Andric         Value *PtrN;
3266480093f4SDimitry Andric         if (CurrentOrder.empty()) {
3267480093f4SDimitry Andric           Ptr0 = PointerOps.front();
3268480093f4SDimitry Andric           PtrN = PointerOps.back();
3269480093f4SDimitry Andric         } else {
3270480093f4SDimitry Andric           Ptr0 = PointerOps[CurrentOrder.front()];
3271480093f4SDimitry Andric           PtrN = PointerOps[CurrentOrder.back()];
3272480093f4SDimitry Andric         }
32735f7ddb14SDimitry Andric         Optional<int> Dist =
32745f7ddb14SDimitry Andric             getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
3275480093f4SDimitry Andric         // Check that the sorted pointer operands are consecutive.
32765f7ddb14SDimitry Andric         if (static_cast<unsigned>(*Dist) == VL.size() - 1) {
3277480093f4SDimitry Andric           if (CurrentOrder.empty()) {
3278480093f4SDimitry Andric             // Original stores are consecutive and does not require reordering.
3279480093f4SDimitry Andric             ++NumOpsWantToKeepOriginalOrder;
3280480093f4SDimitry Andric             TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S,
3281480093f4SDimitry Andric                                          UserTreeIdx, ReuseShuffleIndicies);
3282480093f4SDimitry Andric             TE->setOperandsInOrder();
3283480093f4SDimitry Andric             buildTree_rec(Operands, Depth + 1, {TE, 0});
3284480093f4SDimitry Andric             LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
3285480093f4SDimitry Andric           } else {
3286480093f4SDimitry Andric             TreeEntry *TE =
3287480093f4SDimitry Andric                 newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
3288af732203SDimitry Andric                              ReuseShuffleIndicies, CurrentOrder);
3289480093f4SDimitry Andric             TE->setOperandsInOrder();
3290480093f4SDimitry Andric             buildTree_rec(Operands, Depth + 1, {TE, 0});
3291480093f4SDimitry Andric             LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled stores.\n");
3292af732203SDimitry Andric             findRootOrder(CurrentOrder);
3293af732203SDimitry Andric             ++NumOpsWantToKeepOrder[CurrentOrder];
3294480093f4SDimitry Andric           }
3295480093f4SDimitry Andric           return;
3296480093f4SDimitry Andric         }
3297480093f4SDimitry Andric       }
3298480093f4SDimitry Andric 
32990b57cec5SDimitry Andric       BS.cancelScheduling(VL, VL0);
33008bcb0991SDimitry Andric       newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
33018bcb0991SDimitry Andric                    ReuseShuffleIndicies);
33020b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
33030b57cec5SDimitry Andric       return;
33040b57cec5SDimitry Andric     }
33050b57cec5SDimitry Andric     case Instruction::Call: {
33065ffd83dbSDimitry Andric       // Check if the calls are all to the same vectorizable intrinsic or
33075ffd83dbSDimitry Andric       // library function.
33080b57cec5SDimitry Andric       CallInst *CI = cast<CallInst>(VL0);
33090b57cec5SDimitry Andric       Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
33105ffd83dbSDimitry Andric 
33115ffd83dbSDimitry Andric       VFShape Shape = VFShape::get(
3312af732203SDimitry Andric           *CI, ElementCount::getFixed(static_cast<unsigned int>(VL.size())),
33135ffd83dbSDimitry Andric           false /*HasGlobalPred*/);
33145ffd83dbSDimitry Andric       Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
33155ffd83dbSDimitry Andric 
33165ffd83dbSDimitry Andric       if (!VecFunc && !isTriviallyVectorizable(ID)) {
33170b57cec5SDimitry Andric         BS.cancelScheduling(VL, VL0);
33188bcb0991SDimitry Andric         newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
33198bcb0991SDimitry Andric                      ReuseShuffleIndicies);
33200b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
33210b57cec5SDimitry Andric         return;
33220b57cec5SDimitry Andric       }
33235ffd83dbSDimitry Andric       Function *F = CI->getCalledFunction();
33240b57cec5SDimitry Andric       unsigned NumArgs = CI->getNumArgOperands();
33250b57cec5SDimitry Andric       SmallVector<Value*, 4> ScalarArgs(NumArgs, nullptr);
33260b57cec5SDimitry Andric       for (unsigned j = 0; j != NumArgs; ++j)
33270b57cec5SDimitry Andric         if (hasVectorInstrinsicScalarOpd(ID, j))
33280b57cec5SDimitry Andric           ScalarArgs[j] = CI->getArgOperand(j);
33298bcb0991SDimitry Andric       for (Value *V : VL) {
33308bcb0991SDimitry Andric         CallInst *CI2 = dyn_cast<CallInst>(V);
33315ffd83dbSDimitry Andric         if (!CI2 || CI2->getCalledFunction() != F ||
33320b57cec5SDimitry Andric             getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
33335ffd83dbSDimitry Andric             (VecFunc &&
33345ffd83dbSDimitry Andric              VecFunc != VFDatabase(*CI2).getVectorizedFunction(Shape)) ||
33350b57cec5SDimitry Andric             !CI->hasIdenticalOperandBundleSchema(*CI2)) {
33360b57cec5SDimitry Andric           BS.cancelScheduling(VL, VL0);
33378bcb0991SDimitry Andric           newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
33388bcb0991SDimitry Andric                        ReuseShuffleIndicies);
33398bcb0991SDimitry Andric           LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *V
33400b57cec5SDimitry Andric                             << "\n");
33410b57cec5SDimitry Andric           return;
33420b57cec5SDimitry Andric         }
33430b57cec5SDimitry Andric         // Some intrinsics have scalar arguments and should be same in order for
33440b57cec5SDimitry Andric         // them to be vectorized.
33450b57cec5SDimitry Andric         for (unsigned j = 0; j != NumArgs; ++j) {
33460b57cec5SDimitry Andric           if (hasVectorInstrinsicScalarOpd(ID, j)) {
33470b57cec5SDimitry Andric             Value *A1J = CI2->getArgOperand(j);
33480b57cec5SDimitry Andric             if (ScalarArgs[j] != A1J) {
33490b57cec5SDimitry Andric               BS.cancelScheduling(VL, VL0);
33508bcb0991SDimitry Andric               newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
33518bcb0991SDimitry Andric                            ReuseShuffleIndicies);
33520b57cec5SDimitry Andric               LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
33530b57cec5SDimitry Andric                                 << " argument " << ScalarArgs[j] << "!=" << A1J
33540b57cec5SDimitry Andric                                 << "\n");
33550b57cec5SDimitry Andric               return;
33560b57cec5SDimitry Andric             }
33570b57cec5SDimitry Andric           }
33580b57cec5SDimitry Andric         }
33590b57cec5SDimitry Andric         // Verify that the bundle operands are identical between the two calls.
33600b57cec5SDimitry Andric         if (CI->hasOperandBundles() &&
33610b57cec5SDimitry Andric             !std::equal(CI->op_begin() + CI->getBundleOperandsStartIndex(),
33620b57cec5SDimitry Andric                         CI->op_begin() + CI->getBundleOperandsEndIndex(),
33630b57cec5SDimitry Andric                         CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
33640b57cec5SDimitry Andric           BS.cancelScheduling(VL, VL0);
33658bcb0991SDimitry Andric           newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
33668bcb0991SDimitry Andric                        ReuseShuffleIndicies);
33670b57cec5SDimitry Andric           LLVM_DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:"
33688bcb0991SDimitry Andric                             << *CI << "!=" << *V << '\n');
33690b57cec5SDimitry Andric           return;
33700b57cec5SDimitry Andric         }
33710b57cec5SDimitry Andric       }
33720b57cec5SDimitry Andric 
33738bcb0991SDimitry Andric       TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
33748bcb0991SDimitry Andric                                    ReuseShuffleIndicies);
33758bcb0991SDimitry Andric       TE->setOperandsInOrder();
33760b57cec5SDimitry Andric       for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
33770b57cec5SDimitry Andric         ValueList Operands;
33780b57cec5SDimitry Andric         // Prepare the operand vector.
33798bcb0991SDimitry Andric         for (Value *V : VL) {
33808bcb0991SDimitry Andric           auto *CI2 = cast<CallInst>(V);
33810b57cec5SDimitry Andric           Operands.push_back(CI2->getArgOperand(i));
33820b57cec5SDimitry Andric         }
33830b57cec5SDimitry Andric         buildTree_rec(Operands, Depth + 1, {TE, i});
33840b57cec5SDimitry Andric       }
33850b57cec5SDimitry Andric       return;
33860b57cec5SDimitry Andric     }
33870b57cec5SDimitry Andric     case Instruction::ShuffleVector: {
33880b57cec5SDimitry Andric       // If this is not an alternate sequence of opcode like add-sub
33890b57cec5SDimitry Andric       // then do not vectorize this instruction.
33900b57cec5SDimitry Andric       if (!S.isAltShuffle()) {
33910b57cec5SDimitry Andric         BS.cancelScheduling(VL, VL0);
33928bcb0991SDimitry Andric         newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
33938bcb0991SDimitry Andric                      ReuseShuffleIndicies);
33940b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
33950b57cec5SDimitry Andric         return;
33960b57cec5SDimitry Andric       }
33978bcb0991SDimitry Andric       TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
33988bcb0991SDimitry Andric                                    ReuseShuffleIndicies);
33990b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
34000b57cec5SDimitry Andric 
34010b57cec5SDimitry Andric       // Reorder operands if reordering would enable vectorization.
34020b57cec5SDimitry Andric       if (isa<BinaryOperator>(VL0)) {
34030b57cec5SDimitry Andric         ValueList Left, Right;
3404480093f4SDimitry Andric         reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
34058bcb0991SDimitry Andric         TE->setOperand(0, Left);
34068bcb0991SDimitry Andric         TE->setOperand(1, Right);
34070b57cec5SDimitry Andric         buildTree_rec(Left, Depth + 1, {TE, 0});
34080b57cec5SDimitry Andric         buildTree_rec(Right, Depth + 1, {TE, 1});
34090b57cec5SDimitry Andric         return;
34100b57cec5SDimitry Andric       }
34110b57cec5SDimitry Andric 
34128bcb0991SDimitry Andric       TE->setOperandsInOrder();
34130b57cec5SDimitry Andric       for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
34140b57cec5SDimitry Andric         ValueList Operands;
34150b57cec5SDimitry Andric         // Prepare the operand vector.
34168bcb0991SDimitry Andric         for (Value *V : VL)
34178bcb0991SDimitry Andric           Operands.push_back(cast<Instruction>(V)->getOperand(i));
34180b57cec5SDimitry Andric 
34190b57cec5SDimitry Andric         buildTree_rec(Operands, Depth + 1, {TE, i});
34200b57cec5SDimitry Andric       }
34210b57cec5SDimitry Andric       return;
34220b57cec5SDimitry Andric     }
34230b57cec5SDimitry Andric     default:
34240b57cec5SDimitry Andric       BS.cancelScheduling(VL, VL0);
34258bcb0991SDimitry Andric       newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
34268bcb0991SDimitry Andric                    ReuseShuffleIndicies);
34270b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
34280b57cec5SDimitry Andric       return;
34290b57cec5SDimitry Andric   }
34300b57cec5SDimitry Andric }
34310b57cec5SDimitry Andric 
canMapToVector(Type * T,const DataLayout & DL) const34320b57cec5SDimitry Andric unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const {
3433480093f4SDimitry Andric   unsigned N = 1;
3434480093f4SDimitry Andric   Type *EltTy = T;
3435480093f4SDimitry Andric 
34365ffd83dbSDimitry Andric   while (isa<StructType>(EltTy) || isa<ArrayType>(EltTy) ||
34375ffd83dbSDimitry Andric          isa<VectorType>(EltTy)) {
3438480093f4SDimitry Andric     if (auto *ST = dyn_cast<StructType>(EltTy)) {
3439480093f4SDimitry Andric       // Check that struct is homogeneous.
3440480093f4SDimitry Andric       for (const auto *Ty : ST->elements())
3441480093f4SDimitry Andric         if (Ty != *ST->element_begin())
3442480093f4SDimitry Andric           return 0;
3443480093f4SDimitry Andric       N *= ST->getNumElements();
34440b57cec5SDimitry Andric       EltTy = *ST->element_begin();
34455ffd83dbSDimitry Andric     } else if (auto *AT = dyn_cast<ArrayType>(EltTy)) {
34465ffd83dbSDimitry Andric       N *= AT->getNumElements();
34475ffd83dbSDimitry Andric       EltTy = AT->getElementType();
34480b57cec5SDimitry Andric     } else {
3449af732203SDimitry Andric       auto *VT = cast<FixedVectorType>(EltTy);
34505ffd83dbSDimitry Andric       N *= VT->getNumElements();
34515ffd83dbSDimitry Andric       EltTy = VT->getElementType();
34520b57cec5SDimitry Andric     }
3453480093f4SDimitry Andric   }
3454480093f4SDimitry Andric 
34550b57cec5SDimitry Andric   if (!isValidElementType(EltTy))
34560b57cec5SDimitry Andric     return 0;
34575ffd83dbSDimitry Andric   uint64_t VTSize = DL.getTypeStoreSizeInBits(FixedVectorType::get(EltTy, N));
34580b57cec5SDimitry Andric   if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize || VTSize != DL.getTypeStoreSizeInBits(T))
34590b57cec5SDimitry Andric     return 0;
34600b57cec5SDimitry Andric   return N;
34610b57cec5SDimitry Andric }
34620b57cec5SDimitry Andric 
/// Check whether the extract instructions in \p VL all read distinct lanes of
/// one common source vector/aggregate, so that the source can be reused
/// directly instead of gathering scalars. On success \p CurrentOrder holds the
/// lane permutation (empty semantics: see callers). \returns true if the
/// extracts are already in identity order, false otherwise (including the
/// cannot-reuse case, in which \p CurrentOrder is left empty).
bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
                              SmallVectorImpl<unsigned> &CurrentOrder) const {
  Instruction *E0 = cast<Instruction>(OpValue);
  assert(E0->getOpcode() == Instruction::ExtractElement ||
         E0->getOpcode() == Instruction::ExtractValue);
  assert(E0->getOpcode() == getSameOpcode(VL).getOpcode() && "Invalid opcode");
  // Check if all of the extracts come from the same vector and from the
  // correct offset.
  Value *Vec = E0->getOperand(0);

  CurrentOrder.clear();

  // We have to extract from a vector/aggregate with the same number of elements.
  unsigned NElts;
  if (E0->getOpcode() == Instruction::ExtractValue) {
    // extractvalue: the aggregate must be mappable to a flat vector of the
    // same element count.
    const DataLayout &DL = E0->getModule()->getDataLayout();
    NElts = canMapToVector(Vec->getType(), DL);
    if (!NElts)
      return false;
    // Check if load can be rewritten as load of vector.
    LoadInst *LI = dyn_cast<LoadInst>(Vec);
    if (!LI || !LI->isSimple() || !LI->hasNUses(VL.size()))
      return false;
  } else {
    // extractelement: element count comes straight from the vector type.
    NElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
  }

  if (NElts != VL.size())
    return false;

  // Check that all of the indices extract from the correct offset.
  bool ShouldKeepOrder = true;
  unsigned E = VL.size();
  // Assign to all items the initial value E + 1 so we can check if the extract
  // instruction index was used already.
  // Also, later we can check that all the indices are used and we have a
  // consecutive access in the extract instructions, by checking that no
  // element of CurrentOrder still has value E + 1.
  CurrentOrder.assign(E, E + 1);
  unsigned I = 0;
  for (; I < E; ++I) {
    auto *Inst = cast<Instruction>(VL[I]);
    // All extracts must read from the one common source.
    if (Inst->getOperand(0) != Vec)
      break;
    // The extract index must be a known constant.
    Optional<unsigned> Idx = getExtractIndex(Inst);
    if (!Idx)
      break;
    const unsigned ExtIdx = *Idx;
    if (ExtIdx != I) {
      // Out-of-place lane: record the permutation, rejecting out-of-range or
      // duplicate lanes (slot already claimed, i.e. no longer E + 1).
      if (ExtIdx >= E || CurrentOrder[ExtIdx] != E + 1)
        break;
      ShouldKeepOrder = false;
      CurrentOrder[ExtIdx] = I;
    } else {
      // In-place lane; still reject a duplicate claim of this slot.
      if (CurrentOrder[I] != E + 1)
        break;
      CurrentOrder[I] = I;
    }
  }
  // Early break above means the extracts cannot reuse the source vector.
  if (I < E) {
    CurrentOrder.clear();
    return false;
  }

  return ShouldKeepOrder;
}
35290b57cec5SDimitry Andric 
areAllUsersVectorized(Instruction * I,ArrayRef<Value * > VectorizedVals) const35305f7ddb14SDimitry Andric bool BoUpSLP::areAllUsersVectorized(Instruction *I,
35315f7ddb14SDimitry Andric                                     ArrayRef<Value *> VectorizedVals) const {
35325f7ddb14SDimitry Andric   return (I->hasOneUse() && is_contained(VectorizedVals, I)) ||
35335f7ddb14SDimitry Andric          llvm::all_of(I->users(), [this](User *U) {
35340b57cec5SDimitry Andric            return ScalarToTreeEntry.count(U) > 0;
35350b57cec5SDimitry Andric          });
35360b57cec5SDimitry Andric }
35370b57cec5SDimitry Andric 
3538af732203SDimitry Andric static std::pair<InstructionCost, InstructionCost>
getVectorCallCosts(CallInst * CI,FixedVectorType * VecTy,TargetTransformInfo * TTI,TargetLibraryInfo * TLI)3539af732203SDimitry Andric getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
3540af732203SDimitry Andric                    TargetTransformInfo *TTI, TargetLibraryInfo *TLI) {
35415ffd83dbSDimitry Andric   Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
35425ffd83dbSDimitry Andric 
35435ffd83dbSDimitry Andric   // Calculate the cost of the scalar and vector calls.
35445f7ddb14SDimitry Andric   SmallVector<Type *, 4> VecTys;
35455f7ddb14SDimitry Andric   for (Use &Arg : CI->args())
35465f7ddb14SDimitry Andric     VecTys.push_back(
35475f7ddb14SDimitry Andric         FixedVectorType::get(Arg->getType(), VecTy->getNumElements()));
35485f7ddb14SDimitry Andric   FastMathFlags FMF;
35495f7ddb14SDimitry Andric   if (auto *FPCI = dyn_cast<FPMathOperator>(CI))
35505f7ddb14SDimitry Andric     FMF = FPCI->getFastMathFlags();
35515f7ddb14SDimitry Andric   SmallVector<const Value *> Arguments(CI->arg_begin(), CI->arg_end());
35525f7ddb14SDimitry Andric   IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, VecTys, FMF,
35535f7ddb14SDimitry Andric                                     dyn_cast<IntrinsicInst>(CI));
3554af732203SDimitry Andric   auto IntrinsicCost =
35555ffd83dbSDimitry Andric     TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);
35565ffd83dbSDimitry Andric 
3557af732203SDimitry Andric   auto Shape = VFShape::get(*CI, ElementCount::getFixed(static_cast<unsigned>(
3558af732203SDimitry Andric                                      VecTy->getNumElements())),
35595ffd83dbSDimitry Andric                             false /*HasGlobalPred*/);
35605ffd83dbSDimitry Andric   Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
3561af732203SDimitry Andric   auto LibCost = IntrinsicCost;
35625ffd83dbSDimitry Andric   if (!CI->isNoBuiltin() && VecFunc) {
35635ffd83dbSDimitry Andric     // Calculate the cost of the vector library call.
35645ffd83dbSDimitry Andric     // If the corresponding vector call is cheaper, return its cost.
35655ffd83dbSDimitry Andric     LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys,
35665ffd83dbSDimitry Andric                                     TTI::TCK_RecipThroughput);
35675ffd83dbSDimitry Andric   }
35685ffd83dbSDimitry Andric   return {IntrinsicCost, LibCost};
35695ffd83dbSDimitry Andric }
35705ffd83dbSDimitry Andric 
35715f7ddb14SDimitry Andric /// Compute the cost of creating a vector of type \p VecTy containing the
35725f7ddb14SDimitry Andric /// extracted values from \p VL.
35735f7ddb14SDimitry Andric static InstructionCost
computeExtractCost(ArrayRef<Value * > VL,FixedVectorType * VecTy,TargetTransformInfo::ShuffleKind ShuffleKind,ArrayRef<int> Mask,TargetTransformInfo & TTI)35745f7ddb14SDimitry Andric computeExtractCost(ArrayRef<Value *> VL, FixedVectorType *VecTy,
35755f7ddb14SDimitry Andric                    TargetTransformInfo::ShuffleKind ShuffleKind,
35765f7ddb14SDimitry Andric                    ArrayRef<int> Mask, TargetTransformInfo &TTI) {
35775f7ddb14SDimitry Andric   unsigned NumOfParts = TTI.getNumberOfParts(VecTy);
35785f7ddb14SDimitry Andric 
35795f7ddb14SDimitry Andric   if (ShuffleKind != TargetTransformInfo::SK_PermuteSingleSrc || !NumOfParts ||
35805f7ddb14SDimitry Andric       VecTy->getNumElements() < NumOfParts)
35815f7ddb14SDimitry Andric     return TTI.getShuffleCost(ShuffleKind, VecTy, Mask);
35825f7ddb14SDimitry Andric 
35835f7ddb14SDimitry Andric   bool AllConsecutive = true;
35845f7ddb14SDimitry Andric   unsigned EltsPerVector = VecTy->getNumElements() / NumOfParts;
35855f7ddb14SDimitry Andric   unsigned Idx = -1;
35865f7ddb14SDimitry Andric   InstructionCost Cost = 0;
35875f7ddb14SDimitry Andric 
35885f7ddb14SDimitry Andric   // Process extracts in blocks of EltsPerVector to check if the source vector
35895f7ddb14SDimitry Andric   // operand can be re-used directly. If not, add the cost of creating a shuffle
35905f7ddb14SDimitry Andric   // to extract the values into a vector register.
35915f7ddb14SDimitry Andric   for (auto *V : VL) {
35925f7ddb14SDimitry Andric     ++Idx;
35935f7ddb14SDimitry Andric 
35945f7ddb14SDimitry Andric     // Reached the start of a new vector registers.
35955f7ddb14SDimitry Andric     if (Idx % EltsPerVector == 0) {
35965f7ddb14SDimitry Andric       AllConsecutive = true;
35975f7ddb14SDimitry Andric       continue;
35985f7ddb14SDimitry Andric     }
35995f7ddb14SDimitry Andric 
36005f7ddb14SDimitry Andric     // Check all extracts for a vector register on the target directly
36015f7ddb14SDimitry Andric     // extract values in order.
36025f7ddb14SDimitry Andric     unsigned CurrentIdx = *getExtractIndex(cast<Instruction>(V));
36035f7ddb14SDimitry Andric     unsigned PrevIdx = *getExtractIndex(cast<Instruction>(VL[Idx - 1]));
36045f7ddb14SDimitry Andric     AllConsecutive &= PrevIdx + 1 == CurrentIdx &&
36055f7ddb14SDimitry Andric                       CurrentIdx % EltsPerVector == Idx % EltsPerVector;
36065f7ddb14SDimitry Andric 
36075f7ddb14SDimitry Andric     if (AllConsecutive)
36085f7ddb14SDimitry Andric       continue;
36095f7ddb14SDimitry Andric 
36105f7ddb14SDimitry Andric     // Skip all indices, except for the last index per vector block.
36115f7ddb14SDimitry Andric     if ((Idx + 1) % EltsPerVector != 0 && Idx + 1 != VL.size())
36125f7ddb14SDimitry Andric       continue;
36135f7ddb14SDimitry Andric 
36145f7ddb14SDimitry Andric     // If we have a series of extracts which are not consecutive and hence
36155f7ddb14SDimitry Andric     // cannot re-use the source vector register directly, compute the shuffle
36165f7ddb14SDimitry Andric     // cost to extract the a vector with EltsPerVector elements.
36175f7ddb14SDimitry Andric     Cost += TTI.getShuffleCost(
36185f7ddb14SDimitry Andric         TargetTransformInfo::SK_PermuteSingleSrc,
36195f7ddb14SDimitry Andric         FixedVectorType::get(VecTy->getElementType(), EltsPerVector));
36205f7ddb14SDimitry Andric   }
36215f7ddb14SDimitry Andric   return Cost;
36225f7ddb14SDimitry Andric }
36235f7ddb14SDimitry Andric 
36245f7ddb14SDimitry Andric /// Shuffles \p Mask in accordance with the given \p SubMask.
addMask(SmallVectorImpl<int> & Mask,ArrayRef<int> SubMask)36255f7ddb14SDimitry Andric static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask) {
36265f7ddb14SDimitry Andric   if (SubMask.empty())
36275f7ddb14SDimitry Andric     return;
36285f7ddb14SDimitry Andric   if (Mask.empty()) {
36295f7ddb14SDimitry Andric     Mask.append(SubMask.begin(), SubMask.end());
36305f7ddb14SDimitry Andric     return;
36315f7ddb14SDimitry Andric   }
36325f7ddb14SDimitry Andric   SmallVector<int, 4> NewMask(SubMask.size(), SubMask.size());
36335f7ddb14SDimitry Andric   int TermValue = std::min(Mask.size(), SubMask.size());
36345f7ddb14SDimitry Andric   for (int I = 0, E = SubMask.size(); I < E; ++I) {
36355f7ddb14SDimitry Andric     if (SubMask[I] >= TermValue || SubMask[I] == UndefMaskElem ||
36365f7ddb14SDimitry Andric         Mask[SubMask[I]] >= TermValue) {
36375f7ddb14SDimitry Andric       NewMask[I] = UndefMaskElem;
36385f7ddb14SDimitry Andric       continue;
36395f7ddb14SDimitry Andric     }
36405f7ddb14SDimitry Andric     NewMask[I] = Mask[SubMask[I]];
36415f7ddb14SDimitry Andric   }
36425f7ddb14SDimitry Andric   Mask.swap(NewMask);
36435f7ddb14SDimitry Andric }
36445f7ddb14SDimitry Andric 
getEntryCost(const TreeEntry * E,ArrayRef<Value * > VectorizedVals)36455f7ddb14SDimitry Andric InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
36465f7ddb14SDimitry Andric                                       ArrayRef<Value *> VectorizedVals) {
36470b57cec5SDimitry Andric   ArrayRef<Value*> VL = E->Scalars;
36480b57cec5SDimitry Andric 
36490b57cec5SDimitry Andric   Type *ScalarTy = VL[0]->getType();
36500b57cec5SDimitry Andric   if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
36510b57cec5SDimitry Andric     ScalarTy = SI->getValueOperand()->getType();
36520b57cec5SDimitry Andric   else if (CmpInst *CI = dyn_cast<CmpInst>(VL[0]))
36530b57cec5SDimitry Andric     ScalarTy = CI->getOperand(0)->getType();
36545f7ddb14SDimitry Andric   else if (auto *IE = dyn_cast<InsertElementInst>(VL[0]))
36555f7ddb14SDimitry Andric     ScalarTy = IE->getOperand(1)->getType();
36565ffd83dbSDimitry Andric   auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
36575ffd83dbSDimitry Andric   TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
36580b57cec5SDimitry Andric 
36590b57cec5SDimitry Andric   // If we have computed a smaller type for the expression, update VecTy so
36600b57cec5SDimitry Andric   // that the costs will be accurate.
36610b57cec5SDimitry Andric   if (MinBWs.count(VL[0]))
36625ffd83dbSDimitry Andric     VecTy = FixedVectorType::get(
36630b57cec5SDimitry Andric         IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());
36645f7ddb14SDimitry Andric   auto *FinalVecTy = VecTy;
36650b57cec5SDimitry Andric 
36660b57cec5SDimitry Andric   unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size();
36670b57cec5SDimitry Andric   bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
36685f7ddb14SDimitry Andric   if (NeedToShuffleReuses)
36695f7ddb14SDimitry Andric     FinalVecTy =
36705f7ddb14SDimitry Andric         FixedVectorType::get(VecTy->getElementType(), ReuseShuffleNumbers);
36715f7ddb14SDimitry Andric   // FIXME: it tries to fix a problem with MSVC buildbots.
36725f7ddb14SDimitry Andric   TargetTransformInfo &TTIRef = *TTI;
36735f7ddb14SDimitry Andric   auto &&AdjustExtractsCost = [this, &TTIRef, CostKind, VL, VecTy,
36745f7ddb14SDimitry Andric                                VectorizedVals](InstructionCost &Cost,
36755f7ddb14SDimitry Andric                                                bool IsGather) {
36765f7ddb14SDimitry Andric     DenseMap<Value *, int> ExtractVectorsTys;
36770b57cec5SDimitry Andric     for (auto *V : VL) {
36780b57cec5SDimitry Andric       // If all users of instruction are going to be vectorized and this
36790b57cec5SDimitry Andric       // instruction itself is not going to be vectorized, consider this
36800b57cec5SDimitry Andric       // instruction as dead and remove its cost from the final cost of the
36810b57cec5SDimitry Andric       // vectorized tree.
36825f7ddb14SDimitry Andric       if (!areAllUsersVectorized(cast<Instruction>(V), VectorizedVals) ||
36835f7ddb14SDimitry Andric           (IsGather && ScalarToTreeEntry.count(V)))
36845f7ddb14SDimitry Andric         continue;
36855f7ddb14SDimitry Andric       auto *EE = cast<ExtractElementInst>(V);
36865f7ddb14SDimitry Andric       unsigned Idx = *getExtractIndex(EE);
36875f7ddb14SDimitry Andric       if (TTIRef.getNumberOfParts(VecTy) !=
36885f7ddb14SDimitry Andric           TTIRef.getNumberOfParts(EE->getVectorOperandType())) {
36895f7ddb14SDimitry Andric         auto It =
36905f7ddb14SDimitry Andric             ExtractVectorsTys.try_emplace(EE->getVectorOperand(), Idx).first;
36915f7ddb14SDimitry Andric         It->getSecond() = std::min<int>(It->second, Idx);
36925f7ddb14SDimitry Andric       }
36935f7ddb14SDimitry Andric       // Take credit for instruction that will become dead.
36945f7ddb14SDimitry Andric       if (EE->hasOneUse()) {
36955f7ddb14SDimitry Andric         Instruction *Ext = EE->user_back();
36965f7ddb14SDimitry Andric         if ((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
36975f7ddb14SDimitry Andric             all_of(Ext->users(),
36985f7ddb14SDimitry Andric                    [](User *U) { return isa<GetElementPtrInst>(U); })) {
36995f7ddb14SDimitry Andric           // Use getExtractWithExtendCost() to calculate the cost of
37005f7ddb14SDimitry Andric           // extractelement/ext pair.
37015f7ddb14SDimitry Andric           Cost -=
37025f7ddb14SDimitry Andric               TTIRef.getExtractWithExtendCost(Ext->getOpcode(), Ext->getType(),
37035f7ddb14SDimitry Andric                                               EE->getVectorOperandType(), Idx);
37045f7ddb14SDimitry Andric           // Add back the cost of s|zext which is subtracted separately.
37055f7ddb14SDimitry Andric           Cost += TTIRef.getCastInstrCost(
37065f7ddb14SDimitry Andric               Ext->getOpcode(), Ext->getType(), EE->getType(),
37075f7ddb14SDimitry Andric               TTI::getCastContextHint(Ext), CostKind, Ext);
37085f7ddb14SDimitry Andric           continue;
37090b57cec5SDimitry Andric         }
37100b57cec5SDimitry Andric       }
37115f7ddb14SDimitry Andric       Cost -= TTIRef.getVectorInstrCost(Instruction::ExtractElement,
37125f7ddb14SDimitry Andric                                         EE->getVectorOperandType(), Idx);
37135f7ddb14SDimitry Andric     }
37145f7ddb14SDimitry Andric     // Add a cost for subvector extracts/inserts if required.
37155f7ddb14SDimitry Andric     for (const auto &Data : ExtractVectorsTys) {
37165f7ddb14SDimitry Andric       auto *EEVTy = cast<FixedVectorType>(Data.first->getType());
37175f7ddb14SDimitry Andric       unsigned NumElts = VecTy->getNumElements();
37185f7ddb14SDimitry Andric       if (TTIRef.getNumberOfParts(EEVTy) > TTIRef.getNumberOfParts(VecTy)) {
37195f7ddb14SDimitry Andric         unsigned Idx = (Data.second / NumElts) * NumElts;
37205f7ddb14SDimitry Andric         unsigned EENumElts = EEVTy->getNumElements();
37215f7ddb14SDimitry Andric         if (Idx + NumElts <= EENumElts) {
37225f7ddb14SDimitry Andric           Cost +=
37235f7ddb14SDimitry Andric               TTIRef.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
37245f7ddb14SDimitry Andric                                     EEVTy, None, Idx, VecTy);
37255f7ddb14SDimitry Andric         } else {
37265f7ddb14SDimitry Andric           // Need to round up the subvector type vectorization factor to avoid a
37275f7ddb14SDimitry Andric           // crash in cost model functions. Make SubVT so that Idx + VF of SubVT
37285f7ddb14SDimitry Andric           // <= EENumElts.
37295f7ddb14SDimitry Andric           auto *SubVT =
37305f7ddb14SDimitry Andric               FixedVectorType::get(VecTy->getElementType(), EENumElts - Idx);
37315f7ddb14SDimitry Andric           Cost +=
37325f7ddb14SDimitry Andric               TTIRef.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
37335f7ddb14SDimitry Andric                                     EEVTy, None, Idx, SubVT);
37345f7ddb14SDimitry Andric         }
37355f7ddb14SDimitry Andric       } else {
37365f7ddb14SDimitry Andric         Cost += TTIRef.getShuffleCost(TargetTransformInfo::SK_InsertSubvector,
37375f7ddb14SDimitry Andric                                       VecTy, None, 0, EEVTy);
37380b57cec5SDimitry Andric       }
37390b57cec5SDimitry Andric     }
37405f7ddb14SDimitry Andric   };
37415f7ddb14SDimitry Andric   if (E->State == TreeEntry::NeedToGather) {
37425f7ddb14SDimitry Andric     if (allConstant(VL))
37435f7ddb14SDimitry Andric       return 0;
37445f7ddb14SDimitry Andric     if (isa<InsertElementInst>(VL[0]))
37455f7ddb14SDimitry Andric       return InstructionCost::getInvalid();
37465f7ddb14SDimitry Andric     SmallVector<int> Mask;
37475f7ddb14SDimitry Andric     SmallVector<const TreeEntry *> Entries;
37485f7ddb14SDimitry Andric     Optional<TargetTransformInfo::ShuffleKind> Shuffle =
37495f7ddb14SDimitry Andric         isGatherShuffledEntry(E, Mask, Entries);
37505f7ddb14SDimitry Andric     if (Shuffle.hasValue()) {
37515f7ddb14SDimitry Andric       InstructionCost GatherCost = 0;
37525f7ddb14SDimitry Andric       if (ShuffleVectorInst::isIdentityMask(Mask)) {
37535f7ddb14SDimitry Andric         // Perfect match in the graph, will reuse the previously vectorized
37545f7ddb14SDimitry Andric         // node. Cost is 0.
37555f7ddb14SDimitry Andric         LLVM_DEBUG(
37565f7ddb14SDimitry Andric             dbgs()
37575f7ddb14SDimitry Andric             << "SLP: perfect diamond match for gather bundle that starts with "
37585f7ddb14SDimitry Andric             << *VL.front() << ".\n");
37595f7ddb14SDimitry Andric         if (NeedToShuffleReuses)
37605f7ddb14SDimitry Andric           GatherCost =
37615f7ddb14SDimitry Andric               TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
37625f7ddb14SDimitry Andric                                   FinalVecTy, E->ReuseShuffleIndices);
37635f7ddb14SDimitry Andric       } else {
37645f7ddb14SDimitry Andric         LLVM_DEBUG(dbgs() << "SLP: shuffled " << Entries.size()
37655f7ddb14SDimitry Andric                           << " entries for bundle that starts with "
37665f7ddb14SDimitry Andric                           << *VL.front() << ".\n");
37675f7ddb14SDimitry Andric         // Detected that instead of gather we can emit a shuffle of single/two
37685f7ddb14SDimitry Andric         // previously vectorized nodes. Add the cost of the permutation rather
37695f7ddb14SDimitry Andric         // than gather.
37705f7ddb14SDimitry Andric         ::addMask(Mask, E->ReuseShuffleIndices);
37715f7ddb14SDimitry Andric         GatherCost = TTI->getShuffleCost(*Shuffle, FinalVecTy, Mask);
37725f7ddb14SDimitry Andric       }
37735f7ddb14SDimitry Andric       return GatherCost;
37745f7ddb14SDimitry Andric     }
37755f7ddb14SDimitry Andric     if (isSplat(VL)) {
37765f7ddb14SDimitry Andric       // Found the broadcasting of the single scalar, calculate the cost as the
37775f7ddb14SDimitry Andric       // broadcast.
37785f7ddb14SDimitry Andric       return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
37795f7ddb14SDimitry Andric     }
37805f7ddb14SDimitry Andric     if (E->getOpcode() == Instruction::ExtractElement && allSameType(VL) &&
37815f7ddb14SDimitry Andric         allSameBlock(VL) &&
37825f7ddb14SDimitry Andric         !isa<ScalableVectorType>(
37835f7ddb14SDimitry Andric             cast<ExtractElementInst>(E->getMainOp())->getVectorOperandType())) {
37845f7ddb14SDimitry Andric       // Check that gather of extractelements can be represented as just a
37855f7ddb14SDimitry Andric       // shuffle of a single/two vectors the scalars are extracted from.
37865f7ddb14SDimitry Andric       SmallVector<int> Mask;
37875f7ddb14SDimitry Andric       Optional<TargetTransformInfo::ShuffleKind> ShuffleKind =
37885f7ddb14SDimitry Andric           isShuffle(VL, Mask);
37895f7ddb14SDimitry Andric       if (ShuffleKind.hasValue()) {
37905f7ddb14SDimitry Andric         // Found the bunch of extractelement instructions that must be gathered
37915f7ddb14SDimitry Andric         // into a vector and can be represented as a permutation of elements in
37925f7ddb14SDimitry Andric         // a single input vector or of 2 input vectors.
37935f7ddb14SDimitry Andric         InstructionCost Cost =
37945f7ddb14SDimitry Andric             computeExtractCost(VL, VecTy, *ShuffleKind, Mask, *TTI);
37955f7ddb14SDimitry Andric         AdjustExtractsCost(Cost, /*IsGather=*/true);
37965f7ddb14SDimitry Andric         if (NeedToShuffleReuses)
37975f7ddb14SDimitry Andric           Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
37985f7ddb14SDimitry Andric                                       FinalVecTy, E->ReuseShuffleIndices);
37995f7ddb14SDimitry Andric         return Cost;
38005f7ddb14SDimitry Andric       }
38015f7ddb14SDimitry Andric     }
38025f7ddb14SDimitry Andric     InstructionCost ReuseShuffleCost = 0;
38035f7ddb14SDimitry Andric     if (NeedToShuffleReuses)
38045f7ddb14SDimitry Andric       ReuseShuffleCost = TTI->getShuffleCost(
38055f7ddb14SDimitry Andric           TTI::SK_PermuteSingleSrc, FinalVecTy, E->ReuseShuffleIndices);
38060b57cec5SDimitry Andric     return ReuseShuffleCost + getGatherCost(VL);
38070b57cec5SDimitry Andric   }
38085f7ddb14SDimitry Andric   InstructionCost CommonCost = 0;
38095f7ddb14SDimitry Andric   SmallVector<int> Mask;
38105f7ddb14SDimitry Andric   if (!E->ReorderIndices.empty()) {
38115f7ddb14SDimitry Andric     SmallVector<int> NewMask;
38125f7ddb14SDimitry Andric     if (E->getOpcode() == Instruction::Store) {
38135f7ddb14SDimitry Andric       // For stores the order is actually a mask.
38145f7ddb14SDimitry Andric       NewMask.resize(E->ReorderIndices.size());
38155f7ddb14SDimitry Andric       copy(E->ReorderIndices, NewMask.begin());
38165f7ddb14SDimitry Andric     } else {
38175f7ddb14SDimitry Andric       inversePermutation(E->ReorderIndices, NewMask);
38185f7ddb14SDimitry Andric     }
38195f7ddb14SDimitry Andric     ::addMask(Mask, NewMask);
38205f7ddb14SDimitry Andric   }
38215f7ddb14SDimitry Andric   if (NeedToShuffleReuses)
38225f7ddb14SDimitry Andric     ::addMask(Mask, E->ReuseShuffleIndices);
38235f7ddb14SDimitry Andric   if (!Mask.empty() && !ShuffleVectorInst::isIdentityMask(Mask))
38245f7ddb14SDimitry Andric     CommonCost =
38255f7ddb14SDimitry Andric         TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FinalVecTy, Mask);
3826af732203SDimitry Andric   assert((E->State == TreeEntry::Vectorize ||
3827af732203SDimitry Andric           E->State == TreeEntry::ScatterVectorize) &&
3828af732203SDimitry Andric          "Unhandled state");
38298bcb0991SDimitry Andric   assert(E->getOpcode() && allSameType(VL) && allSameBlock(VL) && "Invalid VL");
38308bcb0991SDimitry Andric   Instruction *VL0 = E->getMainOp();
38318bcb0991SDimitry Andric   unsigned ShuffleOrOp =
38328bcb0991SDimitry Andric       E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
38330b57cec5SDimitry Andric   switch (ShuffleOrOp) {
38340b57cec5SDimitry Andric     case Instruction::PHI:
38350b57cec5SDimitry Andric       return 0;
38360b57cec5SDimitry Andric 
38370b57cec5SDimitry Andric     case Instruction::ExtractValue:
38385ffd83dbSDimitry Andric     case Instruction::ExtractElement: {
38395f7ddb14SDimitry Andric       // The common cost of removing ExtractElement/ExtractValue instructions +
38405f7ddb14SDimitry Andric       // the cost of shuffles, if required to reshuffle the original vector.
38410b57cec5SDimitry Andric       if (NeedToShuffleReuses) {
38420b57cec5SDimitry Andric         unsigned Idx = 0;
38430b57cec5SDimitry Andric         for (unsigned I : E->ReuseShuffleIndices) {
38440b57cec5SDimitry Andric           if (ShuffleOrOp == Instruction::ExtractElement) {
38455f7ddb14SDimitry Andric             auto *EE = cast<ExtractElementInst>(VL[I]);
38465f7ddb14SDimitry Andric             CommonCost -= TTI->getVectorInstrCost(Instruction::ExtractElement,
38475f7ddb14SDimitry Andric                                                   EE->getVectorOperandType(),
38485f7ddb14SDimitry Andric                                                   *getExtractIndex(EE));
38490b57cec5SDimitry Andric           } else {
38505f7ddb14SDimitry Andric             CommonCost -= TTI->getVectorInstrCost(Instruction::ExtractElement,
38515f7ddb14SDimitry Andric                                                   VecTy, Idx);
38520b57cec5SDimitry Andric             ++Idx;
38530b57cec5SDimitry Andric           }
38540b57cec5SDimitry Andric         }
38550b57cec5SDimitry Andric         Idx = ReuseShuffleNumbers;
38560b57cec5SDimitry Andric         for (Value *V : VL) {
38570b57cec5SDimitry Andric           if (ShuffleOrOp == Instruction::ExtractElement) {
38585f7ddb14SDimitry Andric             auto *EE = cast<ExtractElementInst>(V);
38595f7ddb14SDimitry Andric             CommonCost += TTI->getVectorInstrCost(Instruction::ExtractElement,
38605f7ddb14SDimitry Andric                                                   EE->getVectorOperandType(),
38615f7ddb14SDimitry Andric                                                   *getExtractIndex(EE));
38620b57cec5SDimitry Andric           } else {
38630b57cec5SDimitry Andric             --Idx;
38645f7ddb14SDimitry Andric             CommonCost += TTI->getVectorInstrCost(Instruction::ExtractElement,
38655f7ddb14SDimitry Andric                                                   VecTy, Idx);
38660b57cec5SDimitry Andric           }
38670b57cec5SDimitry Andric         }
38680b57cec5SDimitry Andric       }
38695f7ddb14SDimitry Andric       if (ShuffleOrOp == Instruction::ExtractValue) {
3870af732203SDimitry Andric         for (unsigned I = 0, E = VL.size(); I < E; ++I) {
38715f7ddb14SDimitry Andric           auto *EI = cast<Instruction>(VL[I]);
38720b57cec5SDimitry Andric           // Take credit for instruction that will become dead.
3873af732203SDimitry Andric           if (EI->hasOneUse()) {
3874af732203SDimitry Andric             Instruction *Ext = EI->user_back();
38750b57cec5SDimitry Andric             if ((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
38760b57cec5SDimitry Andric                 all_of(Ext->users(),
38770b57cec5SDimitry Andric                        [](User *U) { return isa<GetElementPtrInst>(U); })) {
38780b57cec5SDimitry Andric               // Use getExtractWithExtendCost() to calculate the cost of
38790b57cec5SDimitry Andric               // extractelement/ext pair.
38805f7ddb14SDimitry Andric               CommonCost -= TTI->getExtractWithExtendCost(
3881af732203SDimitry Andric                   Ext->getOpcode(), Ext->getType(), VecTy, I);
38820b57cec5SDimitry Andric               // Add back the cost of s|zext which is subtracted separately.
38835f7ddb14SDimitry Andric               CommonCost += TTI->getCastInstrCost(
3884af732203SDimitry Andric                   Ext->getOpcode(), Ext->getType(), EI->getType(),
3885af732203SDimitry Andric                   TTI::getCastContextHint(Ext), CostKind, Ext);
38860b57cec5SDimitry Andric               continue;
38870b57cec5SDimitry Andric             }
38880b57cec5SDimitry Andric           }
38895f7ddb14SDimitry Andric           CommonCost -=
3890af732203SDimitry Andric               TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, I);
38910b57cec5SDimitry Andric         }
38925f7ddb14SDimitry Andric       } else {
38935f7ddb14SDimitry Andric         AdjustExtractsCost(CommonCost, /*IsGather=*/false);
38940b57cec5SDimitry Andric       }
38955f7ddb14SDimitry Andric       return CommonCost;
38965f7ddb14SDimitry Andric     }
38975f7ddb14SDimitry Andric     case Instruction::InsertElement: {
38985f7ddb14SDimitry Andric       auto *SrcVecTy = cast<FixedVectorType>(VL0->getType());
38995f7ddb14SDimitry Andric 
39005f7ddb14SDimitry Andric       unsigned const NumElts = SrcVecTy->getNumElements();
39015f7ddb14SDimitry Andric       unsigned const NumScalars = VL.size();
39025f7ddb14SDimitry Andric       APInt DemandedElts = APInt::getNullValue(NumElts);
39035f7ddb14SDimitry Andric       // TODO: Add support for Instruction::InsertValue.
39045f7ddb14SDimitry Andric       unsigned Offset = UINT_MAX;
39055f7ddb14SDimitry Andric       bool IsIdentity = true;
39065f7ddb14SDimitry Andric       SmallVector<int> ShuffleMask(NumElts, UndefMaskElem);
39075f7ddb14SDimitry Andric       for (unsigned I = 0; I < NumScalars; ++I) {
39085f7ddb14SDimitry Andric         Optional<int> InsertIdx = getInsertIndex(VL[I], 0);
39095f7ddb14SDimitry Andric         if (!InsertIdx || *InsertIdx == UndefMaskElem)
39105f7ddb14SDimitry Andric           continue;
39115f7ddb14SDimitry Andric         unsigned Idx = *InsertIdx;
39125f7ddb14SDimitry Andric         DemandedElts.setBit(Idx);
39135f7ddb14SDimitry Andric         if (Idx < Offset) {
39145f7ddb14SDimitry Andric           Offset = Idx;
39155f7ddb14SDimitry Andric           IsIdentity &= I == 0;
39165f7ddb14SDimitry Andric         } else {
39175f7ddb14SDimitry Andric           assert(Idx >= Offset && "Failed to find vector index offset");
39185f7ddb14SDimitry Andric           IsIdentity &= Idx - Offset == I;
39195f7ddb14SDimitry Andric         }
39205f7ddb14SDimitry Andric         ShuffleMask[Idx] = I;
39215f7ddb14SDimitry Andric       }
39225f7ddb14SDimitry Andric       assert(Offset < NumElts && "Failed to find vector index offset");
39235f7ddb14SDimitry Andric 
39245f7ddb14SDimitry Andric       InstructionCost Cost = 0;
39255f7ddb14SDimitry Andric       Cost -= TTI->getScalarizationOverhead(SrcVecTy, DemandedElts,
39265f7ddb14SDimitry Andric                                             /*Insert*/ true, /*Extract*/ false);
39275f7ddb14SDimitry Andric 
39285f7ddb14SDimitry Andric       if (IsIdentity && NumElts != NumScalars && Offset % NumScalars != 0) {
39295f7ddb14SDimitry Andric         // FIXME: Replace with SK_InsertSubvector once it is properly supported.
39305f7ddb14SDimitry Andric         unsigned Sz = PowerOf2Ceil(Offset + NumScalars);
39315f7ddb14SDimitry Andric         Cost += TTI->getShuffleCost(
39325f7ddb14SDimitry Andric             TargetTransformInfo::SK_PermuteSingleSrc,
39335f7ddb14SDimitry Andric             FixedVectorType::get(SrcVecTy->getElementType(), Sz));
39345f7ddb14SDimitry Andric       } else if (!IsIdentity) {
39355f7ddb14SDimitry Andric         Cost += TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, SrcVecTy,
39365f7ddb14SDimitry Andric                                     ShuffleMask);
39375f7ddb14SDimitry Andric       }
39385f7ddb14SDimitry Andric 
39395f7ddb14SDimitry Andric       return Cost;
39400b57cec5SDimitry Andric     }
39410b57cec5SDimitry Andric     case Instruction::ZExt:
39420b57cec5SDimitry Andric     case Instruction::SExt:
39430b57cec5SDimitry Andric     case Instruction::FPToUI:
39440b57cec5SDimitry Andric     case Instruction::FPToSI:
39450b57cec5SDimitry Andric     case Instruction::FPExt:
39460b57cec5SDimitry Andric     case Instruction::PtrToInt:
39470b57cec5SDimitry Andric     case Instruction::IntToPtr:
39480b57cec5SDimitry Andric     case Instruction::SIToFP:
39490b57cec5SDimitry Andric     case Instruction::UIToFP:
39500b57cec5SDimitry Andric     case Instruction::Trunc:
39510b57cec5SDimitry Andric     case Instruction::FPTrunc:
39520b57cec5SDimitry Andric     case Instruction::BitCast: {
39530b57cec5SDimitry Andric       Type *SrcTy = VL0->getOperand(0)->getType();
3954af732203SDimitry Andric       InstructionCost ScalarEltCost =
3955af732203SDimitry Andric           TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy,
3956af732203SDimitry Andric                                 TTI::getCastContextHint(VL0), CostKind, VL0);
39570b57cec5SDimitry Andric       if (NeedToShuffleReuses) {
39585f7ddb14SDimitry Andric         CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
39590b57cec5SDimitry Andric       }
39600b57cec5SDimitry Andric 
39610b57cec5SDimitry Andric       // Calculate the cost of this instruction.
3962af732203SDimitry Andric       InstructionCost ScalarCost = VL.size() * ScalarEltCost;
39630b57cec5SDimitry Andric 
39645ffd83dbSDimitry Andric       auto *SrcVecTy = FixedVectorType::get(SrcTy, VL.size());
3965af732203SDimitry Andric       InstructionCost VecCost = 0;
39660b57cec5SDimitry Andric       // Check if the values are candidates to demote.
39670b57cec5SDimitry Andric       if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
39685f7ddb14SDimitry Andric         VecCost = CommonCost + TTI->getCastInstrCost(
39695f7ddb14SDimitry Andric                                    E->getOpcode(), VecTy, SrcVecTy,
3970af732203SDimitry Andric                                    TTI::getCastContextHint(VL0), CostKind, VL0);
39710b57cec5SDimitry Andric       }
39725f7ddb14SDimitry Andric       LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
39730b57cec5SDimitry Andric       return VecCost - ScalarCost;
39740b57cec5SDimitry Andric     }
39750b57cec5SDimitry Andric     case Instruction::FCmp:
39760b57cec5SDimitry Andric     case Instruction::ICmp:
39770b57cec5SDimitry Andric     case Instruction::Select: {
39780b57cec5SDimitry Andric       // Calculate the cost of this instruction.
3979af732203SDimitry Andric       InstructionCost ScalarEltCost =
3980af732203SDimitry Andric           TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
3981af732203SDimitry Andric                                   CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0);
39820b57cec5SDimitry Andric       if (NeedToShuffleReuses) {
39835f7ddb14SDimitry Andric         CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
39840b57cec5SDimitry Andric       }
39855ffd83dbSDimitry Andric       auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
3986af732203SDimitry Andric       InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
3987af732203SDimitry Andric 
3988af732203SDimitry Andric       // Check if all entries in VL are either compares or selects with compares
3989af732203SDimitry Andric       // as condition that have the same predicates.
3990af732203SDimitry Andric       CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE;
3991af732203SDimitry Andric       bool First = true;
3992af732203SDimitry Andric       for (auto *V : VL) {
3993af732203SDimitry Andric         CmpInst::Predicate CurrentPred;
3994af732203SDimitry Andric         auto MatchCmp = m_Cmp(CurrentPred, m_Value(), m_Value());
3995af732203SDimitry Andric         if ((!match(V, m_Select(MatchCmp, m_Value(), m_Value())) &&
3996af732203SDimitry Andric              !match(V, MatchCmp)) ||
3997af732203SDimitry Andric             (!First && VecPred != CurrentPred)) {
3998af732203SDimitry Andric           VecPred = CmpInst::BAD_ICMP_PREDICATE;
3999af732203SDimitry Andric           break;
4000af732203SDimitry Andric         }
4001af732203SDimitry Andric         First = false;
4002af732203SDimitry Andric         VecPred = CurrentPred;
4003af732203SDimitry Andric       }
4004af732203SDimitry Andric 
4005af732203SDimitry Andric       InstructionCost VecCost = TTI->getCmpSelInstrCost(
4006af732203SDimitry Andric           E->getOpcode(), VecTy, MaskTy, VecPred, CostKind, VL0);
4007af732203SDimitry Andric       // Check if it is possible and profitable to use min/max for selects in
4008af732203SDimitry Andric       // VL.
4009af732203SDimitry Andric       //
4010af732203SDimitry Andric       auto IntrinsicAndUse = canConvertToMinOrMaxIntrinsic(VL);
4011af732203SDimitry Andric       if (IntrinsicAndUse.first != Intrinsic::not_intrinsic) {
4012af732203SDimitry Andric         IntrinsicCostAttributes CostAttrs(IntrinsicAndUse.first, VecTy,
4013af732203SDimitry Andric                                           {VecTy, VecTy});
4014af732203SDimitry Andric         InstructionCost IntrinsicCost =
4015af732203SDimitry Andric             TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
4016af732203SDimitry Andric         // If the selects are the only uses of the compares, they will be dead
4017af732203SDimitry Andric         // and we can adjust the cost by removing their cost.
4018af732203SDimitry Andric         if (IntrinsicAndUse.second)
4019af732203SDimitry Andric           IntrinsicCost -=
4020af732203SDimitry Andric               TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy, MaskTy,
4021af732203SDimitry Andric                                       CmpInst::BAD_ICMP_PREDICATE, CostKind);
4022af732203SDimitry Andric         VecCost = std::min(VecCost, IntrinsicCost);
4023af732203SDimitry Andric       }
40245f7ddb14SDimitry Andric       LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
40255f7ddb14SDimitry Andric       return CommonCost + VecCost - ScalarCost;
40260b57cec5SDimitry Andric     }
40270b57cec5SDimitry Andric     case Instruction::FNeg:
40280b57cec5SDimitry Andric     case Instruction::Add:
40290b57cec5SDimitry Andric     case Instruction::FAdd:
40300b57cec5SDimitry Andric     case Instruction::Sub:
40310b57cec5SDimitry Andric     case Instruction::FSub:
40320b57cec5SDimitry Andric     case Instruction::Mul:
40330b57cec5SDimitry Andric     case Instruction::FMul:
40340b57cec5SDimitry Andric     case Instruction::UDiv:
40350b57cec5SDimitry Andric     case Instruction::SDiv:
40360b57cec5SDimitry Andric     case Instruction::FDiv:
40370b57cec5SDimitry Andric     case Instruction::URem:
40380b57cec5SDimitry Andric     case Instruction::SRem:
40390b57cec5SDimitry Andric     case Instruction::FRem:
40400b57cec5SDimitry Andric     case Instruction::Shl:
40410b57cec5SDimitry Andric     case Instruction::LShr:
40420b57cec5SDimitry Andric     case Instruction::AShr:
40430b57cec5SDimitry Andric     case Instruction::And:
40440b57cec5SDimitry Andric     case Instruction::Or:
40450b57cec5SDimitry Andric     case Instruction::Xor: {
40460b57cec5SDimitry Andric       // Certain instructions can be cheaper to vectorize if they have a
40470b57cec5SDimitry Andric       // constant second vector operand.
40480b57cec5SDimitry Andric       TargetTransformInfo::OperandValueKind Op1VK =
40490b57cec5SDimitry Andric           TargetTransformInfo::OK_AnyValue;
40500b57cec5SDimitry Andric       TargetTransformInfo::OperandValueKind Op2VK =
40510b57cec5SDimitry Andric           TargetTransformInfo::OK_UniformConstantValue;
40520b57cec5SDimitry Andric       TargetTransformInfo::OperandValueProperties Op1VP =
40530b57cec5SDimitry Andric           TargetTransformInfo::OP_None;
40540b57cec5SDimitry Andric       TargetTransformInfo::OperandValueProperties Op2VP =
40550b57cec5SDimitry Andric           TargetTransformInfo::OP_PowerOf2;
40560b57cec5SDimitry Andric 
40570b57cec5SDimitry Andric       // If all operands are exactly the same ConstantInt then set the
40580b57cec5SDimitry Andric       // operand kind to OK_UniformConstantValue.
40590b57cec5SDimitry Andric       // If instead not all operands are constants, then set the operand kind
40600b57cec5SDimitry Andric       // to OK_AnyValue. If all operands are constants but not the same,
40610b57cec5SDimitry Andric       // then set the operand kind to OK_NonUniformConstantValue.
40620b57cec5SDimitry Andric       ConstantInt *CInt0 = nullptr;
40630b57cec5SDimitry Andric       for (unsigned i = 0, e = VL.size(); i < e; ++i) {
40640b57cec5SDimitry Andric         const Instruction *I = cast<Instruction>(VL[i]);
40650b57cec5SDimitry Andric         unsigned OpIdx = isa<BinaryOperator>(I) ? 1 : 0;
40660b57cec5SDimitry Andric         ConstantInt *CInt = dyn_cast<ConstantInt>(I->getOperand(OpIdx));
40670b57cec5SDimitry Andric         if (!CInt) {
40680b57cec5SDimitry Andric           Op2VK = TargetTransformInfo::OK_AnyValue;
40690b57cec5SDimitry Andric           Op2VP = TargetTransformInfo::OP_None;
40700b57cec5SDimitry Andric           break;
40710b57cec5SDimitry Andric         }
40720b57cec5SDimitry Andric         if (Op2VP == TargetTransformInfo::OP_PowerOf2 &&
40730b57cec5SDimitry Andric             !CInt->getValue().isPowerOf2())
40740b57cec5SDimitry Andric           Op2VP = TargetTransformInfo::OP_None;
40750b57cec5SDimitry Andric         if (i == 0) {
40760b57cec5SDimitry Andric           CInt0 = CInt;
40770b57cec5SDimitry Andric           continue;
40780b57cec5SDimitry Andric         }
40790b57cec5SDimitry Andric         if (CInt0 != CInt)
40800b57cec5SDimitry Andric           Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
40810b57cec5SDimitry Andric       }
40820b57cec5SDimitry Andric 
40830b57cec5SDimitry Andric       SmallVector<const Value *, 4> Operands(VL0->operand_values());
4084af732203SDimitry Andric       InstructionCost ScalarEltCost =
4085af732203SDimitry Andric           TTI->getArithmeticInstrCost(E->getOpcode(), ScalarTy, CostKind, Op1VK,
4086af732203SDimitry Andric                                       Op2VK, Op1VP, Op2VP, Operands, VL0);
40870b57cec5SDimitry Andric       if (NeedToShuffleReuses) {
40885f7ddb14SDimitry Andric         CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
40890b57cec5SDimitry Andric       }
4090af732203SDimitry Andric       InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
4091af732203SDimitry Andric       InstructionCost VecCost =
4092af732203SDimitry Andric           TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind, Op1VK,
4093af732203SDimitry Andric                                       Op2VK, Op1VP, Op2VP, Operands, VL0);
40945f7ddb14SDimitry Andric       LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
40955f7ddb14SDimitry Andric       return CommonCost + VecCost - ScalarCost;
40960b57cec5SDimitry Andric     }
40970b57cec5SDimitry Andric     case Instruction::GetElementPtr: {
40980b57cec5SDimitry Andric       TargetTransformInfo::OperandValueKind Op1VK =
40990b57cec5SDimitry Andric           TargetTransformInfo::OK_AnyValue;
41000b57cec5SDimitry Andric       TargetTransformInfo::OperandValueKind Op2VK =
41010b57cec5SDimitry Andric           TargetTransformInfo::OK_UniformConstantValue;
41020b57cec5SDimitry Andric 
4103af732203SDimitry Andric       InstructionCost ScalarEltCost = TTI->getArithmeticInstrCost(
4104af732203SDimitry Andric           Instruction::Add, ScalarTy, CostKind, Op1VK, Op2VK);
41050b57cec5SDimitry Andric       if (NeedToShuffleReuses) {
41065f7ddb14SDimitry Andric         CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
41070b57cec5SDimitry Andric       }
4108af732203SDimitry Andric       InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
4109af732203SDimitry Andric       InstructionCost VecCost = TTI->getArithmeticInstrCost(
4110af732203SDimitry Andric           Instruction::Add, VecTy, CostKind, Op1VK, Op2VK);
41115f7ddb14SDimitry Andric       LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
41125f7ddb14SDimitry Andric       return CommonCost + VecCost - ScalarCost;
41130b57cec5SDimitry Andric     }
41140b57cec5SDimitry Andric     case Instruction::Load: {
41150b57cec5SDimitry Andric       // Cost of wide load - cost of scalar loads.
41165f7ddb14SDimitry Andric       Align Alignment = cast<LoadInst>(VL0)->getAlign();
4117af732203SDimitry Andric       InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
41185f7ddb14SDimitry Andric           Instruction::Load, ScalarTy, Alignment, 0, CostKind, VL0);
41190b57cec5SDimitry Andric       if (NeedToShuffleReuses) {
41205f7ddb14SDimitry Andric         CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
41210b57cec5SDimitry Andric       }
4122af732203SDimitry Andric       InstructionCost ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
4123af732203SDimitry Andric       InstructionCost VecLdCost;
4124af732203SDimitry Andric       if (E->State == TreeEntry::Vectorize) {
41255f7ddb14SDimitry Andric         VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, Alignment, 0,
41265ffd83dbSDimitry Andric                                          CostKind, VL0);
4127af732203SDimitry Andric       } else {
4128af732203SDimitry Andric         assert(E->State == TreeEntry::ScatterVectorize && "Unknown EntryState");
41295f7ddb14SDimitry Andric         Align CommonAlignment = Alignment;
41305f7ddb14SDimitry Andric         for (Value *V : VL)
41315f7ddb14SDimitry Andric           CommonAlignment =
41325f7ddb14SDimitry Andric               commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
4133af732203SDimitry Andric         VecLdCost = TTI->getGatherScatterOpCost(
4134af732203SDimitry Andric             Instruction::Load, VecTy, cast<LoadInst>(VL0)->getPointerOperand(),
41355f7ddb14SDimitry Andric             /*VariableMask=*/false, CommonAlignment, CostKind, VL0);
4136af732203SDimitry Andric       }
41375f7ddb14SDimitry Andric       LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecLdCost, ScalarLdCost));
41385f7ddb14SDimitry Andric       return CommonCost + VecLdCost - ScalarLdCost;
41390b57cec5SDimitry Andric     }
41400b57cec5SDimitry Andric     case Instruction::Store: {
41410b57cec5SDimitry Andric       // We know that we can merge the stores. Calculate the cost.
4142480093f4SDimitry Andric       bool IsReorder = !E->ReorderIndices.empty();
4143480093f4SDimitry Andric       auto *SI =
4144480093f4SDimitry Andric           cast<StoreInst>(IsReorder ? VL[E->ReorderIndices.front()] : VL0);
41455ffd83dbSDimitry Andric       Align Alignment = SI->getAlign();
4146af732203SDimitry Andric       InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
4147af732203SDimitry Andric           Instruction::Store, ScalarTy, Alignment, 0, CostKind, VL0);
4148af732203SDimitry Andric       InstructionCost ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
4149af732203SDimitry Andric       InstructionCost VecStCost = TTI->getMemoryOpCost(
4150af732203SDimitry Andric           Instruction::Store, VecTy, Alignment, 0, CostKind, VL0);
41515f7ddb14SDimitry Andric       LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecStCost, ScalarStCost));
41525f7ddb14SDimitry Andric       return CommonCost + VecStCost - ScalarStCost;
41530b57cec5SDimitry Andric     }
41540b57cec5SDimitry Andric     case Instruction::Call: {
41550b57cec5SDimitry Andric       CallInst *CI = cast<CallInst>(VL0);
41560b57cec5SDimitry Andric       Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
41570b57cec5SDimitry Andric 
41580b57cec5SDimitry Andric       // Calculate the cost of the scalar and vector calls.
41595f7ddb14SDimitry Andric       IntrinsicCostAttributes CostAttrs(ID, *CI, 1);
4160af732203SDimitry Andric       InstructionCost ScalarEltCost =
4161af732203SDimitry Andric           TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
41620b57cec5SDimitry Andric       if (NeedToShuffleReuses) {
41635f7ddb14SDimitry Andric         CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
41640b57cec5SDimitry Andric       }
4165af732203SDimitry Andric       InstructionCost ScalarCallCost = VecTy->getNumElements() * ScalarEltCost;
41660b57cec5SDimitry Andric 
41675ffd83dbSDimitry Andric       auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
4168af732203SDimitry Andric       InstructionCost VecCallCost =
4169af732203SDimitry Andric           std::min(VecCallCosts.first, VecCallCosts.second);
41700b57cec5SDimitry Andric 
41710b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: Call cost " << VecCallCost - ScalarCallCost
41720b57cec5SDimitry Andric                         << " (" << VecCallCost << "-" << ScalarCallCost << ")"
41730b57cec5SDimitry Andric                         << " for " << *CI << "\n");
41740b57cec5SDimitry Andric 
41755f7ddb14SDimitry Andric       return CommonCost + VecCallCost - ScalarCallCost;
41760b57cec5SDimitry Andric     }
41770b57cec5SDimitry Andric     case Instruction::ShuffleVector: {
41788bcb0991SDimitry Andric       assert(E->isAltShuffle() &&
41798bcb0991SDimitry Andric              ((Instruction::isBinaryOp(E->getOpcode()) &&
41808bcb0991SDimitry Andric                Instruction::isBinaryOp(E->getAltOpcode())) ||
41818bcb0991SDimitry Andric               (Instruction::isCast(E->getOpcode()) &&
41828bcb0991SDimitry Andric                Instruction::isCast(E->getAltOpcode()))) &&
41830b57cec5SDimitry Andric              "Invalid Shuffle Vector Operand");
4184af732203SDimitry Andric       InstructionCost ScalarCost = 0;
41850b57cec5SDimitry Andric       if (NeedToShuffleReuses) {
41860b57cec5SDimitry Andric         for (unsigned Idx : E->ReuseShuffleIndices) {
41870b57cec5SDimitry Andric           Instruction *I = cast<Instruction>(VL[Idx]);
41885f7ddb14SDimitry Andric           CommonCost -= TTI->getInstructionCost(I, CostKind);
41890b57cec5SDimitry Andric         }
41900b57cec5SDimitry Andric         for (Value *V : VL) {
41910b57cec5SDimitry Andric           Instruction *I = cast<Instruction>(V);
41925f7ddb14SDimitry Andric           CommonCost += TTI->getInstructionCost(I, CostKind);
41930b57cec5SDimitry Andric         }
41940b57cec5SDimitry Andric       }
41958bcb0991SDimitry Andric       for (Value *V : VL) {
41968bcb0991SDimitry Andric         Instruction *I = cast<Instruction>(V);
41978bcb0991SDimitry Andric         assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
41985ffd83dbSDimitry Andric         ScalarCost += TTI->getInstructionCost(I, CostKind);
41990b57cec5SDimitry Andric       }
42000b57cec5SDimitry Andric       // VecCost is equal to sum of the cost of creating 2 vectors
42010b57cec5SDimitry Andric       // and the cost of creating shuffle.
4202af732203SDimitry Andric       InstructionCost VecCost = 0;
42038bcb0991SDimitry Andric       if (Instruction::isBinaryOp(E->getOpcode())) {
42045ffd83dbSDimitry Andric         VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
42055ffd83dbSDimitry Andric         VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
42065ffd83dbSDimitry Andric                                                CostKind);
42070b57cec5SDimitry Andric       } else {
42088bcb0991SDimitry Andric         Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
42098bcb0991SDimitry Andric         Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
42105ffd83dbSDimitry Andric         auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size());
42115ffd83dbSDimitry Andric         auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size());
42125ffd83dbSDimitry Andric         VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
4213af732203SDimitry Andric                                         TTI::CastContextHint::None, CostKind);
42145ffd83dbSDimitry Andric         VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
4215af732203SDimitry Andric                                          TTI::CastContextHint::None, CostKind);
42160b57cec5SDimitry Andric       }
42175f7ddb14SDimitry Andric 
42185f7ddb14SDimitry Andric       SmallVector<int> Mask(E->Scalars.size());
42195f7ddb14SDimitry Andric       for (unsigned I = 0, End = E->Scalars.size(); I < End; ++I) {
42205f7ddb14SDimitry Andric         auto *OpInst = cast<Instruction>(E->Scalars[I]);
42215f7ddb14SDimitry Andric         assert(E->isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode");
42225f7ddb14SDimitry Andric         Mask[I] = I + (OpInst->getOpcode() == E->getAltOpcode() ? End : 0);
42235f7ddb14SDimitry Andric       }
42245f7ddb14SDimitry Andric       VecCost +=
42255f7ddb14SDimitry Andric           TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, Mask, 0);
42265f7ddb14SDimitry Andric       LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
42275f7ddb14SDimitry Andric       return CommonCost + VecCost - ScalarCost;
42280b57cec5SDimitry Andric     }
42290b57cec5SDimitry Andric     default:
42300b57cec5SDimitry Andric       llvm_unreachable("Unknown instruction");
42310b57cec5SDimitry Andric   }
42320b57cec5SDimitry Andric }
42330b57cec5SDimitry Andric 
isFullyVectorizableTinyTree() const42340b57cec5SDimitry Andric bool BoUpSLP::isFullyVectorizableTinyTree() const {
42350b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "SLP: Check whether the tree with height "
42360b57cec5SDimitry Andric                     << VectorizableTree.size() << " is fully vectorizable .\n");
42370b57cec5SDimitry Andric 
42380b57cec5SDimitry Andric   // We only handle trees of heights 1 and 2.
4239480093f4SDimitry Andric   if (VectorizableTree.size() == 1 &&
4240480093f4SDimitry Andric       VectorizableTree[0]->State == TreeEntry::Vectorize)
42410b57cec5SDimitry Andric     return true;
42420b57cec5SDimitry Andric 
42430b57cec5SDimitry Andric   if (VectorizableTree.size() != 2)
42440b57cec5SDimitry Andric     return false;
42450b57cec5SDimitry Andric 
42465f7ddb14SDimitry Andric   // Handle splat and all-constants stores. Also try to vectorize tiny trees
42475f7ddb14SDimitry Andric   // with the second gather nodes if they have less scalar operands rather than
42485f7ddb14SDimitry Andric   // the initial tree element (may be profitable to shuffle the second gather)
42495f7ddb14SDimitry Andric   // or they are extractelements, which form shuffle.
42505f7ddb14SDimitry Andric   SmallVector<int> Mask;
4251480093f4SDimitry Andric   if (VectorizableTree[0]->State == TreeEntry::Vectorize &&
42520b57cec5SDimitry Andric       (allConstant(VectorizableTree[1]->Scalars) ||
42535f7ddb14SDimitry Andric        isSplat(VectorizableTree[1]->Scalars) ||
42545f7ddb14SDimitry Andric        (VectorizableTree[1]->State == TreeEntry::NeedToGather &&
42555f7ddb14SDimitry Andric         VectorizableTree[1]->Scalars.size() <
42565f7ddb14SDimitry Andric             VectorizableTree[0]->Scalars.size()) ||
42575f7ddb14SDimitry Andric        (VectorizableTree[1]->State == TreeEntry::NeedToGather &&
42585f7ddb14SDimitry Andric         VectorizableTree[1]->getOpcode() == Instruction::ExtractElement &&
42595f7ddb14SDimitry Andric         isShuffle(VectorizableTree[1]->Scalars, Mask))))
42600b57cec5SDimitry Andric     return true;
42610b57cec5SDimitry Andric 
42620b57cec5SDimitry Andric   // Gathering cost would be too much for tiny trees.
4263480093f4SDimitry Andric   if (VectorizableTree[0]->State == TreeEntry::NeedToGather ||
4264480093f4SDimitry Andric       VectorizableTree[1]->State == TreeEntry::NeedToGather)
42650b57cec5SDimitry Andric     return false;
42660b57cec5SDimitry Andric 
42670b57cec5SDimitry Andric   return true;
42680b57cec5SDimitry Andric }
42690b57cec5SDimitry Andric 
isLoadCombineCandidateImpl(Value * Root,unsigned NumElts,TargetTransformInfo * TTI,bool MustMatchOrInst)42705ffd83dbSDimitry Andric static bool isLoadCombineCandidateImpl(Value *Root, unsigned NumElts,
42715f7ddb14SDimitry Andric                                        TargetTransformInfo *TTI,
42725f7ddb14SDimitry Andric                                        bool MustMatchOrInst) {
42735ffd83dbSDimitry Andric   // Look past the root to find a source value. Arbitrarily follow the
42748bcb0991SDimitry Andric   // path through operand 0 of any 'or'. Also, peek through optional
4275af732203SDimitry Andric   // shift-left-by-multiple-of-8-bits.
42765ffd83dbSDimitry Andric   Value *ZextLoad = Root;
4277af732203SDimitry Andric   const APInt *ShAmtC;
42785f7ddb14SDimitry Andric   bool FoundOr = false;
42795ffd83dbSDimitry Andric   while (!isa<ConstantExpr>(ZextLoad) &&
42805ffd83dbSDimitry Andric          (match(ZextLoad, m_Or(m_Value(), m_Value())) ||
4281af732203SDimitry Andric           (match(ZextLoad, m_Shl(m_Value(), m_APInt(ShAmtC))) &&
42825f7ddb14SDimitry Andric            ShAmtC->urem(8) == 0))) {
42835f7ddb14SDimitry Andric     auto *BinOp = cast<BinaryOperator>(ZextLoad);
42845f7ddb14SDimitry Andric     ZextLoad = BinOp->getOperand(0);
42855f7ddb14SDimitry Andric     if (BinOp->getOpcode() == Instruction::Or)
42865f7ddb14SDimitry Andric       FoundOr = true;
42875f7ddb14SDimitry Andric   }
42885ffd83dbSDimitry Andric   // Check if the input is an extended load of the required or/shift expression.
42898bcb0991SDimitry Andric   Value *LoadPtr;
42905f7ddb14SDimitry Andric   if ((MustMatchOrInst && !FoundOr) || ZextLoad == Root ||
42915f7ddb14SDimitry Andric       !match(ZextLoad, m_ZExt(m_Load(m_Value(LoadPtr)))))
42928bcb0991SDimitry Andric     return false;
42938bcb0991SDimitry Andric 
42948bcb0991SDimitry Andric   // Require that the total load bit width is a legal integer type.
42958bcb0991SDimitry Andric   // For example, <8 x i8> --> i64 is a legal integer on a 64-bit target.
42968bcb0991SDimitry Andric   // But <16 x i8> --> i128 is not, so the backend probably can't reduce it.
42978bcb0991SDimitry Andric   Type *SrcTy = LoadPtr->getType()->getPointerElementType();
42988bcb0991SDimitry Andric   unsigned LoadBitWidth = SrcTy->getIntegerBitWidth() * NumElts;
42995ffd83dbSDimitry Andric   if (!TTI->isTypeLegal(IntegerType::get(Root->getContext(), LoadBitWidth)))
43008bcb0991SDimitry Andric     return false;
43018bcb0991SDimitry Andric 
43028bcb0991SDimitry Andric   // Everything matched - assume that we can fold the whole sequence using
43038bcb0991SDimitry Andric   // load combining.
43045ffd83dbSDimitry Andric   LLVM_DEBUG(dbgs() << "SLP: Assume load combining for tree starting at "
43055ffd83dbSDimitry Andric              << *(cast<Instruction>(Root)) << "\n");
43068bcb0991SDimitry Andric 
43078bcb0991SDimitry Andric   return true;
43088bcb0991SDimitry Andric }
43098bcb0991SDimitry Andric 
isLoadCombineReductionCandidate(RecurKind RdxKind) const4310af732203SDimitry Andric bool BoUpSLP::isLoadCombineReductionCandidate(RecurKind RdxKind) const {
4311af732203SDimitry Andric   if (RdxKind != RecurKind::Or)
43125ffd83dbSDimitry Andric     return false;
43135ffd83dbSDimitry Andric 
43145ffd83dbSDimitry Andric   unsigned NumElts = VectorizableTree[0]->Scalars.size();
43155ffd83dbSDimitry Andric   Value *FirstReduced = VectorizableTree[0]->Scalars[0];
43165f7ddb14SDimitry Andric   return isLoadCombineCandidateImpl(FirstReduced, NumElts, TTI,
43175f7ddb14SDimitry Andric                                     /* MatchOr */ false);
43185ffd83dbSDimitry Andric }
43195ffd83dbSDimitry Andric 
isLoadCombineCandidate() const43205ffd83dbSDimitry Andric bool BoUpSLP::isLoadCombineCandidate() const {
43215ffd83dbSDimitry Andric   // Peek through a final sequence of stores and check if all operations are
43225ffd83dbSDimitry Andric   // likely to be load-combined.
43235ffd83dbSDimitry Andric   unsigned NumElts = VectorizableTree[0]->Scalars.size();
43245ffd83dbSDimitry Andric   for (Value *Scalar : VectorizableTree[0]->Scalars) {
43255ffd83dbSDimitry Andric     Value *X;
43265ffd83dbSDimitry Andric     if (!match(Scalar, m_Store(m_Value(X), m_Value())) ||
43275f7ddb14SDimitry Andric         !isLoadCombineCandidateImpl(X, NumElts, TTI, /* MatchOr */ true))
43285ffd83dbSDimitry Andric       return false;
43295ffd83dbSDimitry Andric   }
43305ffd83dbSDimitry Andric   return true;
43315ffd83dbSDimitry Andric }
43325ffd83dbSDimitry Andric 
isTreeTinyAndNotFullyVectorizable() const43330b57cec5SDimitry Andric bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() const {
43345f7ddb14SDimitry Andric   // No need to vectorize inserts of gathered values.
43355f7ddb14SDimitry Andric   if (VectorizableTree.size() == 2 &&
43365f7ddb14SDimitry Andric       isa<InsertElementInst>(VectorizableTree[0]->Scalars[0]) &&
43375f7ddb14SDimitry Andric       VectorizableTree[1]->State == TreeEntry::NeedToGather)
43385f7ddb14SDimitry Andric     return true;
43395f7ddb14SDimitry Andric 
43400b57cec5SDimitry Andric   // We can vectorize the tree if its size is greater than or equal to the
43410b57cec5SDimitry Andric   // minimum size specified by the MinTreeSize command line option.
43420b57cec5SDimitry Andric   if (VectorizableTree.size() >= MinTreeSize)
43430b57cec5SDimitry Andric     return false;
43440b57cec5SDimitry Andric 
43450b57cec5SDimitry Andric   // If we have a tiny tree (a tree whose size is less than MinTreeSize), we
43460b57cec5SDimitry Andric   // can vectorize it if we can prove it fully vectorizable.
43470b57cec5SDimitry Andric   if (isFullyVectorizableTinyTree())
43480b57cec5SDimitry Andric     return false;
43490b57cec5SDimitry Andric 
43500b57cec5SDimitry Andric   assert(VectorizableTree.empty()
43510b57cec5SDimitry Andric              ? ExternalUses.empty()
43520b57cec5SDimitry Andric              : true && "We shouldn't have any external users");
43530b57cec5SDimitry Andric 
43540b57cec5SDimitry Andric   // Otherwise, we can't vectorize the tree. It is both tiny and not fully
43550b57cec5SDimitry Andric   // vectorizable.
43560b57cec5SDimitry Andric   return true;
43570b57cec5SDimitry Andric }
43580b57cec5SDimitry Andric 
getSpillCost() const4359af732203SDimitry Andric InstructionCost BoUpSLP::getSpillCost() const {
43600b57cec5SDimitry Andric   // Walk from the bottom of the tree to the top, tracking which values are
43610b57cec5SDimitry Andric   // live. When we see a call instruction that is not part of our tree,
43620b57cec5SDimitry Andric   // query TTI to see if there is a cost to keeping values live over it
43630b57cec5SDimitry Andric   // (for example, if spills and fills are required).
43640b57cec5SDimitry Andric   unsigned BundleWidth = VectorizableTree.front()->Scalars.size();
4365af732203SDimitry Andric   InstructionCost Cost = 0;
43660b57cec5SDimitry Andric 
43670b57cec5SDimitry Andric   SmallPtrSet<Instruction*, 4> LiveValues;
43680b57cec5SDimitry Andric   Instruction *PrevInst = nullptr;
43690b57cec5SDimitry Andric 
4370af732203SDimitry Andric   // The entries in VectorizableTree are not necessarily ordered by their
4371af732203SDimitry Andric   // position in basic blocks. Collect them and order them by dominance so later
4372af732203SDimitry Andric   // instructions are guaranteed to be visited first. For instructions in
4373af732203SDimitry Andric   // different basic blocks, we only scan to the beginning of the block, so
4374af732203SDimitry Andric   // their order does not matter, as long as all instructions in a basic block
4375af732203SDimitry Andric   // are grouped together. Using dominance ensures a deterministic order.
4376af732203SDimitry Andric   SmallVector<Instruction *, 16> OrderedScalars;
43770b57cec5SDimitry Andric   for (const auto &TEPtr : VectorizableTree) {
43780b57cec5SDimitry Andric     Instruction *Inst = dyn_cast<Instruction>(TEPtr->Scalars[0]);
43790b57cec5SDimitry Andric     if (!Inst)
43800b57cec5SDimitry Andric       continue;
4381af732203SDimitry Andric     OrderedScalars.push_back(Inst);
4382af732203SDimitry Andric   }
43835f7ddb14SDimitry Andric   llvm::sort(OrderedScalars, [&](Instruction *A, Instruction *B) {
43845f7ddb14SDimitry Andric     auto *NodeA = DT->getNode(A->getParent());
43855f7ddb14SDimitry Andric     auto *NodeB = DT->getNode(B->getParent());
43865f7ddb14SDimitry Andric     assert(NodeA && "Should only process reachable instructions");
43875f7ddb14SDimitry Andric     assert(NodeB && "Should only process reachable instructions");
43885f7ddb14SDimitry Andric     assert((NodeA == NodeB) == (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
43895f7ddb14SDimitry Andric            "Different nodes should have different DFS numbers");
43905f7ddb14SDimitry Andric     if (NodeA != NodeB)
43915f7ddb14SDimitry Andric       return NodeA->getDFSNumIn() < NodeB->getDFSNumIn();
43925f7ddb14SDimitry Andric     return B->comesBefore(A);
4393af732203SDimitry Andric   });
43940b57cec5SDimitry Andric 
4395af732203SDimitry Andric   for (Instruction *Inst : OrderedScalars) {
43960b57cec5SDimitry Andric     if (!PrevInst) {
43970b57cec5SDimitry Andric       PrevInst = Inst;
43980b57cec5SDimitry Andric       continue;
43990b57cec5SDimitry Andric     }
44000b57cec5SDimitry Andric 
44010b57cec5SDimitry Andric     // Update LiveValues.
44020b57cec5SDimitry Andric     LiveValues.erase(PrevInst);
44030b57cec5SDimitry Andric     for (auto &J : PrevInst->operands()) {
44040b57cec5SDimitry Andric       if (isa<Instruction>(&*J) && getTreeEntry(&*J))
44050b57cec5SDimitry Andric         LiveValues.insert(cast<Instruction>(&*J));
44060b57cec5SDimitry Andric     }
44070b57cec5SDimitry Andric 
44080b57cec5SDimitry Andric     LLVM_DEBUG({
44090b57cec5SDimitry Andric       dbgs() << "SLP: #LV: " << LiveValues.size();
44100b57cec5SDimitry Andric       for (auto *X : LiveValues)
44110b57cec5SDimitry Andric         dbgs() << " " << X->getName();
44120b57cec5SDimitry Andric       dbgs() << ", Looking at ";
44130b57cec5SDimitry Andric       Inst->dump();
44140b57cec5SDimitry Andric     });
44150b57cec5SDimitry Andric 
44160b57cec5SDimitry Andric     // Now find the sequence of instructions between PrevInst and Inst.
44170b57cec5SDimitry Andric     unsigned NumCalls = 0;
44180b57cec5SDimitry Andric     BasicBlock::reverse_iterator InstIt = ++Inst->getIterator().getReverse(),
44190b57cec5SDimitry Andric                                  PrevInstIt =
44200b57cec5SDimitry Andric                                      PrevInst->getIterator().getReverse();
44210b57cec5SDimitry Andric     while (InstIt != PrevInstIt) {
44220b57cec5SDimitry Andric       if (PrevInstIt == PrevInst->getParent()->rend()) {
44230b57cec5SDimitry Andric         PrevInstIt = Inst->getParent()->rbegin();
44240b57cec5SDimitry Andric         continue;
44250b57cec5SDimitry Andric       }
44260b57cec5SDimitry Andric 
4427480093f4SDimitry Andric       // Debug information does not impact spill cost.
44280b57cec5SDimitry Andric       if ((isa<CallInst>(&*PrevInstIt) &&
44290b57cec5SDimitry Andric            !isa<DbgInfoIntrinsic>(&*PrevInstIt)) &&
44300b57cec5SDimitry Andric           &*PrevInstIt != PrevInst)
44310b57cec5SDimitry Andric         NumCalls++;
44320b57cec5SDimitry Andric 
44330b57cec5SDimitry Andric       ++PrevInstIt;
44340b57cec5SDimitry Andric     }
44350b57cec5SDimitry Andric 
44360b57cec5SDimitry Andric     if (NumCalls) {
44370b57cec5SDimitry Andric       SmallVector<Type*, 4> V;
44385f7ddb14SDimitry Andric       for (auto *II : LiveValues) {
44395f7ddb14SDimitry Andric         auto *ScalarTy = II->getType();
44405f7ddb14SDimitry Andric         if (auto *VectorTy = dyn_cast<FixedVectorType>(ScalarTy))
44415f7ddb14SDimitry Andric           ScalarTy = VectorTy->getElementType();
44425f7ddb14SDimitry Andric         V.push_back(FixedVectorType::get(ScalarTy, BundleWidth));
44435f7ddb14SDimitry Andric       }
44440b57cec5SDimitry Andric       Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(V);
44450b57cec5SDimitry Andric     }
44460b57cec5SDimitry Andric 
44470b57cec5SDimitry Andric     PrevInst = Inst;
44480b57cec5SDimitry Andric   }
44490b57cec5SDimitry Andric 
44500b57cec5SDimitry Andric   return Cost;
44510b57cec5SDimitry Andric }
44520b57cec5SDimitry Andric 
getTreeCost(ArrayRef<Value * > VectorizedVals)44535f7ddb14SDimitry Andric InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
4454af732203SDimitry Andric   InstructionCost Cost = 0;
44550b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
44560b57cec5SDimitry Andric                     << VectorizableTree.size() << ".\n");
44570b57cec5SDimitry Andric 
44580b57cec5SDimitry Andric   unsigned BundleWidth = VectorizableTree[0]->Scalars.size();
44590b57cec5SDimitry Andric 
44600b57cec5SDimitry Andric   for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) {
44610b57cec5SDimitry Andric     TreeEntry &TE = *VectorizableTree[I].get();
44620b57cec5SDimitry Andric 
44635f7ddb14SDimitry Andric     InstructionCost C = getEntryCost(&TE, VectorizedVals);
4464af732203SDimitry Andric     Cost += C;
44650b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
44660b57cec5SDimitry Andric                       << " for bundle that starts with " << *TE.Scalars[0]
4467af732203SDimitry Andric                       << ".\n"
4468af732203SDimitry Andric                       << "SLP: Current total cost = " << Cost << "\n");
44690b57cec5SDimitry Andric   }
44700b57cec5SDimitry Andric 
44710b57cec5SDimitry Andric   SmallPtrSet<Value *, 16> ExtractCostCalculated;
4472af732203SDimitry Andric   InstructionCost ExtractCost = 0;
44735f7ddb14SDimitry Andric   SmallVector<unsigned> VF;
44745f7ddb14SDimitry Andric   SmallVector<SmallVector<int>> ShuffleMask;
44755f7ddb14SDimitry Andric   SmallVector<Value *> FirstUsers;
44765f7ddb14SDimitry Andric   SmallVector<APInt> DemandedElts;
44770b57cec5SDimitry Andric   for (ExternalUser &EU : ExternalUses) {
44780b57cec5SDimitry Andric     // We only add extract cost once for the same scalar.
44790b57cec5SDimitry Andric     if (!ExtractCostCalculated.insert(EU.Scalar).second)
44800b57cec5SDimitry Andric       continue;
44810b57cec5SDimitry Andric 
44820b57cec5SDimitry Andric     // Uses by ephemeral values are free (because the ephemeral value will be
44830b57cec5SDimitry Andric     // removed prior to code generation, and so the extraction will be
44840b57cec5SDimitry Andric     // removed as well).
44850b57cec5SDimitry Andric     if (EphValues.count(EU.User))
44860b57cec5SDimitry Andric       continue;
44870b57cec5SDimitry Andric 
44885f7ddb14SDimitry Andric     // No extract cost for vector "scalar"
44895f7ddb14SDimitry Andric     if (isa<FixedVectorType>(EU.Scalar->getType()))
44905f7ddb14SDimitry Andric       continue;
44915f7ddb14SDimitry Andric 
44925f7ddb14SDimitry Andric     // Already counted the cost for external uses when tried to adjust the cost
44935f7ddb14SDimitry Andric     // for extractelements, no need to add it again.
44945f7ddb14SDimitry Andric     if (isa<ExtractElementInst>(EU.Scalar))
44955f7ddb14SDimitry Andric       continue;
44965f7ddb14SDimitry Andric 
44975f7ddb14SDimitry Andric     // If found user is an insertelement, do not calculate extract cost but try
44985f7ddb14SDimitry Andric     // to detect it as a final shuffled/identity match.
44995f7ddb14SDimitry Andric     if (EU.User && isa<InsertElementInst>(EU.User)) {
45005f7ddb14SDimitry Andric       if (auto *FTy = dyn_cast<FixedVectorType>(EU.User->getType())) {
45015f7ddb14SDimitry Andric         Optional<int> InsertIdx = getInsertIndex(EU.User, 0);
45025f7ddb14SDimitry Andric         if (!InsertIdx || *InsertIdx == UndefMaskElem)
45035f7ddb14SDimitry Andric           continue;
45045f7ddb14SDimitry Andric         Value *VU = EU.User;
45055f7ddb14SDimitry Andric         auto *It = find_if(FirstUsers, [VU](Value *V) {
45065f7ddb14SDimitry Andric           // Checks if 2 insertelements are from the same buildvector.
45075f7ddb14SDimitry Andric           if (VU->getType() != V->getType())
45085f7ddb14SDimitry Andric             return false;
45095f7ddb14SDimitry Andric           auto *IE1 = cast<InsertElementInst>(VU);
45105f7ddb14SDimitry Andric           auto *IE2 = cast<InsertElementInst>(V);
45115f7ddb14SDimitry Andric           // Go though of insertelement instructions trying to find either VU as
45125f7ddb14SDimitry Andric           // the original vector for IE2 or V as the original vector for IE1.
45135f7ddb14SDimitry Andric           do {
45145f7ddb14SDimitry Andric             if (IE1 == VU || IE2 == V)
45155f7ddb14SDimitry Andric               return true;
45165f7ddb14SDimitry Andric             if (IE1)
45175f7ddb14SDimitry Andric               IE1 = dyn_cast<InsertElementInst>(IE1->getOperand(0));
45185f7ddb14SDimitry Andric             if (IE2)
45195f7ddb14SDimitry Andric               IE2 = dyn_cast<InsertElementInst>(IE2->getOperand(0));
45205f7ddb14SDimitry Andric           } while (IE1 || IE2);
45215f7ddb14SDimitry Andric           return false;
45225f7ddb14SDimitry Andric         });
45235f7ddb14SDimitry Andric         int VecId = -1;
45245f7ddb14SDimitry Andric         if (It == FirstUsers.end()) {
45255f7ddb14SDimitry Andric           VF.push_back(FTy->getNumElements());
45265f7ddb14SDimitry Andric           ShuffleMask.emplace_back(VF.back(), UndefMaskElem);
45275f7ddb14SDimitry Andric           FirstUsers.push_back(EU.User);
45285f7ddb14SDimitry Andric           DemandedElts.push_back(APInt::getNullValue(VF.back()));
45295f7ddb14SDimitry Andric           VecId = FirstUsers.size() - 1;
45305f7ddb14SDimitry Andric         } else {
45315f7ddb14SDimitry Andric           VecId = std::distance(FirstUsers.begin(), It);
45325f7ddb14SDimitry Andric         }
45335f7ddb14SDimitry Andric         int Idx = *InsertIdx;
45345f7ddb14SDimitry Andric         ShuffleMask[VecId][Idx] = EU.Lane;
45355f7ddb14SDimitry Andric         DemandedElts[VecId].setBit(Idx);
45365f7ddb14SDimitry Andric       }
45375f7ddb14SDimitry Andric     }
45385f7ddb14SDimitry Andric 
45390b57cec5SDimitry Andric     // If we plan to rewrite the tree in a smaller type, we will need to sign
45400b57cec5SDimitry Andric     // extend the extracted value back to the original type. Here, we account
45410b57cec5SDimitry Andric     // for the extract and the added cost of the sign extend if needed.
45425ffd83dbSDimitry Andric     auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth);
45430b57cec5SDimitry Andric     auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
45440b57cec5SDimitry Andric     if (MinBWs.count(ScalarRoot)) {
45450b57cec5SDimitry Andric       auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
45460b57cec5SDimitry Andric       auto Extend =
45470b57cec5SDimitry Andric           MinBWs[ScalarRoot].second ? Instruction::SExt : Instruction::ZExt;
45485ffd83dbSDimitry Andric       VecTy = FixedVectorType::get(MinTy, BundleWidth);
45490b57cec5SDimitry Andric       ExtractCost += TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
45500b57cec5SDimitry Andric                                                    VecTy, EU.Lane);
45510b57cec5SDimitry Andric     } else {
45520b57cec5SDimitry Andric       ExtractCost +=
45530b57cec5SDimitry Andric           TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, EU.Lane);
45540b57cec5SDimitry Andric     }
45550b57cec5SDimitry Andric   }
45560b57cec5SDimitry Andric 
4557af732203SDimitry Andric   InstructionCost SpillCost = getSpillCost();
45580b57cec5SDimitry Andric   Cost += SpillCost + ExtractCost;
45595f7ddb14SDimitry Andric   for (int I = 0, E = FirstUsers.size(); I < E; ++I) {
45605f7ddb14SDimitry Andric     // For the very first element - simple shuffle of the source vector.
45615f7ddb14SDimitry Andric     int Limit = ShuffleMask[I].size() * 2;
45625f7ddb14SDimitry Andric     if (I == 0 &&
45635f7ddb14SDimitry Andric         all_of(ShuffleMask[I], [Limit](int Idx) { return Idx < Limit; }) &&
45645f7ddb14SDimitry Andric         !ShuffleVectorInst::isIdentityMask(ShuffleMask[I])) {
45655f7ddb14SDimitry Andric       InstructionCost C = TTI->getShuffleCost(
45665f7ddb14SDimitry Andric           TTI::SK_PermuteSingleSrc,
45675f7ddb14SDimitry Andric           cast<FixedVectorType>(FirstUsers[I]->getType()), ShuffleMask[I]);
45685f7ddb14SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
45695f7ddb14SDimitry Andric                         << " for final shuffle of insertelement external users "
45705f7ddb14SDimitry Andric                         << *VectorizableTree.front()->Scalars.front() << ".\n"
45715f7ddb14SDimitry Andric                         << "SLP: Current total cost = " << Cost << "\n");
45725f7ddb14SDimitry Andric       Cost += C;
45735f7ddb14SDimitry Andric       continue;
45745f7ddb14SDimitry Andric     }
45755f7ddb14SDimitry Andric     // Other elements - permutation of 2 vectors (the initial one and the next
45765f7ddb14SDimitry Andric     // Ith incoming vector).
45775f7ddb14SDimitry Andric     unsigned VF = ShuffleMask[I].size();
45785f7ddb14SDimitry Andric     for (unsigned Idx = 0; Idx < VF; ++Idx) {
45795f7ddb14SDimitry Andric       int &Mask = ShuffleMask[I][Idx];
45805f7ddb14SDimitry Andric       Mask = Mask == UndefMaskElem ? Idx : VF + Mask;
45815f7ddb14SDimitry Andric     }
45825f7ddb14SDimitry Andric     InstructionCost C = TTI->getShuffleCost(
45835f7ddb14SDimitry Andric         TTI::SK_PermuteTwoSrc, cast<FixedVectorType>(FirstUsers[I]->getType()),
45845f7ddb14SDimitry Andric         ShuffleMask[I]);
45855f7ddb14SDimitry Andric     LLVM_DEBUG(
45865f7ddb14SDimitry Andric         dbgs()
45875f7ddb14SDimitry Andric         << "SLP: Adding cost " << C
45885f7ddb14SDimitry Andric         << " for final shuffle of vector node and external insertelement users "
45895f7ddb14SDimitry Andric         << *VectorizableTree.front()->Scalars.front() << ".\n"
45905f7ddb14SDimitry Andric         << "SLP: Current total cost = " << Cost << "\n");
45915f7ddb14SDimitry Andric     Cost += C;
45925f7ddb14SDimitry Andric     InstructionCost InsertCost = TTI->getScalarizationOverhead(
45935f7ddb14SDimitry Andric         cast<FixedVectorType>(FirstUsers[I]->getType()), DemandedElts[I],
45945f7ddb14SDimitry Andric         /*Insert*/ true,
45955f7ddb14SDimitry Andric         /*Extract*/ false);
45965f7ddb14SDimitry Andric     Cost -= InsertCost;
45975f7ddb14SDimitry Andric     LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
45985f7ddb14SDimitry Andric                       << " for insertelements gather.\n"
45995f7ddb14SDimitry Andric                       << "SLP: Current total cost = " << Cost << "\n");
46005f7ddb14SDimitry Andric   }
46010b57cec5SDimitry Andric 
4602af732203SDimitry Andric #ifndef NDEBUG
4603af732203SDimitry Andric   SmallString<256> Str;
46040b57cec5SDimitry Andric   {
4605af732203SDimitry Andric     raw_svector_ostream OS(Str);
46060b57cec5SDimitry Andric     OS << "SLP: Spill Cost = " << SpillCost << ".\n"
46070b57cec5SDimitry Andric        << "SLP: Extract Cost = " << ExtractCost << ".\n"
46080b57cec5SDimitry Andric        << "SLP: Total Cost = " << Cost << ".\n";
46090b57cec5SDimitry Andric   }
46100b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << Str);
46110b57cec5SDimitry Andric   if (ViewSLPTree)
46120b57cec5SDimitry Andric     ViewGraph(this, "SLP" + F->getName(), false, Str);
4613af732203SDimitry Andric #endif
46140b57cec5SDimitry Andric 
46150b57cec5SDimitry Andric   return Cost;
46160b57cec5SDimitry Andric }
46170b57cec5SDimitry Andric 
/// Checks whether the gather node \p TE can be emitted as a shuffle of one or
/// two previously created tree entries instead of a full scalar gather.
/// On success, fills \p Mask with the shuffle mask, appends the (1 or 2)
/// source entries to \p Entries, and returns the corresponding shuffle kind;
/// returns None when a regular gather is required.
Optional<TargetTransformInfo::ShuffleKind>
BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl<int> &Mask,
                               SmallVectorImpl<const TreeEntry *> &Entries) {
  // TODO: currently checking only for Scalars in the tree entry, need to count
  // reused elements too for better cost estimation.
  Mask.assign(TE->Scalars.size(), UndefMaskElem);
  Entries.clear();
  // Build a map from each scalar value to the set of gather tree entries that
  // contain it. Only entries created before \p TE are considered.
  DenseMap<Value *, SmallPtrSet<const TreeEntry *, 4>> ValueToTEs;
  for (const std::unique_ptr<TreeEntry> &EntryPtr : VectorizableTree) {
    if (EntryPtr.get() == TE)
      break;
    if (EntryPtr->State != TreeEntry::NeedToGather)
      continue;
    for (Value *V : EntryPtr->Scalars)
      ValueToTEs.try_emplace(V).first->getSecond().insert(EntryPtr.get());
  }
  // Find all tree entries used by the gathered values. If no common entries
  // found - not a shuffle.
  // Here we build a set of tree nodes for each gathered value and trying to
  // find the intersection between these sets. If we have at least one common
  // tree node for each gathered value - we have just a permutation of the
  // single vector. If we have 2 different sets, we're in situation where we
  // have a permutation of 2 input vectors.
  SmallVector<SmallPtrSet<const TreeEntry *, 4>> UsedTEs;
  DenseMap<Value *, int> UsedValuesEntry;
  for (Value *V : TE->Scalars) {
    if (isa<UndefValue>(V))
      continue;
    // Build a list of tree entries where V is used.
    SmallPtrSet<const TreeEntry *, 4> VToTEs;
    auto It = ValueToTEs.find(V);
    if (It != ValueToTEs.end())
      VToTEs = It->second;
    // A vectorized entry that contains V is also a valid source.
    if (const TreeEntry *VTE = getTreeEntry(V))
      VToTEs.insert(VTE);
    // V is not available in any earlier entry - must do a full gather.
    if (VToTEs.empty())
      return None;
    if (UsedTEs.empty()) {
      // The first iteration, just insert the list of nodes to vector.
      UsedTEs.push_back(VToTEs);
    } else {
      // Need to check if there are any previously used tree nodes which use V.
      // If there are no such nodes, consider that we have another one input
      // vector.
      SmallPtrSet<const TreeEntry *, 4> SavedVToTEs(VToTEs);
      unsigned Idx = 0;
      for (SmallPtrSet<const TreeEntry *, 4> &Set : UsedTEs) {
        // Do we have a non-empty intersection of previously listed tree entries
        // and tree entries using current V?
        set_intersect(VToTEs, Set);
        if (!VToTEs.empty()) {
          // Yes, write the new subset and continue analysis for the next
          // scalar.
          Set.swap(VToTEs);
          break;
        }
        // Intersection with this set was empty; restore the full set and try
        // the next candidate source.
        VToTEs = SavedVToTEs;
        ++Idx;
      }
      // No non-empty intersection found - need to add a second set of possible
      // source vectors.
      if (Idx == UsedTEs.size()) {
        // If the number of input vectors is greater than 2 - not a permutation,
        // fallback to the regular gather.
        if (UsedTEs.size() == 2)
          return None;
        UsedTEs.push_back(SavedVToTEs);
        Idx = UsedTEs.size() - 1;
      }
      UsedValuesEntry.try_emplace(V, Idx);
    }
  }

  unsigned VF = 0;
  if (UsedTEs.size() == 1) {
    // Try to find the perfect match in another gather node at first.
    auto It = find_if(UsedTEs.front(), [TE](const TreeEntry *EntryPtr) {
      return EntryPtr->isSame(TE->Scalars);
    });
    if (It != UsedTEs.front().end()) {
      // The source holds exactly the same scalars - an identity shuffle.
      Entries.push_back(*It);
      std::iota(Mask.begin(), Mask.end(), 0);
      return TargetTransformInfo::SK_PermuteSingleSrc;
    }
    // No perfect match, just shuffle, so choose the first tree node.
    Entries.push_back(*UsedTEs.front().begin());
  } else {
    // Try to find nodes with the same vector factor.
    assert(UsedTEs.size() == 2 && "Expected at max 2 permuted entries.");
    // FIXME: Shall be replaced by GetVF function once non-power-2 patch is
    // landed.
    auto &&GetVF = [](const TreeEntry *TE) {
      if (!TE->ReuseShuffleIndices.empty())
        return TE->ReuseShuffleIndices.size();
      return TE->Scalars.size();
    };
    // Pick one entry from each candidate set such that both have the same
    // vector factor; a two-source shuffle needs equally sized inputs.
    DenseMap<int, const TreeEntry *> VFToTE;
    for (const TreeEntry *TE : UsedTEs.front())
      VFToTE.try_emplace(GetVF(TE), TE);
    for (const TreeEntry *TE : UsedTEs.back()) {
      auto It = VFToTE.find(GetVF(TE));
      if (It != VFToTE.end()) {
        VF = It->first;
        Entries.push_back(It->second);
        Entries.push_back(TE);
        break;
      }
    }
    // No 2 source vectors with the same vector factor - give up and do regular
    // gather.
    if (Entries.empty())
      return None;
  }

  // Build a shuffle mask for better cost estimation and vector emission.
  for (int I = 0, E = TE->Scalars.size(); I < E; ++I) {
    Value *V = TE->Scalars[I];
    if (isa<UndefValue>(V))
      continue;
    unsigned Idx = UsedValuesEntry.lookup(V);
    const TreeEntry *VTE = Entries[Idx];
    int FoundLane = VTE->findLaneForValue(V);
    // Lanes of the second source are offset by VF. In the single-source case
    // VF is still 0, but Idx is 0 there as well, so the product is harmless.
    Mask[I] = Idx * VF + FoundLane;
    // Extra check required by isSingleSourceMaskImpl function (called by
    // ShuffleVectorInst::isSingleSourceMask).
    if (Mask[I] >= 2 * E)
      return None;
  }
  switch (Entries.size()) {
  case 1:
    return TargetTransformInfo::SK_PermuteSingleSrc;
  case 2:
    return TargetTransformInfo::SK_PermuteTwoSrc;
  default:
    break;
  }
  return None;
}
47575f7ddb14SDimitry Andric 
4758af732203SDimitry Andric InstructionCost
getGatherCost(FixedVectorType * Ty,const DenseSet<unsigned> & ShuffledIndices) const4759af732203SDimitry Andric BoUpSLP::getGatherCost(FixedVectorType *Ty,
47600b57cec5SDimitry Andric                        const DenseSet<unsigned> &ShuffledIndices) const {
47615ffd83dbSDimitry Andric   unsigned NumElts = Ty->getNumElements();
47625ffd83dbSDimitry Andric   APInt DemandedElts = APInt::getNullValue(NumElts);
4763af732203SDimitry Andric   for (unsigned I = 0; I < NumElts; ++I)
4764af732203SDimitry Andric     if (!ShuffledIndices.count(I))
4765af732203SDimitry Andric       DemandedElts.setBit(I);
4766af732203SDimitry Andric   InstructionCost Cost =
4767af732203SDimitry Andric       TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true,
47685ffd83dbSDimitry Andric                                     /*Extract*/ false);
47690b57cec5SDimitry Andric   if (!ShuffledIndices.empty())
47700b57cec5SDimitry Andric     Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
47710b57cec5SDimitry Andric   return Cost;
47720b57cec5SDimitry Andric }
47730b57cec5SDimitry Andric 
getGatherCost(ArrayRef<Value * > VL) const4774af732203SDimitry Andric InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
47750b57cec5SDimitry Andric   // Find the type of the operands in VL.
47760b57cec5SDimitry Andric   Type *ScalarTy = VL[0]->getType();
47770b57cec5SDimitry Andric   if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
47780b57cec5SDimitry Andric     ScalarTy = SI->getValueOperand()->getType();
47795ffd83dbSDimitry Andric   auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
47800b57cec5SDimitry Andric   // Find the cost of inserting/extracting values from the vector.
47810b57cec5SDimitry Andric   // Check if the same elements are inserted several times and count them as
47820b57cec5SDimitry Andric   // shuffle candidates.
47830b57cec5SDimitry Andric   DenseSet<unsigned> ShuffledElements;
47840b57cec5SDimitry Andric   DenseSet<Value *> UniqueElements;
47850b57cec5SDimitry Andric   // Iterate in reverse order to consider insert elements with the high cost.
47860b57cec5SDimitry Andric   for (unsigned I = VL.size(); I > 0; --I) {
47870b57cec5SDimitry Andric     unsigned Idx = I - 1;
47885f7ddb14SDimitry Andric     if (isConstant(VL[Idx]))
47895f7ddb14SDimitry Andric       continue;
47900b57cec5SDimitry Andric     if (!UniqueElements.insert(VL[Idx]).second)
47910b57cec5SDimitry Andric       ShuffledElements.insert(Idx);
47920b57cec5SDimitry Andric   }
47930b57cec5SDimitry Andric   return getGatherCost(VecTy, ShuffledElements);
47940b57cec5SDimitry Andric }
47950b57cec5SDimitry Andric 
47960b57cec5SDimitry Andric // Perform operand reordering on the instructions in VL and return the reordered
47970b57cec5SDimitry Andric // operands in Left and Right.
reorderInputsAccordingToOpcode(ArrayRef<Value * > VL,SmallVectorImpl<Value * > & Left,SmallVectorImpl<Value * > & Right,const DataLayout & DL,ScalarEvolution & SE,const BoUpSLP & R)4798480093f4SDimitry Andric void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
4799480093f4SDimitry Andric                                              SmallVectorImpl<Value *> &Left,
4800480093f4SDimitry Andric                                              SmallVectorImpl<Value *> &Right,
4801480093f4SDimitry Andric                                              const DataLayout &DL,
4802480093f4SDimitry Andric                                              ScalarEvolution &SE,
4803480093f4SDimitry Andric                                              const BoUpSLP &R) {
48040b57cec5SDimitry Andric   if (VL.empty())
48050b57cec5SDimitry Andric     return;
4806480093f4SDimitry Andric   VLOperands Ops(VL, DL, SE, R);
48070b57cec5SDimitry Andric   // Reorder the operands in place.
48080b57cec5SDimitry Andric   Ops.reorder();
48090b57cec5SDimitry Andric   Left = Ops.getVL(0);
48100b57cec5SDimitry Andric   Right = Ops.getVL(1);
48110b57cec5SDimitry Andric }
48120b57cec5SDimitry Andric 
/// Positions the IR builder immediately after the last instruction (in
/// program order) of the bundle represented by \p E, so vectorized code is
/// emitted after all of the bundle's scalars, and sets the current debug
/// location to that of the bundle's main instruction.
void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
  // Get the basic block this bundle is in. All instructions in the bundle
  // should be in this block.
  auto *Front = E->getMainOp();
  auto *BB = Front->getParent();
  assert(llvm::all_of(E->Scalars, [=](Value *V) -> bool {
    auto *I = cast<Instruction>(V);
    return !E->isOpcodeOrAlt(I) || I->getParent() == BB;
  }));

  // The last instruction in the bundle in program order.
  Instruction *LastInst = nullptr;

  // Find the last instruction. The common case should be that BB has been
  // scheduled, and the last instruction is VL.back(). So we start with
  // VL.back() and iterate over schedule data until we reach the end of the
  // bundle. The end of the bundle is marked by null ScheduleData.
  if (BlocksSchedules.count(BB)) {
    auto *Bundle =
        BlocksSchedules[BB]->getScheduleData(E->isOneOf(E->Scalars.back()));
    if (Bundle && Bundle->isPartOfBundle())
      for (; Bundle; Bundle = Bundle->NextInBundle)
        if (Bundle->OpValue == Bundle->Inst)
          LastInst = Bundle->Inst;
  }

  // LastInst can still be null at this point if there's either not an entry
  // for BB in BlocksSchedules or there's no ScheduleData available for
  // VL.back(). This can be the case if buildTree_rec aborts for various
  // reasons (e.g., the maximum recursion depth is reached, the maximum region
  // size is reached, etc.). ScheduleData is initialized in the scheduling
  // "dry-run".
  //
  // If this happens, we can still find the last instruction by brute force. We
  // iterate forwards from Front (inclusive) until we either see all
  // instructions in the bundle or reach the end of the block. If Front is the
  // last instruction in program order, LastInst will be set to Front, and we
  // will visit all the remaining instructions in the block.
  //
  // One of the reasons we exit early from buildTree_rec is to place an upper
  // bound on compile-time. Thus, taking an additional compile-time hit here is
  // not ideal. However, this should be exceedingly rare since it requires that
  // we both exit early from buildTree_rec and that the bundle be out-of-order
  // (causing us to iterate all the way to the end of the block).
  if (!LastInst) {
    SmallPtrSet<Value *, 16> Bundle(E->Scalars.begin(), E->Scalars.end());
    for (auto &I : make_range(BasicBlock::iterator(Front), BB->end())) {
      // Each bundle scalar is erased as it is encountered; the last one seen
      // before the set empties is the last instruction in program order.
      if (Bundle.erase(&I) && E->isOpcodeOrAlt(&I))
        LastInst = &I;
      if (Bundle.empty())
        break;
    }
  }
  assert(LastInst && "Failed to find last instruction in bundle");

  // Set the insertion point after the last instruction in the bundle. Set the
  // debug location to Front.
  Builder.SetInsertPoint(BB, ++LastInst->getIterator());
  Builder.SetCurrentDebugLocation(Front->getDebugLoc());
}
48730b57cec5SDimitry Andric 
/// Materializes the scalars in \p VL as a single vector value by emitting a
/// chain of insertelement instructions starting from a poison vector.
/// Constants are inserted first, then other values, and instructions that
/// belong to the surrounding loop (or feed the insertion point) are inserted
/// last to improve later hoisting of loop-invariant code.
Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
  // List of instructions/lanes from current block and/or the blocks which are
  // part of the current loop. These instructions will be inserted at the end to
  // make it possible to optimize loops and hoist invariant instructions out of
  // the loops body with better chances for success.
  SmallVector<std::pair<Value *, unsigned>, 4> PostponedInsts;
  SmallSet<int, 4> PostponedIndices;
  Loop *L = LI->getLoopFor(Builder.GetInsertBlock());
  // Returns true when InstBB is reachable from InsertBB by walking up a chain
  // of unique single predecessors (cycles are cut via the Visited set).
  auto &&CheckPredecessor = [](BasicBlock *InstBB, BasicBlock *InsertBB) {
    SmallPtrSet<BasicBlock *, 4> Visited;
    while (InsertBB && InsertBB != InstBB && Visited.insert(InsertBB).second)
      InsertBB = InsertBB->getSinglePredecessor();
    return InsertBB && InsertBB == InstBB;
  };
  for (int I = 0, E = VL.size(); I < E; ++I) {
    if (auto *Inst = dyn_cast<Instruction>(VL[I]))
      if ((CheckPredecessor(Inst->getParent(), Builder.GetInsertBlock()) ||
           getTreeEntry(Inst) || (L && (L->contains(Inst)))) &&
          PostponedIndices.insert(I).second)
        PostponedInsts.emplace_back(Inst, I);
  }

  // Inserts \p V into lane \p Pos of \p Vec. When a real insertelement is
  // created, it is recorded for later CSE; if \p V is a scalar that got
  // vectorized elsewhere, the use is registered in ExternalUses so an
  // extractelement will be emitted for it.
  auto &&CreateInsertElement = [this](Value *Vec, Value *V, unsigned Pos) {
    Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(Pos));
    auto *InsElt = dyn_cast<InsertElementInst>(Vec);
    if (!InsElt)
      return Vec;
    GatherSeq.insert(InsElt);
    CSEBlocks.insert(InsElt->getParent());
    // Add to our 'need-to-extract' list.
    if (TreeEntry *Entry = getTreeEntry(V)) {
      // Find which lane we need to extract.
      unsigned FoundLane = Entry->findLaneForValue(V);
      ExternalUses.emplace_back(V, InsElt, FoundLane);
    }
    return Vec;
  };
  // For stores, the vectorized value is the stored operand, not the store.
  Value *Val0 =
      isa<StoreInst>(VL[0]) ? cast<StoreInst>(VL[0])->getValueOperand() : VL[0];
  FixedVectorType *VecTy = FixedVectorType::get(Val0->getType(), VL.size());
  Value *Vec = PoisonValue::get(VecTy);
  SmallVector<int> NonConsts;
  // Insert constant values at first.
  for (int I = 0, E = VL.size(); I < E; ++I) {
    if (PostponedIndices.contains(I))
      continue;
    if (!isConstant(VL[I])) {
      NonConsts.push_back(I);
      continue;
    }
    Vec = CreateInsertElement(Vec, VL[I], I);
  }
  // Insert non-constant values.
  for (int I : NonConsts)
    Vec = CreateInsertElement(Vec, VL[I], I);
  // Append instructions, which are/may be part of the loop, in the end to make
  // it possible to hoist non-loop-based instructions.
  for (const std::pair<Value *, unsigned> &Pair : PostponedInsts)
    Vec = CreateInsertElement(Vec, Pair.first, Pair.second);

  return Vec;
}
49360b57cec5SDimitry Andric 
49375f7ddb14SDimitry Andric namespace {
49385f7ddb14SDimitry Andric /// Merges shuffle masks and emits final shuffle instruction, if required.
49395f7ddb14SDimitry Andric class ShuffleInstructionBuilder {
49405f7ddb14SDimitry Andric   IRBuilderBase &Builder;
49415f7ddb14SDimitry Andric   const unsigned VF = 0;
49425f7ddb14SDimitry Andric   bool IsFinalized = false;
49435f7ddb14SDimitry Andric   SmallVector<int, 4> Mask;
49445f7ddb14SDimitry Andric 
49455f7ddb14SDimitry Andric public:
ShuffleInstructionBuilder(IRBuilderBase & Builder,unsigned VF)49465f7ddb14SDimitry Andric   ShuffleInstructionBuilder(IRBuilderBase &Builder, unsigned VF)
49475f7ddb14SDimitry Andric       : Builder(Builder), VF(VF) {}
49485f7ddb14SDimitry Andric 
49495f7ddb14SDimitry Andric   /// Adds a mask, inverting it before applying.
addInversedMask(ArrayRef<unsigned> SubMask)49505f7ddb14SDimitry Andric   void addInversedMask(ArrayRef<unsigned> SubMask) {
49515f7ddb14SDimitry Andric     if (SubMask.empty())
49525f7ddb14SDimitry Andric       return;
49535f7ddb14SDimitry Andric     SmallVector<int, 4> NewMask;
49545f7ddb14SDimitry Andric     inversePermutation(SubMask, NewMask);
49555f7ddb14SDimitry Andric     addMask(NewMask);
49565f7ddb14SDimitry Andric   }
49575f7ddb14SDimitry Andric 
49585f7ddb14SDimitry Andric   /// Functions adds masks, merging them into  single one.
addMask(ArrayRef<unsigned> SubMask)49595f7ddb14SDimitry Andric   void addMask(ArrayRef<unsigned> SubMask) {
49605f7ddb14SDimitry Andric     SmallVector<int, 4> NewMask(SubMask.begin(), SubMask.end());
49615f7ddb14SDimitry Andric     addMask(NewMask);
49625f7ddb14SDimitry Andric   }
49635f7ddb14SDimitry Andric 
addMask(ArrayRef<int> SubMask)49645f7ddb14SDimitry Andric   void addMask(ArrayRef<int> SubMask) { ::addMask(Mask, SubMask); }
49655f7ddb14SDimitry Andric 
finalize(Value * V)49665f7ddb14SDimitry Andric   Value *finalize(Value *V) {
49675f7ddb14SDimitry Andric     IsFinalized = true;
49685f7ddb14SDimitry Andric     unsigned ValueVF = cast<FixedVectorType>(V->getType())->getNumElements();
49695f7ddb14SDimitry Andric     if (VF == ValueVF && Mask.empty())
49705f7ddb14SDimitry Andric       return V;
49715f7ddb14SDimitry Andric     SmallVector<int, 4> NormalizedMask(VF, UndefMaskElem);
49725f7ddb14SDimitry Andric     std::iota(NormalizedMask.begin(), NormalizedMask.end(), 0);
49735f7ddb14SDimitry Andric     addMask(NormalizedMask);
49745f7ddb14SDimitry Andric 
49755f7ddb14SDimitry Andric     if (VF == ValueVF && ShuffleVectorInst::isIdentityMask(Mask))
49765f7ddb14SDimitry Andric       return V;
49775f7ddb14SDimitry Andric     return Builder.CreateShuffleVector(V, Mask, "shuffle");
49785f7ddb14SDimitry Andric   }
49795f7ddb14SDimitry Andric 
~ShuffleInstructionBuilder()49805f7ddb14SDimitry Andric   ~ShuffleInstructionBuilder() {
49815f7ddb14SDimitry Andric     assert((IsFinalized || Mask.empty()) &&
49825f7ddb14SDimitry Andric            "Shuffle construction must be finalized.");
49835f7ddb14SDimitry Andric   }
49845f7ddb14SDimitry Andric };
49855f7ddb14SDimitry Andric } // namespace
49865f7ddb14SDimitry Andric 
/// Vectorizes the bundle \p VL. If the bundle matches an existing tree entry,
/// the entry's vectorized value is reused and, when the widths differ,
/// reshuffled to the requested vectorization factor. Otherwise the scalars
/// are gathered, deduplicating repeated and undef values via a reuse shuffle
/// mask when the bundle has more than two lanes.
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
  unsigned VF = VL.size();
  InstructionsState S = getSameOpcode(VL);
  if (S.getOpcode()) {
    if (TreeEntry *E = getTreeEntry(S.OpValue))
      if (E->isSame(VL)) {
        Value *V = vectorizeTree(E);
        if (VF != cast<FixedVectorType>(V->getType())->getNumElements()) {
          if (!E->ReuseShuffleIndices.empty()) {
            // Reshuffle to get only unique values.
            // If some of the scalars are duplicated in the vectorization tree
            // entry, we do not vectorize them but instead generate a mask for
            // the reuses. But if there are several users of the same entry,
            // they may have different vectorization factors. This is especially
            // important for PHI nodes. In this case, we need to adapt the
            // resulting instruction for the user vectorization factor and have
            // to reshuffle it again to take only unique elements of the vector.
            // Without this code the function incorrectly returns reduced vector
            // instruction with the same elements, not with the unique ones.

            // block:
            // %phi = phi <2 x > { .., %entry} {%shuffle, %block}
            // %2 = shuffle <2 x > %phi, %poison, <4 x > <0, 0, 1, 1>
            // ... (use %2)
            // %shuffle = shuffle <2 x> %2, poison, <2 x> {0, 2}
            // br %block
            SmallVector<int> UniqueIdxs;
            SmallSet<int, 4> UsedIdxs;
            int Pos = 0;
            int Sz = VL.size();
            // Record the position of the first occurrence of each reused
            // index. Idx == Sz is skipped - presumably a sentinel for unused
            // lanes in ReuseShuffleIndices; confirm against mask construction.
            for (int Idx : E->ReuseShuffleIndices) {
              if (Idx != Sz && UsedIdxs.insert(Idx).second)
                UniqueIdxs.emplace_back(Pos);
              ++Pos;
            }
            assert(VF >= UsedIdxs.size() && "Expected vectorization factor "
                                            "less than original vector size.");
            // Pad the mask with undefs up to the requested factor.
            UniqueIdxs.append(VF - UsedIdxs.size(), UndefMaskElem);
            V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle");
          } else {
            assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
                   "Expected vectorization factor less "
                   "than original vector size.");
            // Identity mask of width VF: take the first VF lanes of the wider
            // vector.
            SmallVector<int> UniformMask(VF, 0);
            std::iota(UniformMask.begin(), UniformMask.end(), 0);
            V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle");
          }
        }
        return V;
      }
  }

  // Check that every instruction appears once in this bundle.
  SmallVector<int> ReuseShuffleIndicies;
  SmallVector<Value *> UniqueValues;
  if (VL.size() > 2) {
    DenseMap<Value *, unsigned> UniquePositions;
    // NumValues is the size of VL with trailing undef values stripped.
    unsigned NumValues =
        std::distance(VL.begin(), find_if(reverse(VL), [](Value *V) {
                                    return !isa<UndefValue>(V);
                                  }).base());
    // Round the effective number of values up to a power of 2.
    VF = std::max<unsigned>(VF, PowerOf2Ceil(NumValues));
    int UniqueVals = 0;
    bool HasUndefs = false;
    for (Value *V : VL.drop_back(VL.size() - VF)) {
      if (isa<UndefValue>(V)) {
        ReuseShuffleIndicies.emplace_back(UndefMaskElem);
        HasUndefs = true;
        continue;
      }
      if (isConstant(V)) {
        // Constants are not deduplicated; each occurrence gets its own lane.
        ReuseShuffleIndicies.emplace_back(UniqueValues.size());
        UniqueValues.emplace_back(V);
        continue;
      }
      // Non-constant repeats map back to the lane of the first occurrence.
      auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
      ReuseShuffleIndicies.emplace_back(Res.first->second);
      if (Res.second) {
        UniqueValues.emplace_back(V);
        ++UniqueVals;
      }
    }
    if (HasUndefs && UniqueVals == 1 && UniqueValues.size() == 1) {
      // Emit pure splat vector.
      // FIXME: why it is not identified as an identity.
      unsigned NumUndefs = count(ReuseShuffleIndicies, UndefMaskElem);
      if (NumUndefs == ReuseShuffleIndicies.size() - 1)
        ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(),
                                    UndefMaskElem);
      else
        ReuseShuffleIndicies.assign(VF, 0);
    } else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) {
      // (Almost) no duplicates - gather the original sequence directly,
      // without a reuse shuffle.
      ReuseShuffleIndicies.clear();
      UniqueValues.clear();
      UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues));
    }
    // Pad to the vectorization factor with poison lanes.
    UniqueValues.append(VF - UniqueValues.size(),
                        PoisonValue::get(VL[0]->getType()));
    VL = UniqueValues;
  }

  ShuffleInstructionBuilder ShuffleBuilder(Builder, VF);
  Value *Vec = gather(VL);
  if (!ReuseShuffleIndicies.empty()) {
    ShuffleBuilder.addMask(ReuseShuffleIndicies);
    Vec = ShuffleBuilder.finalize(Vec);
    // Record the emitted shuffle for later CSE of gather sequences.
    if (auto *I = dyn_cast<Instruction>(Vec)) {
      GatherSeq.insert(I);
      CSEBlocks.insert(I->getParent());
    }
  }
  return Vec;
}
51000b57cec5SDimitry Andric 
vectorizeTree(TreeEntry * E)51010b57cec5SDimitry Andric Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
51020b57cec5SDimitry Andric   IRBuilder<>::InsertPointGuard Guard(Builder);
51030b57cec5SDimitry Andric 
51040b57cec5SDimitry Andric   if (E->VectorizedValue) {
51050b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n");
51060b57cec5SDimitry Andric     return E->VectorizedValue;
51070b57cec5SDimitry Andric   }
51080b57cec5SDimitry Andric 
51090b57cec5SDimitry Andric   bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
51105f7ddb14SDimitry Andric   unsigned VF = E->Scalars.size();
51115f7ddb14SDimitry Andric   if (NeedToShuffleReuses)
51125f7ddb14SDimitry Andric     VF = E->ReuseShuffleIndices.size();
51135f7ddb14SDimitry Andric   ShuffleInstructionBuilder ShuffleBuilder(Builder, VF);
5114480093f4SDimitry Andric   if (E->State == TreeEntry::NeedToGather) {
51158bcb0991SDimitry Andric     setInsertPointAfterBundle(E);
51165f7ddb14SDimitry Andric     Value *Vec;
51175f7ddb14SDimitry Andric     SmallVector<int> Mask;
51185f7ddb14SDimitry Andric     SmallVector<const TreeEntry *> Entries;
51195f7ddb14SDimitry Andric     Optional<TargetTransformInfo::ShuffleKind> Shuffle =
51205f7ddb14SDimitry Andric         isGatherShuffledEntry(E, Mask, Entries);
51215f7ddb14SDimitry Andric     if (Shuffle.hasValue()) {
51225f7ddb14SDimitry Andric       assert((Entries.size() == 1 || Entries.size() == 2) &&
51235f7ddb14SDimitry Andric              "Expected shuffle of 1 or 2 entries.");
51245f7ddb14SDimitry Andric       Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue,
51255f7ddb14SDimitry Andric                                         Entries.back()->VectorizedValue, Mask);
51265f7ddb14SDimitry Andric     } else {
51275f7ddb14SDimitry Andric       Vec = gather(E->Scalars);
51285f7ddb14SDimitry Andric     }
51290b57cec5SDimitry Andric     if (NeedToShuffleReuses) {
51305f7ddb14SDimitry Andric       ShuffleBuilder.addMask(E->ReuseShuffleIndices);
51315f7ddb14SDimitry Andric       Vec = ShuffleBuilder.finalize(Vec);
5132af732203SDimitry Andric       if (auto *I = dyn_cast<Instruction>(Vec)) {
51330b57cec5SDimitry Andric         GatherSeq.insert(I);
51340b57cec5SDimitry Andric         CSEBlocks.insert(I->getParent());
51350b57cec5SDimitry Andric       }
51360b57cec5SDimitry Andric     }
5137af732203SDimitry Andric     E->VectorizedValue = Vec;
5138af732203SDimitry Andric     return Vec;
51390b57cec5SDimitry Andric   }
51400b57cec5SDimitry Andric 
5141af732203SDimitry Andric   assert((E->State == TreeEntry::Vectorize ||
5142af732203SDimitry Andric           E->State == TreeEntry::ScatterVectorize) &&
5143af732203SDimitry Andric          "Unhandled state");
51448bcb0991SDimitry Andric   unsigned ShuffleOrOp =
51458bcb0991SDimitry Andric       E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
5146af732203SDimitry Andric   Instruction *VL0 = E->getMainOp();
5147af732203SDimitry Andric   Type *ScalarTy = VL0->getType();
5148af732203SDimitry Andric   if (auto *Store = dyn_cast<StoreInst>(VL0))
5149af732203SDimitry Andric     ScalarTy = Store->getValueOperand()->getType();
51505f7ddb14SDimitry Andric   else if (auto *IE = dyn_cast<InsertElementInst>(VL0))
51515f7ddb14SDimitry Andric     ScalarTy = IE->getOperand(1)->getType();
5152af732203SDimitry Andric   auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
51530b57cec5SDimitry Andric   switch (ShuffleOrOp) {
51540b57cec5SDimitry Andric     case Instruction::PHI: {
51558bcb0991SDimitry Andric       auto *PH = cast<PHINode>(VL0);
51560b57cec5SDimitry Andric       Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI());
51570b57cec5SDimitry Andric       Builder.SetCurrentDebugLocation(PH->getDebugLoc());
51580b57cec5SDimitry Andric       PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
51590b57cec5SDimitry Andric       Value *V = NewPhi;
5160af732203SDimitry Andric       if (NeedToShuffleReuses)
5161af732203SDimitry Andric         V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
5162af732203SDimitry Andric 
51630b57cec5SDimitry Andric       E->VectorizedValue = V;
51640b57cec5SDimitry Andric 
51650b57cec5SDimitry Andric       // PHINodes may have multiple entries from the same block. We want to
51660b57cec5SDimitry Andric       // visit every block once.
51670b57cec5SDimitry Andric       SmallPtrSet<BasicBlock*, 4> VisitedBBs;
51680b57cec5SDimitry Andric 
51690b57cec5SDimitry Andric       for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
51700b57cec5SDimitry Andric         ValueList Operands;
51710b57cec5SDimitry Andric         BasicBlock *IBB = PH->getIncomingBlock(i);
51720b57cec5SDimitry Andric 
51730b57cec5SDimitry Andric         if (!VisitedBBs.insert(IBB).second) {
51740b57cec5SDimitry Andric           NewPhi->addIncoming(NewPhi->getIncomingValueForBlock(IBB), IBB);
51750b57cec5SDimitry Andric           continue;
51760b57cec5SDimitry Andric         }
51770b57cec5SDimitry Andric 
51780b57cec5SDimitry Andric         Builder.SetInsertPoint(IBB->getTerminator());
51790b57cec5SDimitry Andric         Builder.SetCurrentDebugLocation(PH->getDebugLoc());
51800b57cec5SDimitry Andric         Value *Vec = vectorizeTree(E->getOperand(i));
51810b57cec5SDimitry Andric         NewPhi->addIncoming(Vec, IBB);
51820b57cec5SDimitry Andric       }
51830b57cec5SDimitry Andric 
51840b57cec5SDimitry Andric       assert(NewPhi->getNumIncomingValues() == PH->getNumIncomingValues() &&
51850b57cec5SDimitry Andric              "Invalid number of incoming values");
51860b57cec5SDimitry Andric       return V;
51870b57cec5SDimitry Andric     }
51880b57cec5SDimitry Andric 
51890b57cec5SDimitry Andric     case Instruction::ExtractElement: {
51900b57cec5SDimitry Andric       Value *V = E->getSingleOperand(0);
51910b57cec5SDimitry Andric       Builder.SetInsertPoint(VL0);
51925f7ddb14SDimitry Andric       ShuffleBuilder.addInversedMask(E->ReorderIndices);
51935f7ddb14SDimitry Andric       ShuffleBuilder.addMask(E->ReuseShuffleIndices);
51945f7ddb14SDimitry Andric       V = ShuffleBuilder.finalize(V);
51950b57cec5SDimitry Andric       E->VectorizedValue = V;
51960b57cec5SDimitry Andric       return V;
51970b57cec5SDimitry Andric     }
51980b57cec5SDimitry Andric     case Instruction::ExtractValue: {
5199af732203SDimitry Andric       auto *LI = cast<LoadInst>(E->getSingleOperand(0));
52000b57cec5SDimitry Andric       Builder.SetInsertPoint(LI);
5201af732203SDimitry Andric       auto *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
52020b57cec5SDimitry Andric       Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
52035ffd83dbSDimitry Andric       LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlign());
52040b57cec5SDimitry Andric       Value *NewV = propagateMetadata(V, E->Scalars);
52055f7ddb14SDimitry Andric       ShuffleBuilder.addInversedMask(E->ReorderIndices);
52065f7ddb14SDimitry Andric       ShuffleBuilder.addMask(E->ReuseShuffleIndices);
52075f7ddb14SDimitry Andric       NewV = ShuffleBuilder.finalize(NewV);
52080b57cec5SDimitry Andric       E->VectorizedValue = NewV;
52090b57cec5SDimitry Andric       return NewV;
52100b57cec5SDimitry Andric     }
52115f7ddb14SDimitry Andric     case Instruction::InsertElement: {
52125f7ddb14SDimitry Andric       Builder.SetInsertPoint(VL0);
52135f7ddb14SDimitry Andric       Value *V = vectorizeTree(E->getOperand(1));
52145f7ddb14SDimitry Andric 
52155f7ddb14SDimitry Andric       const unsigned NumElts =
52165f7ddb14SDimitry Andric           cast<FixedVectorType>(VL0->getType())->getNumElements();
52175f7ddb14SDimitry Andric       const unsigned NumScalars = E->Scalars.size();
52185f7ddb14SDimitry Andric 
52195f7ddb14SDimitry Andric       // Create InsertVector shuffle if necessary
52205f7ddb14SDimitry Andric       Instruction *FirstInsert = nullptr;
52215f7ddb14SDimitry Andric       bool IsIdentity = true;
52225f7ddb14SDimitry Andric       unsigned Offset = UINT_MAX;
52235f7ddb14SDimitry Andric       for (unsigned I = 0; I < NumScalars; ++I) {
52245f7ddb14SDimitry Andric         Value *Scalar = E->Scalars[I];
52255f7ddb14SDimitry Andric         if (!FirstInsert &&
52265f7ddb14SDimitry Andric             !is_contained(E->Scalars, cast<Instruction>(Scalar)->getOperand(0)))
52275f7ddb14SDimitry Andric           FirstInsert = cast<Instruction>(Scalar);
52285f7ddb14SDimitry Andric         Optional<int> InsertIdx = getInsertIndex(Scalar, 0);
52295f7ddb14SDimitry Andric         if (!InsertIdx || *InsertIdx == UndefMaskElem)
52305f7ddb14SDimitry Andric           continue;
52315f7ddb14SDimitry Andric         unsigned Idx = *InsertIdx;
52325f7ddb14SDimitry Andric         if (Idx < Offset) {
52335f7ddb14SDimitry Andric           Offset = Idx;
52345f7ddb14SDimitry Andric           IsIdentity &= I == 0;
52355f7ddb14SDimitry Andric         } else {
52365f7ddb14SDimitry Andric           assert(Idx >= Offset && "Failed to find vector index offset");
52375f7ddb14SDimitry Andric           IsIdentity &= Idx - Offset == I;
52385f7ddb14SDimitry Andric         }
52395f7ddb14SDimitry Andric       }
52405f7ddb14SDimitry Andric       assert(Offset < NumElts && "Failed to find vector index offset");
52415f7ddb14SDimitry Andric 
52425f7ddb14SDimitry Andric       // Create shuffle to resize vector
52435f7ddb14SDimitry Andric       SmallVector<int> Mask(NumElts, UndefMaskElem);
52445f7ddb14SDimitry Andric       if (!IsIdentity) {
52455f7ddb14SDimitry Andric         for (unsigned I = 0; I < NumScalars; ++I) {
52465f7ddb14SDimitry Andric           Value *Scalar = E->Scalars[I];
52475f7ddb14SDimitry Andric           Optional<int> InsertIdx = getInsertIndex(Scalar, 0);
52485f7ddb14SDimitry Andric           if (!InsertIdx || *InsertIdx == UndefMaskElem)
52495f7ddb14SDimitry Andric             continue;
52505f7ddb14SDimitry Andric           Mask[*InsertIdx - Offset] = I;
52515f7ddb14SDimitry Andric         }
52525f7ddb14SDimitry Andric       } else {
52535f7ddb14SDimitry Andric         std::iota(Mask.begin(), std::next(Mask.begin(), NumScalars), 0);
52545f7ddb14SDimitry Andric       }
52555f7ddb14SDimitry Andric       if (!IsIdentity || NumElts != NumScalars)
52565f7ddb14SDimitry Andric         V = Builder.CreateShuffleVector(V, Mask);
52575f7ddb14SDimitry Andric 
52585f7ddb14SDimitry Andric       if (NumElts != NumScalars) {
52595f7ddb14SDimitry Andric         SmallVector<int> InsertMask(NumElts);
52605f7ddb14SDimitry Andric         std::iota(InsertMask.begin(), InsertMask.end(), 0);
52615f7ddb14SDimitry Andric         for (unsigned I = 0; I < NumElts; I++) {
52625f7ddb14SDimitry Andric           if (Mask[I] != UndefMaskElem)
52635f7ddb14SDimitry Andric             InsertMask[Offset + I] = NumElts + I;
52645f7ddb14SDimitry Andric         }
52655f7ddb14SDimitry Andric 
52665f7ddb14SDimitry Andric         V = Builder.CreateShuffleVector(
52675f7ddb14SDimitry Andric             FirstInsert->getOperand(0), V, InsertMask,
52685f7ddb14SDimitry Andric             cast<Instruction>(E->Scalars.back())->getName());
52695f7ddb14SDimitry Andric       }
52705f7ddb14SDimitry Andric 
52715f7ddb14SDimitry Andric       ++NumVectorInstructions;
52725f7ddb14SDimitry Andric       E->VectorizedValue = V;
52735f7ddb14SDimitry Andric       return V;
52745f7ddb14SDimitry Andric     }
52750b57cec5SDimitry Andric     case Instruction::ZExt:
52760b57cec5SDimitry Andric     case Instruction::SExt:
52770b57cec5SDimitry Andric     case Instruction::FPToUI:
52780b57cec5SDimitry Andric     case Instruction::FPToSI:
52790b57cec5SDimitry Andric     case Instruction::FPExt:
52800b57cec5SDimitry Andric     case Instruction::PtrToInt:
52810b57cec5SDimitry Andric     case Instruction::IntToPtr:
52820b57cec5SDimitry Andric     case Instruction::SIToFP:
52830b57cec5SDimitry Andric     case Instruction::UIToFP:
52840b57cec5SDimitry Andric     case Instruction::Trunc:
52850b57cec5SDimitry Andric     case Instruction::FPTrunc:
52860b57cec5SDimitry Andric     case Instruction::BitCast: {
52878bcb0991SDimitry Andric       setInsertPointAfterBundle(E);
52880b57cec5SDimitry Andric 
52890b57cec5SDimitry Andric       Value *InVec = vectorizeTree(E->getOperand(0));
52900b57cec5SDimitry Andric 
52910b57cec5SDimitry Andric       if (E->VectorizedValue) {
52920b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
52930b57cec5SDimitry Andric         return E->VectorizedValue;
52940b57cec5SDimitry Andric       }
52950b57cec5SDimitry Andric 
52968bcb0991SDimitry Andric       auto *CI = cast<CastInst>(VL0);
52970b57cec5SDimitry Andric       Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
52985f7ddb14SDimitry Andric       ShuffleBuilder.addMask(E->ReuseShuffleIndices);
52995f7ddb14SDimitry Andric       V = ShuffleBuilder.finalize(V);
5300af732203SDimitry Andric 
53010b57cec5SDimitry Andric       E->VectorizedValue = V;
53020b57cec5SDimitry Andric       ++NumVectorInstructions;
53030b57cec5SDimitry Andric       return V;
53040b57cec5SDimitry Andric     }
53050b57cec5SDimitry Andric     case Instruction::FCmp:
53060b57cec5SDimitry Andric     case Instruction::ICmp: {
53078bcb0991SDimitry Andric       setInsertPointAfterBundle(E);
53080b57cec5SDimitry Andric 
53090b57cec5SDimitry Andric       Value *L = vectorizeTree(E->getOperand(0));
53100b57cec5SDimitry Andric       Value *R = vectorizeTree(E->getOperand(1));
53110b57cec5SDimitry Andric 
53120b57cec5SDimitry Andric       if (E->VectorizedValue) {
53130b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
53140b57cec5SDimitry Andric         return E->VectorizedValue;
53150b57cec5SDimitry Andric       }
53160b57cec5SDimitry Andric 
53170b57cec5SDimitry Andric       CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
53185ffd83dbSDimitry Andric       Value *V = Builder.CreateCmp(P0, L, R);
53190b57cec5SDimitry Andric       propagateIRFlags(V, E->Scalars, VL0);
53205f7ddb14SDimitry Andric       ShuffleBuilder.addMask(E->ReuseShuffleIndices);
53215f7ddb14SDimitry Andric       V = ShuffleBuilder.finalize(V);
5322af732203SDimitry Andric 
53230b57cec5SDimitry Andric       E->VectorizedValue = V;
53240b57cec5SDimitry Andric       ++NumVectorInstructions;
53250b57cec5SDimitry Andric       return V;
53260b57cec5SDimitry Andric     }
53270b57cec5SDimitry Andric     case Instruction::Select: {
53288bcb0991SDimitry Andric       setInsertPointAfterBundle(E);
53290b57cec5SDimitry Andric 
53300b57cec5SDimitry Andric       Value *Cond = vectorizeTree(E->getOperand(0));
53310b57cec5SDimitry Andric       Value *True = vectorizeTree(E->getOperand(1));
53320b57cec5SDimitry Andric       Value *False = vectorizeTree(E->getOperand(2));
53330b57cec5SDimitry Andric 
53340b57cec5SDimitry Andric       if (E->VectorizedValue) {
53350b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
53360b57cec5SDimitry Andric         return E->VectorizedValue;
53370b57cec5SDimitry Andric       }
53380b57cec5SDimitry Andric 
53390b57cec5SDimitry Andric       Value *V = Builder.CreateSelect(Cond, True, False);
53405f7ddb14SDimitry Andric       ShuffleBuilder.addMask(E->ReuseShuffleIndices);
53415f7ddb14SDimitry Andric       V = ShuffleBuilder.finalize(V);
5342af732203SDimitry Andric 
53430b57cec5SDimitry Andric       E->VectorizedValue = V;
53440b57cec5SDimitry Andric       ++NumVectorInstructions;
53450b57cec5SDimitry Andric       return V;
53460b57cec5SDimitry Andric     }
53470b57cec5SDimitry Andric     case Instruction::FNeg: {
53488bcb0991SDimitry Andric       setInsertPointAfterBundle(E);
53490b57cec5SDimitry Andric 
53500b57cec5SDimitry Andric       Value *Op = vectorizeTree(E->getOperand(0));
53510b57cec5SDimitry Andric 
53520b57cec5SDimitry Andric       if (E->VectorizedValue) {
53530b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
53540b57cec5SDimitry Andric         return E->VectorizedValue;
53550b57cec5SDimitry Andric       }
53560b57cec5SDimitry Andric 
53570b57cec5SDimitry Andric       Value *V = Builder.CreateUnOp(
53588bcb0991SDimitry Andric           static_cast<Instruction::UnaryOps>(E->getOpcode()), Op);
53590b57cec5SDimitry Andric       propagateIRFlags(V, E->Scalars, VL0);
53600b57cec5SDimitry Andric       if (auto *I = dyn_cast<Instruction>(V))
53610b57cec5SDimitry Andric         V = propagateMetadata(I, E->Scalars);
53620b57cec5SDimitry Andric 
53635f7ddb14SDimitry Andric       ShuffleBuilder.addMask(E->ReuseShuffleIndices);
53645f7ddb14SDimitry Andric       V = ShuffleBuilder.finalize(V);
5365af732203SDimitry Andric 
53660b57cec5SDimitry Andric       E->VectorizedValue = V;
53670b57cec5SDimitry Andric       ++NumVectorInstructions;
53680b57cec5SDimitry Andric 
53690b57cec5SDimitry Andric       return V;
53700b57cec5SDimitry Andric     }
53710b57cec5SDimitry Andric     case Instruction::Add:
53720b57cec5SDimitry Andric     case Instruction::FAdd:
53730b57cec5SDimitry Andric     case Instruction::Sub:
53740b57cec5SDimitry Andric     case Instruction::FSub:
53750b57cec5SDimitry Andric     case Instruction::Mul:
53760b57cec5SDimitry Andric     case Instruction::FMul:
53770b57cec5SDimitry Andric     case Instruction::UDiv:
53780b57cec5SDimitry Andric     case Instruction::SDiv:
53790b57cec5SDimitry Andric     case Instruction::FDiv:
53800b57cec5SDimitry Andric     case Instruction::URem:
53810b57cec5SDimitry Andric     case Instruction::SRem:
53820b57cec5SDimitry Andric     case Instruction::FRem:
53830b57cec5SDimitry Andric     case Instruction::Shl:
53840b57cec5SDimitry Andric     case Instruction::LShr:
53850b57cec5SDimitry Andric     case Instruction::AShr:
53860b57cec5SDimitry Andric     case Instruction::And:
53870b57cec5SDimitry Andric     case Instruction::Or:
53880b57cec5SDimitry Andric     case Instruction::Xor: {
53898bcb0991SDimitry Andric       setInsertPointAfterBundle(E);
53900b57cec5SDimitry Andric 
53910b57cec5SDimitry Andric       Value *LHS = vectorizeTree(E->getOperand(0));
53920b57cec5SDimitry Andric       Value *RHS = vectorizeTree(E->getOperand(1));
53930b57cec5SDimitry Andric 
53940b57cec5SDimitry Andric       if (E->VectorizedValue) {
53950b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
53960b57cec5SDimitry Andric         return E->VectorizedValue;
53970b57cec5SDimitry Andric       }
53980b57cec5SDimitry Andric 
53990b57cec5SDimitry Andric       Value *V = Builder.CreateBinOp(
54008bcb0991SDimitry Andric           static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
54018bcb0991SDimitry Andric           RHS);
54020b57cec5SDimitry Andric       propagateIRFlags(V, E->Scalars, VL0);
54030b57cec5SDimitry Andric       if (auto *I = dyn_cast<Instruction>(V))
54040b57cec5SDimitry Andric         V = propagateMetadata(I, E->Scalars);
54050b57cec5SDimitry Andric 
54065f7ddb14SDimitry Andric       ShuffleBuilder.addMask(E->ReuseShuffleIndices);
54075f7ddb14SDimitry Andric       V = ShuffleBuilder.finalize(V);
5408af732203SDimitry Andric 
54090b57cec5SDimitry Andric       E->VectorizedValue = V;
54100b57cec5SDimitry Andric       ++NumVectorInstructions;
54110b57cec5SDimitry Andric 
54120b57cec5SDimitry Andric       return V;
54130b57cec5SDimitry Andric     }
54140b57cec5SDimitry Andric     case Instruction::Load: {
54150b57cec5SDimitry Andric       // Loads are inserted at the head of the tree because we don't want to
54160b57cec5SDimitry Andric       // sink them all the way down past store instructions.
54178bcb0991SDimitry Andric       bool IsReorder = E->updateStateIfReorder();
54188bcb0991SDimitry Andric       if (IsReorder)
54198bcb0991SDimitry Andric         VL0 = E->getMainOp();
54208bcb0991SDimitry Andric       setInsertPointAfterBundle(E);
54210b57cec5SDimitry Andric 
54220b57cec5SDimitry Andric       LoadInst *LI = cast<LoadInst>(VL0);
5423af732203SDimitry Andric       Instruction *NewLI;
54240b57cec5SDimitry Andric       unsigned AS = LI->getPointerAddressSpace();
54250b57cec5SDimitry Andric       Value *PO = LI->getPointerOperand();
5426af732203SDimitry Andric       if (E->State == TreeEntry::Vectorize) {
54270b57cec5SDimitry Andric 
5428af732203SDimitry Andric         Value *VecPtr = Builder.CreateBitCast(PO, VecTy->getPointerTo(AS));
5429af732203SDimitry Andric 
5430af732203SDimitry Andric         // The pointer operand uses an in-tree scalar so we add the new BitCast
5431af732203SDimitry Andric         // to ExternalUses list to make sure that an extract will be generated
5432af732203SDimitry Andric         // in the future.
5433*1aaf10a9SDimitry Andric         if (TreeEntry *Entry = getTreeEntry(PO)) {
5434*1aaf10a9SDimitry Andric           // Find which lane we need to extract.
5435*1aaf10a9SDimitry Andric           unsigned FoundLane = Entry->findLaneForValue(PO);
5436*1aaf10a9SDimitry Andric           ExternalUses.emplace_back(PO, cast<User>(VecPtr), FoundLane);
5437*1aaf10a9SDimitry Andric         }
5438af732203SDimitry Andric 
5439af732203SDimitry Andric         NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
5440af732203SDimitry Andric       } else {
5441af732203SDimitry Andric         assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");
5442af732203SDimitry Andric         Value *VecPtr = vectorizeTree(E->getOperand(0));
5443af732203SDimitry Andric         // Use the minimum alignment of the gathered loads.
5444af732203SDimitry Andric         Align CommonAlignment = LI->getAlign();
5445af732203SDimitry Andric         for (Value *V : E->Scalars)
5446af732203SDimitry Andric           CommonAlignment =
5447af732203SDimitry Andric               commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
54485f7ddb14SDimitry Andric         NewLI = Builder.CreateMaskedGather(VecTy, VecPtr, CommonAlignment);
5449af732203SDimitry Andric       }
5450af732203SDimitry Andric       Value *V = propagateMetadata(NewLI, E->Scalars);
5451af732203SDimitry Andric 
54525f7ddb14SDimitry Andric       ShuffleBuilder.addInversedMask(E->ReorderIndices);
54535f7ddb14SDimitry Andric       ShuffleBuilder.addMask(E->ReuseShuffleIndices);
54545f7ddb14SDimitry Andric       V = ShuffleBuilder.finalize(V);
54550b57cec5SDimitry Andric       E->VectorizedValue = V;
54560b57cec5SDimitry Andric       ++NumVectorInstructions;
54570b57cec5SDimitry Andric       return V;
54580b57cec5SDimitry Andric     }
54590b57cec5SDimitry Andric     case Instruction::Store: {
5460480093f4SDimitry Andric       bool IsReorder = !E->ReorderIndices.empty();
5461480093f4SDimitry Andric       auto *SI = cast<StoreInst>(
5462480093f4SDimitry Andric           IsReorder ? E->Scalars[E->ReorderIndices.front()] : VL0);
54630b57cec5SDimitry Andric       unsigned AS = SI->getPointerAddressSpace();
54640b57cec5SDimitry Andric 
54658bcb0991SDimitry Andric       setInsertPointAfterBundle(E);
54660b57cec5SDimitry Andric 
54670b57cec5SDimitry Andric       Value *VecValue = vectorizeTree(E->getOperand(0));
54685f7ddb14SDimitry Andric       ShuffleBuilder.addMask(E->ReorderIndices);
54695f7ddb14SDimitry Andric       VecValue = ShuffleBuilder.finalize(VecValue);
54705f7ddb14SDimitry Andric 
54710b57cec5SDimitry Andric       Value *ScalarPtr = SI->getPointerOperand();
5472480093f4SDimitry Andric       Value *VecPtr = Builder.CreateBitCast(
5473480093f4SDimitry Andric           ScalarPtr, VecValue->getType()->getPointerTo(AS));
54745ffd83dbSDimitry Andric       StoreInst *ST = Builder.CreateAlignedStore(VecValue, VecPtr,
54755ffd83dbSDimitry Andric                                                  SI->getAlign());
54760b57cec5SDimitry Andric 
54770b57cec5SDimitry Andric       // The pointer operand uses an in-tree scalar, so add the new BitCast to
54780b57cec5SDimitry Andric       // ExternalUses to make sure that an extract will be generated in the
54790b57cec5SDimitry Andric       // future.
5480*1aaf10a9SDimitry Andric       if (TreeEntry *Entry = getTreeEntry(ScalarPtr)) {
5481*1aaf10a9SDimitry Andric         // Find which lane we need to extract.
5482*1aaf10a9SDimitry Andric         unsigned FoundLane = Entry->findLaneForValue(ScalarPtr);
5483*1aaf10a9SDimitry Andric         ExternalUses.push_back(
5484*1aaf10a9SDimitry Andric             ExternalUser(ScalarPtr, cast<User>(VecPtr), FoundLane));
5485*1aaf10a9SDimitry Andric       }
54860b57cec5SDimitry Andric 
54870b57cec5SDimitry Andric       Value *V = propagateMetadata(ST, E->Scalars);
5488af732203SDimitry Andric 
54890b57cec5SDimitry Andric       E->VectorizedValue = V;
54900b57cec5SDimitry Andric       ++NumVectorInstructions;
54910b57cec5SDimitry Andric       return V;
54920b57cec5SDimitry Andric     }
54930b57cec5SDimitry Andric     case Instruction::GetElementPtr: {
54948bcb0991SDimitry Andric       setInsertPointAfterBundle(E);
54950b57cec5SDimitry Andric 
54960b57cec5SDimitry Andric       Value *Op0 = vectorizeTree(E->getOperand(0));
54970b57cec5SDimitry Andric 
54980b57cec5SDimitry Andric       std::vector<Value *> OpVecs;
54990b57cec5SDimitry Andric       for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
55000b57cec5SDimitry Andric            ++j) {
5501480093f4SDimitry Andric         ValueList &VL = E->getOperand(j);
5502480093f4SDimitry Andric         // Need to cast all elements to the same type before vectorization to
5503480093f4SDimitry Andric         // avoid crash.
5504480093f4SDimitry Andric         Type *VL0Ty = VL0->getOperand(j)->getType();
5505480093f4SDimitry Andric         Type *Ty = llvm::all_of(
5506480093f4SDimitry Andric                        VL, [VL0Ty](Value *V) { return VL0Ty == V->getType(); })
5507480093f4SDimitry Andric                        ? VL0Ty
5508480093f4SDimitry Andric                        : DL->getIndexType(cast<GetElementPtrInst>(VL0)
5509480093f4SDimitry Andric                                               ->getPointerOperandType()
5510480093f4SDimitry Andric                                               ->getScalarType());
5511480093f4SDimitry Andric         for (Value *&V : VL) {
5512480093f4SDimitry Andric           auto *CI = cast<ConstantInt>(V);
5513480093f4SDimitry Andric           V = ConstantExpr::getIntegerCast(CI, Ty,
5514480093f4SDimitry Andric                                            CI->getValue().isSignBitSet());
5515480093f4SDimitry Andric         }
5516480093f4SDimitry Andric         Value *OpVec = vectorizeTree(VL);
55170b57cec5SDimitry Andric         OpVecs.push_back(OpVec);
55180b57cec5SDimitry Andric       }
55190b57cec5SDimitry Andric 
55200b57cec5SDimitry Andric       Value *V = Builder.CreateGEP(
55210b57cec5SDimitry Andric           cast<GetElementPtrInst>(VL0)->getSourceElementType(), Op0, OpVecs);
55220b57cec5SDimitry Andric       if (Instruction *I = dyn_cast<Instruction>(V))
55230b57cec5SDimitry Andric         V = propagateMetadata(I, E->Scalars);
55240b57cec5SDimitry Andric 
55255f7ddb14SDimitry Andric       ShuffleBuilder.addMask(E->ReuseShuffleIndices);
55265f7ddb14SDimitry Andric       V = ShuffleBuilder.finalize(V);
5527af732203SDimitry Andric 
55280b57cec5SDimitry Andric       E->VectorizedValue = V;
55290b57cec5SDimitry Andric       ++NumVectorInstructions;
55300b57cec5SDimitry Andric 
55310b57cec5SDimitry Andric       return V;
55320b57cec5SDimitry Andric     }
55330b57cec5SDimitry Andric     case Instruction::Call: {
55340b57cec5SDimitry Andric       CallInst *CI = cast<CallInst>(VL0);
55358bcb0991SDimitry Andric       setInsertPointAfterBundle(E);
55368bcb0991SDimitry Andric 
55370b57cec5SDimitry Andric       Intrinsic::ID IID  = Intrinsic::not_intrinsic;
55388bcb0991SDimitry Andric       if (Function *FI = CI->getCalledFunction())
55390b57cec5SDimitry Andric         IID = FI->getIntrinsicID();
55408bcb0991SDimitry Andric 
55415ffd83dbSDimitry Andric       Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
55425ffd83dbSDimitry Andric 
55435ffd83dbSDimitry Andric       auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
55445ffd83dbSDimitry Andric       bool UseIntrinsic = ID != Intrinsic::not_intrinsic &&
55455ffd83dbSDimitry Andric                           VecCallCosts.first <= VecCallCosts.second;
55465ffd83dbSDimitry Andric 
55478bcb0991SDimitry Andric       Value *ScalarArg = nullptr;
55480b57cec5SDimitry Andric       std::vector<Value *> OpVecs;
55495f7ddb14SDimitry Andric       SmallVector<Type *, 2> TysForDecl =
55505f7ddb14SDimitry Andric           {FixedVectorType::get(CI->getType(), E->Scalars.size())};
55510b57cec5SDimitry Andric       for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
55520b57cec5SDimitry Andric         ValueList OpVL;
55530b57cec5SDimitry Andric         // Some intrinsics have scalar arguments. This argument should not be
55540b57cec5SDimitry Andric         // vectorized.
55555ffd83dbSDimitry Andric         if (UseIntrinsic && hasVectorInstrinsicScalarOpd(IID, j)) {
55560b57cec5SDimitry Andric           CallInst *CEI = cast<CallInst>(VL0);
55570b57cec5SDimitry Andric           ScalarArg = CEI->getArgOperand(j);
55580b57cec5SDimitry Andric           OpVecs.push_back(CEI->getArgOperand(j));
55595f7ddb14SDimitry Andric           if (hasVectorInstrinsicOverloadedScalarOpd(IID, j))
55605f7ddb14SDimitry Andric             TysForDecl.push_back(ScalarArg->getType());
55610b57cec5SDimitry Andric           continue;
55620b57cec5SDimitry Andric         }
55630b57cec5SDimitry Andric 
55640b57cec5SDimitry Andric         Value *OpVec = vectorizeTree(E->getOperand(j));
55650b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
55660b57cec5SDimitry Andric         OpVecs.push_back(OpVec);
55670b57cec5SDimitry Andric       }
55680b57cec5SDimitry Andric 
55695ffd83dbSDimitry Andric       Function *CF;
55705ffd83dbSDimitry Andric       if (!UseIntrinsic) {
5571af732203SDimitry Andric         VFShape Shape =
5572af732203SDimitry Andric             VFShape::get(*CI, ElementCount::getFixed(static_cast<unsigned>(
5573af732203SDimitry Andric                                   VecTy->getNumElements())),
55745ffd83dbSDimitry Andric                          false /*HasGlobalPred*/);
55755ffd83dbSDimitry Andric         CF = VFDatabase(*CI).getVectorizedFunction(Shape);
55765ffd83dbSDimitry Andric       } else {
55775f7ddb14SDimitry Andric         CF = Intrinsic::getDeclaration(F->getParent(), ID, TysForDecl);
55785ffd83dbSDimitry Andric       }
55795ffd83dbSDimitry Andric 
55800b57cec5SDimitry Andric       SmallVector<OperandBundleDef, 1> OpBundles;
55810b57cec5SDimitry Andric       CI->getOperandBundlesAsDefs(OpBundles);
55820b57cec5SDimitry Andric       Value *V = Builder.CreateCall(CF, OpVecs, OpBundles);
55830b57cec5SDimitry Andric 
55840b57cec5SDimitry Andric       // The scalar argument uses an in-tree scalar so we add the new vectorized
55850b57cec5SDimitry Andric       // call to ExternalUses list to make sure that an extract will be
55860b57cec5SDimitry Andric       // generated in the future.
5587*1aaf10a9SDimitry Andric       if (ScalarArg) {
5588*1aaf10a9SDimitry Andric         if (TreeEntry *Entry = getTreeEntry(ScalarArg)) {
5589*1aaf10a9SDimitry Andric           // Find which lane we need to extract.
5590*1aaf10a9SDimitry Andric           unsigned FoundLane = Entry->findLaneForValue(ScalarArg);
5591*1aaf10a9SDimitry Andric           ExternalUses.push_back(
5592*1aaf10a9SDimitry Andric               ExternalUser(ScalarArg, cast<User>(V), FoundLane));
5593*1aaf10a9SDimitry Andric         }
5594*1aaf10a9SDimitry Andric       }
55950b57cec5SDimitry Andric 
55960b57cec5SDimitry Andric       propagateIRFlags(V, E->Scalars, VL0);
55975f7ddb14SDimitry Andric       ShuffleBuilder.addMask(E->ReuseShuffleIndices);
55985f7ddb14SDimitry Andric       V = ShuffleBuilder.finalize(V);
5599af732203SDimitry Andric 
56000b57cec5SDimitry Andric       E->VectorizedValue = V;
56010b57cec5SDimitry Andric       ++NumVectorInstructions;
56020b57cec5SDimitry Andric       return V;
56030b57cec5SDimitry Andric     }
56040b57cec5SDimitry Andric     case Instruction::ShuffleVector: {
56058bcb0991SDimitry Andric       assert(E->isAltShuffle() &&
56068bcb0991SDimitry Andric              ((Instruction::isBinaryOp(E->getOpcode()) &&
56078bcb0991SDimitry Andric                Instruction::isBinaryOp(E->getAltOpcode())) ||
56088bcb0991SDimitry Andric               (Instruction::isCast(E->getOpcode()) &&
56098bcb0991SDimitry Andric                Instruction::isCast(E->getAltOpcode()))) &&
56100b57cec5SDimitry Andric              "Invalid Shuffle Vector Operand");
56110b57cec5SDimitry Andric 
56128bcb0991SDimitry Andric       Value *LHS = nullptr, *RHS = nullptr;
56138bcb0991SDimitry Andric       if (Instruction::isBinaryOp(E->getOpcode())) {
56148bcb0991SDimitry Andric         setInsertPointAfterBundle(E);
56150b57cec5SDimitry Andric         LHS = vectorizeTree(E->getOperand(0));
56160b57cec5SDimitry Andric         RHS = vectorizeTree(E->getOperand(1));
56170b57cec5SDimitry Andric       } else {
56188bcb0991SDimitry Andric         setInsertPointAfterBundle(E);
56190b57cec5SDimitry Andric         LHS = vectorizeTree(E->getOperand(0));
56200b57cec5SDimitry Andric       }
56210b57cec5SDimitry Andric 
56220b57cec5SDimitry Andric       if (E->VectorizedValue) {
56230b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
56240b57cec5SDimitry Andric         return E->VectorizedValue;
56250b57cec5SDimitry Andric       }
56260b57cec5SDimitry Andric 
56270b57cec5SDimitry Andric       Value *V0, *V1;
56288bcb0991SDimitry Andric       if (Instruction::isBinaryOp(E->getOpcode())) {
56290b57cec5SDimitry Andric         V0 = Builder.CreateBinOp(
56308bcb0991SDimitry Andric             static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS);
56310b57cec5SDimitry Andric         V1 = Builder.CreateBinOp(
56328bcb0991SDimitry Andric             static_cast<Instruction::BinaryOps>(E->getAltOpcode()), LHS, RHS);
56330b57cec5SDimitry Andric       } else {
56340b57cec5SDimitry Andric         V0 = Builder.CreateCast(
56358bcb0991SDimitry Andric             static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy);
56360b57cec5SDimitry Andric         V1 = Builder.CreateCast(
56378bcb0991SDimitry Andric             static_cast<Instruction::CastOps>(E->getAltOpcode()), LHS, VecTy);
56380b57cec5SDimitry Andric       }
56390b57cec5SDimitry Andric 
56400b57cec5SDimitry Andric       // Create shuffle to take alternate operations from the vector.
56410b57cec5SDimitry Andric       // Also, gather up main and alt scalar ops to propagate IR flags to
56420b57cec5SDimitry Andric       // each vector operation.
56430b57cec5SDimitry Andric       ValueList OpScalars, AltScalars;
56445f7ddb14SDimitry Andric       unsigned Sz = E->Scalars.size();
56455f7ddb14SDimitry Andric       SmallVector<int> Mask(Sz);
56465f7ddb14SDimitry Andric       for (unsigned I = 0; I < Sz; ++I) {
56475f7ddb14SDimitry Andric         auto *OpInst = cast<Instruction>(E->Scalars[I]);
56488bcb0991SDimitry Andric         assert(E->isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode");
56498bcb0991SDimitry Andric         if (OpInst->getOpcode() == E->getAltOpcode()) {
56505f7ddb14SDimitry Andric           Mask[I] = Sz + I;
56515f7ddb14SDimitry Andric           AltScalars.push_back(E->Scalars[I]);
56520b57cec5SDimitry Andric         } else {
56535f7ddb14SDimitry Andric           Mask[I] = I;
56545f7ddb14SDimitry Andric           OpScalars.push_back(E->Scalars[I]);
56550b57cec5SDimitry Andric         }
56560b57cec5SDimitry Andric       }
56570b57cec5SDimitry Andric 
56580b57cec5SDimitry Andric       propagateIRFlags(V0, OpScalars);
56590b57cec5SDimitry Andric       propagateIRFlags(V1, AltScalars);
56600b57cec5SDimitry Andric 
56615ffd83dbSDimitry Andric       Value *V = Builder.CreateShuffleVector(V0, V1, Mask);
56620b57cec5SDimitry Andric       if (Instruction *I = dyn_cast<Instruction>(V))
56630b57cec5SDimitry Andric         V = propagateMetadata(I, E->Scalars);
56645f7ddb14SDimitry Andric       ShuffleBuilder.addMask(E->ReuseShuffleIndices);
56655f7ddb14SDimitry Andric       V = ShuffleBuilder.finalize(V);
5666af732203SDimitry Andric 
56670b57cec5SDimitry Andric       E->VectorizedValue = V;
56680b57cec5SDimitry Andric       ++NumVectorInstructions;
56690b57cec5SDimitry Andric 
56700b57cec5SDimitry Andric       return V;
56710b57cec5SDimitry Andric     }
56720b57cec5SDimitry Andric     default:
56730b57cec5SDimitry Andric     llvm_unreachable("unknown inst");
56740b57cec5SDimitry Andric   }
56750b57cec5SDimitry Andric   return nullptr;
56760b57cec5SDimitry Andric }
56770b57cec5SDimitry Andric 
vectorizeTree()56780b57cec5SDimitry Andric Value *BoUpSLP::vectorizeTree() {
56790b57cec5SDimitry Andric   ExtraValueToDebugLocsMap ExternallyUsedValues;
56800b57cec5SDimitry Andric   return vectorizeTree(ExternallyUsedValues);
56810b57cec5SDimitry Andric }
56820b57cec5SDimitry Andric 
/// Emit vector code for the whole vectorizable tree and wire it back into the
/// surrounding IR: schedule all blocks, vectorize the root entry, generate
/// extractelement instructions for every externally used scalar, and finally
/// erase the now-dead scalar instructions.
///
/// \param ExternallyUsedValues scalars that are used outside the tree with a
///        null user; their map entries are rewritten to point at the newly
///        created extracts.
/// \returns the vectorized value of the root tree entry.
Value *
BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
  // All blocks must be scheduled before any instructions are inserted.
  for (auto &BSIter : BlocksSchedules) {
    scheduleBlock(BSIter.second.get());
  }

  // Vectorization itself starts from the function entry; vectorizeTree(E)
  // repositions the builder per bundle as needed.
  Builder.SetInsertPoint(&F->getEntryBlock().front());
  auto *VectorRoot = vectorizeTree(VectorizableTree[0].get());

  // If the vectorized tree can be rewritten in a smaller type, we truncate the
  // vectorized root. InstCombine will then rewrite the entire expression. We
  // sign extend the extracted values below.
  auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
  if (MinBWs.count(ScalarRoot)) {
    if (auto *I = dyn_cast<Instruction>(VectorRoot)) {
      // If current instr is a phi and not the last phi, insert it after the
      // last phi node.
      if (isa<PHINode>(I))
        Builder.SetInsertPoint(&*I->getParent()->getFirstInsertionPt());
      else
        Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
    }
    auto BundleWidth = VectorizableTree[0]->Scalars.size();
    auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
    auto *VecTy = FixedVectorType::get(MinTy, BundleWidth);
    // The truncated value becomes the new root; extracts below re-extend it.
    auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy);
    VectorizableTree[0]->VectorizedValue = Trunc;
  }

  LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size()
                    << " values .\n");

  // Extract all of the elements with the external uses.
  for (const auto &ExternalUse : ExternalUses) {
    Value *Scalar = ExternalUse.Scalar;
    llvm::User *User = ExternalUse.User;

    // Skip users that we already RAUW. This happens when one instruction
    // has multiple uses of the same value.
    if (User && !is_contained(Scalar->users(), User))
      continue;
    TreeEntry *E = getTreeEntry(Scalar);
    assert(E && "Invalid scalar");
    assert(E->State != TreeEntry::NeedToGather &&
           "Extracting from a gather list");

    Value *Vec = E->VectorizedValue;
    assert(Vec && "Can't find vectorizable value");

    Value *Lane = Builder.getInt32(ExternalUse.Lane);
    // Builds the extract at the current builder position and, when the tree
    // was narrowed via MinBWs, extends the result back to the scalar's type.
    auto ExtractAndExtendIfNeeded = [&](Value *Vec) {
      if (Scalar->getType() != Vec->getType()) {
        Value *Ex;
        // "Reuse" the existing extract to improve final codegen.
        if (auto *ES = dyn_cast<ExtractElementInst>(Scalar)) {
          Ex = Builder.CreateExtractElement(ES->getOperand(0),
                                            ES->getOperand(1));
        } else {
          Ex = Builder.CreateExtractElement(Vec, Lane);
        }
        // If necessary, sign-extend or zero-extend ScalarRoot
        // to the larger type.
        if (!MinBWs.count(ScalarRoot))
          return Ex;
        if (MinBWs[ScalarRoot].second)
          return Builder.CreateSExt(Ex, Scalar->getType());
        return Builder.CreateZExt(Ex, Scalar->getType());
      }
      // Scalar already has the vector type: only possible for an in-tree
      // insertelement whose result is the vector itself.
      assert(isa<FixedVectorType>(Scalar->getType()) &&
             isa<InsertElementInst>(Scalar) &&
             "In-tree scalar of vector type is not insertelement?");
      return Vec;
    };
    // If User == nullptr, the Scalar is used as extra arg. Generate
    // ExtractElement instruction and update the record for this scalar in
    // ExternallyUsedValues.
    if (!User) {
      assert(ExternallyUsedValues.count(Scalar) &&
             "Scalar with nullptr as an external user must be registered in "
             "ExternallyUsedValues map");
      // Place the extract right after the vector def (or in the entry block
      // when the vector is not an instruction, e.g. a constant).
      if (auto *VecI = dyn_cast<Instruction>(Vec)) {
        Builder.SetInsertPoint(VecI->getParent(),
                               std::next(VecI->getIterator()));
      } else {
        Builder.SetInsertPoint(&F->getEntryBlock().front());
      }
      Value *NewInst = ExtractAndExtendIfNeeded(Vec);
      CSEBlocks.insert(cast<Instruction>(Scalar)->getParent());
      // Move the debug-loc records from the scalar's map entry to the new
      // extract, then retire the scalar's entry.
      auto &NewInstLocs = ExternallyUsedValues[NewInst];
      auto It = ExternallyUsedValues.find(Scalar);
      assert(It != ExternallyUsedValues.end() &&
             "Externally used scalar is not found in ExternallyUsedValues");
      NewInstLocs.append(It->second);
      ExternallyUsedValues.erase(Scalar);
      // Required to update internally referenced instructions.
      Scalar->replaceAllUsesWith(NewInst);
      continue;
    }

    // Generate extracts for out-of-tree users.
    // Find the insertion point for the extractelement lane.
    if (auto *VecI = dyn_cast<Instruction>(Vec)) {
      if (PHINode *PH = dyn_cast<PHINode>(User)) {
        // A phi use must be materialized in the matching predecessor block;
        // there may be several incoming edges carrying the same scalar.
        for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) {
          if (PH->getIncomingValue(i) == Scalar) {
            Instruction *IncomingTerminator =
                PH->getIncomingBlock(i)->getTerminator();
            // Cannot insert before a catchswitch terminator; fall back to
            // right after the vector def instead.
            if (isa<CatchSwitchInst>(IncomingTerminator)) {
              Builder.SetInsertPoint(VecI->getParent(),
                                     std::next(VecI->getIterator()));
            } else {
              Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
            }
            Value *NewInst = ExtractAndExtendIfNeeded(Vec);
            CSEBlocks.insert(PH->getIncomingBlock(i));
            PH->setOperand(i, NewInst);
          }
        }
      } else {
        Builder.SetInsertPoint(cast<Instruction>(User));
        Value *NewInst = ExtractAndExtendIfNeeded(Vec);
        CSEBlocks.insert(cast<Instruction>(User)->getParent());
        User->replaceUsesOfWith(Scalar, NewInst);
      }
    } else {
      Builder.SetInsertPoint(&F->getEntryBlock().front());
      Value *NewInst = ExtractAndExtendIfNeeded(Vec);
      CSEBlocks.insert(&F->getEntryBlock());
      User->replaceUsesOfWith(Scalar, NewInst);
    }

    LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n");
  }

  // For each vectorized value:
  for (auto &TEPtr : VectorizableTree) {
    TreeEntry *Entry = TEPtr.get();

    // No need to handle users of gathered values.
    if (Entry->State == TreeEntry::NeedToGather)
      continue;

    assert(Entry->VectorizedValue && "Can't find vectorizable value");

    // For each lane:
    for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
      Value *Scalar = Entry->Scalars[Lane];

#ifndef NDEBUG
      // Sanity check: every remaining user of an erased scalar must be either
      // in-tree (already rewritten) or explicitly ignorable.
      Type *Ty = Scalar->getType();
      if (!Ty->isVoidTy()) {
        for (User *U : Scalar->users()) {
          LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");

          // It is legal to delete users in the ignorelist.
          assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) &&
                 "Deleting out-of-tree value");
        }
      }
#endif
      LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
      eraseInstruction(cast<Instruction>(Scalar));
    }
  }

  Builder.ClearInsertionPoint();
  InstrElementSize.clear();

  return VectorizableTree[0]->VectorizedValue;
}
58540b57cec5SDimitry Andric 
/// Clean up the insert/extract sequences created for gathers: hoist
/// loop-invariant insertelements into loop preheaders, then CSE identical
/// insert/extract instructions across the blocks recorded in CSEBlocks.
void BoUpSLP::optimizeGatherSequence() {
  LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size()
                    << " gather sequences instructions.\n");
  // LICM InsertElementInst sequences.
  for (Instruction *I : GatherSeq) {
    if (isDeleted(I))
      continue;

    // Check if this block is inside a loop.
    Loop *L = LI->getLoopFor(I->getParent());
    if (!L)
      continue;

    // Check if it has a preheader.
    BasicBlock *PreHeader = L->getLoopPreheader();
    if (!PreHeader)
      continue;

    // If the vector or the element that we insert into it are
    // instructions that are defined in this basic block then we can't
    // hoist this instruction.
    auto *Op0 = dyn_cast<Instruction>(I->getOperand(0));
    auto *Op1 = dyn_cast<Instruction>(I->getOperand(1));
    if (Op0 && L->contains(Op0))
      continue;
    if (Op1 && L->contains(Op1))
      continue;

    // We can hoist this instruction. Move it to the pre-header.
    I->moveBefore(PreHeader->getTerminator());
  }

  // Make a list of all reachable blocks in our CSE queue.
  SmallVector<const DomTreeNode *, 8> CSEWorkList;
  CSEWorkList.reserve(CSEBlocks.size());
  for (BasicBlock *BB : CSEBlocks)
    if (DomTreeNode *N = DT->getNode(BB)) {
      assert(DT->isReachableFromEntry(N));
      CSEWorkList.push_back(N);
    }

  // Sort blocks by domination. This ensures we visit a block after all blocks
  // dominating it are visited.
  llvm::sort(CSEWorkList, [](const DomTreeNode *A, const DomTreeNode *B) {
    assert((A == B) == (A->getDFSNumIn() == B->getDFSNumIn()) &&
           "Different nodes should have different DFS numbers");
    return A->getDFSNumIn() < B->getDFSNumIn();
  });

  // Perform O(N^2) search over the gather sequences and merge identical
  // instructions. TODO: We can further optimize this scan if we split the
  // instructions into different buckets based on the insert lane.
  SmallVector<Instruction *, 16> Visited;
  for (auto I = CSEWorkList.begin(), E = CSEWorkList.end(); I != E; ++I) {
    assert(*I &&
           (I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) &&
           "Worklist not sorted properly!");
    BasicBlock *BB = (*I)->getBlock();
    // For all instructions in blocks containing gather sequences:
    for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
      // Advance the iterator before any potential erase of In below.
      Instruction *In = &*it++;
      if (isDeleted(In))
        continue;
      if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
        continue;

      // Check if we can replace this instruction with any of the
      // visited instructions.
      for (Instruction *v : Visited) {
        if (In->isIdenticalTo(v) &&
            DT->dominates(v->getParent(), In->getParent())) {
          In->replaceAllUsesWith(v);
          eraseInstruction(In);
          // Mark as merged so it is not added to the Visited set below.
          In = nullptr;
          break;
        }
      }
      if (In) {
        assert(!is_contained(Visited, In));
        Visited.push_back(In);
      }
    }
  }
  CSEBlocks.clear();
  GatherSeq.clear();
}
59410b57cec5SDimitry Andric 
// Groups the instructions to a bundle (which is then a single scheduling entity)
// and schedules instructions until the bundle gets ready.
//
// Returns the head ScheduleData of the bundle on success, None if the bundle
// could not be scheduled (region extension failed or the bundle never became
// ready), and nullptr for values that do not need scheduling at all.
Optional<BoUpSLP::ScheduleData *>
BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
                                            const InstructionsState &S) {
  // PHIs and insertelements are never scheduled; nothing to bundle.
  if (isa<PHINode>(S.OpValue) || isa<InsertElementInst>(S.OpValue))
    return nullptr;

  // Initialize the instruction bundle.
  Instruction *OldScheduleEnd = ScheduleEnd;
  ScheduleData *PrevInBundle = nullptr;
  ScheduleData *Bundle = nullptr;
  bool ReSchedule = false;
  LLVM_DEBUG(dbgs() << "SLP:  bundle: " << *S.OpValue << "\n");

  // Helper that recomputes dependencies if the region grew, optionally resets
  // the schedule, and then drains the ready list until Bundle (if any) is
  // ready or no more instructions can be scheduled.
  auto &&TryScheduleBundle = [this, OldScheduleEnd, SLP](bool ReSchedule,
                                                         ScheduleData *Bundle) {
    // The scheduling region got new instructions at the lower end (or it is a
    // new region for the first bundle). This makes it necessary to
    // recalculate all dependencies.
    // It is seldom that this needs to be done a second time after adding the
    // initial bundle to the region.
    if (ScheduleEnd != OldScheduleEnd) {
      for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode())
        doForAllOpcodes(I, [](ScheduleData *SD) { SD->clearDependencies(); });
      ReSchedule = true;
    }
    if (ReSchedule) {
      resetSchedule();
      initialFillReadyList(ReadyInsts);
    }
    if (Bundle) {
      LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle
                        << " in block " << BB->getName() << "\n");
      calculateDependencies(Bundle, /*InsertInReadyList=*/true, SLP);
    }

    // Now try to schedule the new bundle or (if no bundle) just calculate
    // dependencies. As soon as the bundle is "ready" it means that there are no
    // cyclic dependencies and we can schedule it. Note that's important that we
    // don't "schedule" the bundle yet (see cancelScheduling).
    while (((!Bundle && ReSchedule) || (Bundle && !Bundle->isReady())) &&
           !ReadyInsts.empty()) {
      ScheduleData *Picked = ReadyInsts.pop_back_val();
      if (Picked->isSchedulingEntity() && Picked->isReady())
        schedule(Picked, ReadyInsts);
    }
  };

  // Make sure that the scheduling region contains all
  // instructions of the bundle.
  for (Value *V : VL) {
    if (!extendSchedulingRegion(V, S)) {
      // If the scheduling region got new instructions at the lower end (or it
      // is a new region for the first bundle). This makes it necessary to
      // recalculate all dependencies.
      // Otherwise the compiler may crash trying to incorrectly calculate
      // dependencies and emit instruction in the wrong order at the actual
      // scheduling.
      TryScheduleBundle(/*ReSchedule=*/false, nullptr);
      return None;
    }
  }

  // Link all members into a single bundle headed by the first member.
  for (Value *V : VL) {
    ScheduleData *BundleMember = getScheduleData(V);
    assert(BundleMember &&
           "no ScheduleData for bundle member (maybe not in same basic block)");
    if (BundleMember->IsScheduled) {
      // A bundle member was scheduled as single instruction before and now
      // needs to be scheduled as part of the bundle. We just get rid of the
      // existing schedule.
      LLVM_DEBUG(dbgs() << "SLP:  reset schedule because " << *BundleMember
                        << " was already scheduled\n");
      ReSchedule = true;
    }
    assert(BundleMember->isSchedulingEntity() &&
           "bundle member already part of other bundle");
    if (PrevInBundle) {
      PrevInBundle->NextInBundle = BundleMember;
    } else {
      Bundle = BundleMember;
    }
    // The head accumulates the unscheduled deps of all members.
    BundleMember->UnscheduledDepsInBundle = 0;
    Bundle->UnscheduledDepsInBundle += BundleMember->UnscheduledDeps;

    // Group the instructions to a bundle.
    BundleMember->FirstInBundle = Bundle;
    PrevInBundle = BundleMember;
  }
  assert(Bundle && "Failed to find schedule bundle");
  TryScheduleBundle(ReSchedule, Bundle);
  if (!Bundle->isReady()) {
    // Cyclic dependency: undo the bundling and report failure.
    cancelScheduling(VL, S.OpValue);
    return None;
  }
  return Bundle;
}
60400b57cec5SDimitry Andric 
cancelScheduling(ArrayRef<Value * > VL,Value * OpValue)60410b57cec5SDimitry Andric void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
60420b57cec5SDimitry Andric                                                 Value *OpValue) {
60435f7ddb14SDimitry Andric   if (isa<PHINode>(OpValue) || isa<InsertElementInst>(OpValue))
60440b57cec5SDimitry Andric     return;
60450b57cec5SDimitry Andric 
60460b57cec5SDimitry Andric   ScheduleData *Bundle = getScheduleData(OpValue);
60470b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "SLP:  cancel scheduling of " << *Bundle << "\n");
60480b57cec5SDimitry Andric   assert(!Bundle->IsScheduled &&
60490b57cec5SDimitry Andric          "Can't cancel bundle which is already scheduled");
60500b57cec5SDimitry Andric   assert(Bundle->isSchedulingEntity() && Bundle->isPartOfBundle() &&
60510b57cec5SDimitry Andric          "tried to unbundle something which is not a bundle");
60520b57cec5SDimitry Andric 
60530b57cec5SDimitry Andric   // Un-bundle: make single instructions out of the bundle.
60540b57cec5SDimitry Andric   ScheduleData *BundleMember = Bundle;
60550b57cec5SDimitry Andric   while (BundleMember) {
60560b57cec5SDimitry Andric     assert(BundleMember->FirstInBundle == Bundle && "corrupt bundle links");
60570b57cec5SDimitry Andric     BundleMember->FirstInBundle = BundleMember;
60580b57cec5SDimitry Andric     ScheduleData *Next = BundleMember->NextInBundle;
60590b57cec5SDimitry Andric     BundleMember->NextInBundle = nullptr;
60600b57cec5SDimitry Andric     BundleMember->UnscheduledDepsInBundle = BundleMember->UnscheduledDeps;
60610b57cec5SDimitry Andric     if (BundleMember->UnscheduledDepsInBundle == 0) {
60620b57cec5SDimitry Andric       ReadyInsts.insert(BundleMember);
60630b57cec5SDimitry Andric     }
60640b57cec5SDimitry Andric     BundleMember = Next;
60650b57cec5SDimitry Andric   }
60660b57cec5SDimitry Andric }
60670b57cec5SDimitry Andric 
allocateScheduleDataChunks()60680b57cec5SDimitry Andric BoUpSLP::ScheduleData *BoUpSLP::BlockScheduling::allocateScheduleDataChunks() {
60690b57cec5SDimitry Andric   // Allocate a new ScheduleData for the instruction.
60700b57cec5SDimitry Andric   if (ChunkPos >= ChunkSize) {
60718bcb0991SDimitry Andric     ScheduleDataChunks.push_back(std::make_unique<ScheduleData[]>(ChunkSize));
60720b57cec5SDimitry Andric     ChunkPos = 0;
60730b57cec5SDimitry Andric   }
60740b57cec5SDimitry Andric   return &(ScheduleDataChunks.back()[ChunkPos++]);
60750b57cec5SDimitry Andric }
60760b57cec5SDimitry Andric 
// Tries to grow the scheduling region of block BB so that it covers
// instruction V (a member of the bundle described by S). Returns true if V is
// already covered or the region was extended; returns false only when growing
// the region would exceed ScheduleRegionSizeLimit.
bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
                                                      const InstructionsState &S) {
  // Already have ScheduleData for V under the opcode value S maps it to?
  // Nothing to do.
  if (getScheduleData(V, isOneOf(S, V)))
    return true;
  Instruction *I = dyn_cast<Instruction>(V);
  assert(I && "bundle member must be an instruction");
  assert(!isa<PHINode>(I) && !isa<InsertElementInst>(I) &&
         "phi nodes/insertelements don't need to be scheduled");
  // If I already has a plain ScheduleData (i.e. it is inside the region) but
  // is used with an alternate opcode for this bundle, allocate an extra
  // ScheduleData keyed by (I, S.OpValue) in ExtraScheduleDataMap. Returns
  // false when I has no ScheduleData yet.
  auto &&CheckSheduleForI = [this, &S](Instruction *I) -> bool {
    ScheduleData *ISD = getScheduleData(I);
    if (!ISD)
      return false;
    assert(isInSchedulingRegion(ISD) &&
           "ScheduleData not in scheduling region");
    ScheduleData *SD = allocateScheduleDataChunks();
    SD->Inst = I;
    SD->init(SchedulingRegionID, S.OpValue);
    ExtraScheduleDataMap[I][S.OpValue] = SD;
    return true;
  };
  if (CheckSheduleForI(I))
    return true;
  if (!ScheduleStart) {
    // It's the first instruction in the new region.
    initScheduleData(I, I->getNextNode(), nullptr, nullptr);
    ScheduleStart = I;
    ScheduleEnd = I->getNextNode();
    // For an alternate-opcode member, also record the extra ScheduleData now
    // that the plain one exists.
    if (isOneOf(S, I) != I)
      CheckSheduleForI(I);
    assert(ScheduleEnd && "tried to vectorize a terminator?");
    LLVM_DEBUG(dbgs() << "SLP:  initialize schedule region to " << *I << "\n");
    return true;
  }
  // Search up and down at the same time, because we don't know if the new
  // instruction is above or below the existing scheduling region.
  BasicBlock::reverse_iterator UpIter =
      ++ScheduleStart->getIterator().getReverse();
  BasicBlock::reverse_iterator UpperEnd = BB->rend();
  BasicBlock::iterator DownIter = ScheduleEnd->getIterator();
  BasicBlock::iterator LowerEnd = BB->end();
  // Walk one step in each direction per iteration until I (or a block
  // boundary) is reached, charging each step against the region-size budget.
  while (UpIter != UpperEnd && DownIter != LowerEnd && &*UpIter != I &&
         &*DownIter != I) {
    if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
      LLVM_DEBUG(dbgs() << "SLP:  exceeded schedule region size limit\n");
      return false;
    }

    ++UpIter;
    ++DownIter;
  }
  if (DownIter == LowerEnd || (UpIter != UpperEnd && &*UpIter == I)) {
    // I was found above ScheduleStart (or the downward walk fell off the
    // block, which implies the same): extend the region upwards to I.
    assert(I->getParent() == ScheduleStart->getParent() &&
           "Instruction is in wrong basic block.");
    initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion);
    ScheduleStart = I;
    if (isOneOf(S, I) != I)
      CheckSheduleForI(I);
    LLVM_DEBUG(dbgs() << "SLP:  extend schedule region start to " << *I
                      << "\n");
    return true;
  }
  assert((UpIter == UpperEnd || (DownIter != LowerEnd && &*DownIter == I)) &&
         "Expected to reach top of the basic block or instruction down the "
         "lower end.");
  assert(I->getParent() == ScheduleEnd->getParent() &&
         "Instruction is in wrong basic block.");
  // Otherwise I lies below the region: extend the region downwards past I.
  initScheduleData(ScheduleEnd, I->getNextNode(), LastLoadStoreInRegion,
                   nullptr);
  ScheduleEnd = I->getNextNode();
  if (isOneOf(S, I) != I)
    CheckSheduleForI(I);
  assert(ScheduleEnd && "tried to vectorize a terminator?");
  LLVM_DEBUG(dbgs() << "SLP:  extend schedule region end to " << *I << "\n");
  return true;
}
61520b57cec5SDimitry Andric 
// Creates (or re-initializes) ScheduleData for every instruction in the
// half-open range [FromI, ToI) and threads the memory-accessing instructions
// of that range into the region's load/store chain, between PrevLoadStore and
// NextLoadStore (either of which may be null at a region boundary).
void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
                                                Instruction *ToI,
                                                ScheduleData *PrevLoadStore,
                                                ScheduleData *NextLoadStore) {
  ScheduleData *CurrentLoadStore = PrevLoadStore;
  for (Instruction *I = FromI; I != ToI; I = I->getNextNode()) {
    ScheduleData *SD = ScheduleDataMap[I];
    if (!SD) {
      // First time this instruction is seen: allocate its ScheduleData.
      SD = allocateScheduleDataChunks();
      ScheduleDataMap[I] = SD;
      SD->Inst = I;
    }
    assert(!isInSchedulingRegion(SD) &&
           "new ScheduleData already in scheduling region");
    SD->init(SchedulingRegionID, I);

    // sideeffect and pseudoprobe intrinsics are excluded from the memory
    // chain even though they report mayReadOrWriteMemory().
    if (I->mayReadOrWriteMemory() &&
        (!isa<IntrinsicInst>(I) ||
         (cast<IntrinsicInst>(I)->getIntrinsicID() != Intrinsic::sideeffect &&
          cast<IntrinsicInst>(I)->getIntrinsicID() !=
              Intrinsic::pseudoprobe))) {
      // Update the linked list of memory accessing instructions.
      if (CurrentLoadStore) {
        CurrentLoadStore->NextLoadStore = SD;
      } else {
        FirstLoadStoreInRegion = SD;
      }
      CurrentLoadStore = SD;
    }
  }
  // Splice the new range's chain into the rest of the region: either connect
  // to the following chain, or record the new region tail.
  if (NextLoadStore) {
    if (CurrentLoadStore)
      CurrentLoadStore->NextLoadStore = NextLoadStore;
  } else {
    LastLoadStoreInRegion = CurrentLoadStore;
  }
}
61900b57cec5SDimitry Andric 
// Computes (or refreshes) the dependency counts of the bundle rooted at SD
// and, transitively via a worklist, of every bundle it depends on that does
// not yet have valid dependencies. Both def-use and memory dependencies are
// counted. If InsertInReadyList is set, bundles that become ready during the
// walk are appended to ReadyInsts.
void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
                                                     bool InsertInReadyList,
                                                     BoUpSLP *SLP) {
  assert(SD->isSchedulingEntity());

  SmallVector<ScheduleData *, 10> WorkList;
  WorkList.push_back(SD);

  while (!WorkList.empty()) {
    // Note: deliberately shadows the parameter; everything below operates on
    // the bundle popped from the worklist.
    ScheduleData *SD = WorkList.pop_back_val();

    ScheduleData *BundleMember = SD;
    while (BundleMember) {
      assert(isInSchedulingRegion(BundleMember));
      if (!BundleMember->hasValidDependencies()) {

        LLVM_DEBUG(dbgs() << "SLP:       update deps of " << *BundleMember
                          << "\n");
        // Recompute from scratch for this member.
        BundleMember->Dependencies = 0;
        BundleMember->resetUnscheduledDeps();

        // Handle def-use chain dependencies.
        if (BundleMember->OpValue != BundleMember->Inst) {
          // Extra ScheduleData (alternate opcode): depends only on the plain
          // ScheduleData of the same instruction.
          ScheduleData *UseSD = getScheduleData(BundleMember->Inst);
          if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
            BundleMember->Dependencies++;
            ScheduleData *DestBundle = UseSD->FirstInBundle;
            if (!DestBundle->IsScheduled)
              BundleMember->incrementUnscheduledDeps(1);
            if (!DestBundle->hasValidDependencies())
              WorkList.push_back(DestBundle);
          }
        } else {
          // Plain ScheduleData: one dependency per in-region user bundle.
          for (User *U : BundleMember->Inst->users()) {
            if (isa<Instruction>(U)) {
              ScheduleData *UseSD = getScheduleData(U);
              if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
                BundleMember->Dependencies++;
                ScheduleData *DestBundle = UseSD->FirstInBundle;
                if (!DestBundle->IsScheduled)
                  BundleMember->incrementUnscheduledDeps(1);
                if (!DestBundle->hasValidDependencies())
                  WorkList.push_back(DestBundle);
              }
            } else {
              // I'm not sure if this can ever happen. But we need to be safe.
              // This lets the instruction/bundle never be scheduled and
              // eventually disable vectorization.
              BundleMember->Dependencies++;
              BundleMember->incrementUnscheduledDeps(1);
            }
          }
        }

        // Handle the memory dependencies.
        ScheduleData *DepDest = BundleMember->NextLoadStore;
        if (DepDest) {
          Instruction *SrcInst = BundleMember->Inst;
          MemoryLocation SrcLoc = getLocation(SrcInst, SLP->AA);
          bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
          unsigned numAliased = 0;
          unsigned DistToSrc = 1;

          // Walk the load/store chain below this member, adding a dependency
          // for each access that may alias with it.
          while (DepDest) {
            assert(isInSchedulingRegion(DepDest));

            // We have two limits to reduce the complexity:
            // 1) AliasedCheckLimit: It's a small limit to reduce calls to
            //    SLP->isAliased (which is the expensive part in this loop).
            // 2) MaxMemDepDistance: It's for very large blocks and it aborts
            //    the whole loop (even if the loop is fast, it's quadratic).
            //    It's important for the loop break condition (see below) to
            //    check this limit even between two read-only instructions.
            if (DistToSrc >= MaxMemDepDistance ||
                    ((SrcMayWrite || DepDest->Inst->mayWriteToMemory()) &&
                     (numAliased >= AliasedCheckLimit ||
                      SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)))) {

              // We increment the counter only if the locations are aliased
              // (instead of counting all alias checks). This gives a better
              // balance between reduced runtime and accurate dependencies.
              numAliased++;

              DepDest->MemoryDependencies.push_back(BundleMember);
              BundleMember->Dependencies++;
              ScheduleData *DestBundle = DepDest->FirstInBundle;
              if (!DestBundle->IsScheduled) {
                BundleMember->incrementUnscheduledDeps(1);
              }
              if (!DestBundle->hasValidDependencies()) {
                WorkList.push_back(DestBundle);
              }
            }
            DepDest = DepDest->NextLoadStore;

            // Example, explaining the loop break condition: Let's assume our
            // starting instruction is i0 and MaxMemDepDistance = 3.
            //
            //                      +--------v--v--v
            //             i0,i1,i2,i3,i4,i5,i6,i7,i8
            //             +--------^--^--^
            //
            // MaxMemDepDistance let us stop alias-checking at i3 and we add
            // dependencies from i0 to i3,i4,.. (even if they are not aliased).
            // Previously we already added dependencies from i3 to i6,i7,i8
            // (because of MaxMemDepDistance). As we added a dependency from
            // i0 to i3, we have transitive dependencies from i0 to i6,i7,i8
            // and we can abort this loop at i6.
            if (DistToSrc >= 2 * MaxMemDepDistance)
              break;
            DistToSrc++;
          }
        }
      }
      BundleMember = BundleMember->NextInBundle;
    }
    // A bundle whose dependencies are now all satisfied becomes ready.
    if (InsertInReadyList && SD->isReady()) {
      ReadyInsts.push_back(SD);
      LLVM_DEBUG(dbgs() << "SLP:     gets ready on update: " << *SD->Inst
                        << "\n");
    }
  }
}
63140b57cec5SDimitry Andric 
resetSchedule()63150b57cec5SDimitry Andric void BoUpSLP::BlockScheduling::resetSchedule() {
63160b57cec5SDimitry Andric   assert(ScheduleStart &&
63170b57cec5SDimitry Andric          "tried to reset schedule on block which has not been scheduled");
63180b57cec5SDimitry Andric   for (Instruction *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
63190b57cec5SDimitry Andric     doForAllOpcodes(I, [&](ScheduleData *SD) {
63200b57cec5SDimitry Andric       assert(isInSchedulingRegion(SD) &&
63210b57cec5SDimitry Andric              "ScheduleData not in scheduling region");
63220b57cec5SDimitry Andric       SD->IsScheduled = false;
63230b57cec5SDimitry Andric       SD->resetUnscheduledDeps();
63240b57cec5SDimitry Andric     });
63250b57cec5SDimitry Andric   }
63260b57cec5SDimitry Andric   ReadyInsts.clear();
63270b57cec5SDimitry Andric }
63280b57cec5SDimitry Andric 
// Performs the final scheduling of a block: computes dependencies for all
// bundles in BS's region, then repeatedly picks a ready bundle (preferring
// original instruction order) and physically moves its instructions to their
// scheduled position in the basic block.
void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
  // Nothing was bundled in this block (or it was already scheduled).
  if (!BS->ScheduleStart)
    return;

  LLVM_DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n");

  BS->resetSchedule();

  // For the real scheduling we use a more sophisticated ready-list: it is
  // sorted by the original instruction location. This lets the final schedule
  // be as  close as possible to the original instruction order.
  struct ScheduleDataCompare {
    bool operator()(ScheduleData *SD1, ScheduleData *SD2) const {
      return SD2->SchedulingPriority < SD1->SchedulingPriority;
    }
  };
  std::set<ScheduleData *, ScheduleDataCompare> ReadyInsts;

  // Ensure that all dependency data is updated and fill the ready-list with
  // initial instructions.
  int Idx = 0;
  int NumToSchedule = 0;
  for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd;
       I = I->getNextNode()) {
    BS->doForAllOpcodes(I, [this, &Idx, &NumToSchedule, BS](ScheduleData *SD) {
      assert((isa<InsertElementInst>(SD->Inst) ||
              SD->isPartOfBundle() == (getTreeEntry(SD->Inst) != nullptr)) &&
             "scheduler and vectorizer bundle mismatch");
      // Priority follows original program order so ties resolve naturally.
      SD->FirstInBundle->SchedulingPriority = Idx++;
      if (SD->isSchedulingEntity()) {
        BS->calculateDependencies(SD, false, this);
        NumToSchedule++;
      }
    });
  }
  BS->initialFillReadyList(ReadyInsts);

  // Instructions are emitted upwards from the end of the region; this tracks
  // the insertion point.
  Instruction *LastScheduledInst = BS->ScheduleEnd;

  // Do the "real" scheduling.
  while (!ReadyInsts.empty()) {
    ScheduleData *picked = *ReadyInsts.begin();
    ReadyInsts.erase(ReadyInsts.begin());

    // Move the scheduled instruction(s) to their dedicated places, if not
    // there yet.
    ScheduleData *BundleMember = picked;
    while (BundleMember) {
      Instruction *pickedInst = BundleMember->Inst;
      if (pickedInst->getNextNode() != LastScheduledInst) {
        BS->BB->getInstList().remove(pickedInst);
        BS->BB->getInstList().insert(LastScheduledInst->getIterator(),
                                     pickedInst);
      }
      LastScheduledInst = pickedInst;
      BundleMember = BundleMember->NextInBundle;
    }

    // Notify dependents; newly-ready bundles are pushed into ReadyInsts.
    BS->schedule(picked, ReadyInsts);
    NumToSchedule--;
  }
  assert(NumToSchedule == 0 && "could not schedule all instructions");

  // Avoid duplicate scheduling of the block.
  BS->ScheduleStart = nullptr;
}
63950b57cec5SDimitry Andric 
// Returns the element bit-width that should be used when vectorizing V.
// Prefers the width of memory operations (loads/stores) feeding or consuming
// the expression over V's own type width, and caches results per instruction
// in InstrElementSize.
unsigned BoUpSLP::getVectorElementSize(Value *V) {
  // If V is a store, just return the width of the stored value (or value
  // truncated just before storing) without traversing the expression tree.
  // This is the common case.
  if (auto *Store = dyn_cast<StoreInst>(V)) {
    if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
      return DL->getTypeSizeInBits(Trunc->getSrcTy());
    return DL->getTypeSizeInBits(Store->getValueOperand()->getType());
  }

  // For an insertelement, size by the value being inserted (operand 1).
  if (auto *IEI = dyn_cast<InsertElementInst>(V))
    return getVectorElementSize(IEI->getOperand(1));

  // Cached from a previous traversal?
  auto E = InstrElementSize.find(V);
  if (E != InstrElementSize.end())
    return E->second;

  // If V is not a store, we can traverse the expression tree to find loads
  // that feed it. The type of the loaded value may indicate a more suitable
  // width than V's type. We want to base the vector element size on the width
  // of memory operations where possible.
  SmallVector<std::pair<Instruction *, BasicBlock *>, 16> Worklist;
  SmallPtrSet<Instruction *, 16> Visited;
  if (auto *I = dyn_cast<Instruction>(V)) {
    Worklist.emplace_back(I, I->getParent());
    Visited.insert(I);
  }

  // Traverse the expression tree in bottom-up order looking for loads. If we
  // encounter an instruction we don't yet handle, we give up.
  auto Width = 0u;
  while (!Worklist.empty()) {
    Instruction *I;
    BasicBlock *Parent;
    std::tie(I, Parent) = Worklist.pop_back_val();

    // We should only be looking at scalar instructions here. If the current
    // instruction has a vector type, skip.
    auto *Ty = I->getType();
    if (isa<VectorType>(Ty))
      continue;

    // If the current instruction is a load, update MaxWidth to reflect the
    // width of the loaded value.
    if (isa<LoadInst>(I) || isa<ExtractElementInst>(I) ||
        isa<ExtractValueInst>(I))
      Width = std::max<unsigned>(Width, DL->getTypeSizeInBits(Ty));

    // Otherwise, we need to visit the operands of the instruction. We only
    // handle the interesting cases from buildTree here. If an operand is an
    // instruction we haven't yet visited and from the same basic block as the
    // user or the use is a PHI node, we add it to the worklist.
    else if (isa<PHINode>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
             isa<CmpInst>(I) || isa<SelectInst>(I) || isa<BinaryOperator>(I) ||
             isa<UnaryOperator>(I)) {
      for (Use &U : I->operands())
        if (auto *J = dyn_cast<Instruction>(U.get()))
          if (Visited.insert(J).second &&
              (isa<PHINode>(I) || J->getParent() == Parent))
            Worklist.emplace_back(J, J->getParent());
    } else {
      // Unhandled instruction kind: abandon the traversal entirely.
      break;
    }
  }

  // If we didn't encounter a memory access in the expression tree, or if we
  // gave up for some reason, just return the width of V. Otherwise, return the
  // maximum width we found.
  if (!Width) {
    // A compare produces i1; size by its operand instead.
    if (auto *CI = dyn_cast<CmpInst>(V))
      V = CI->getOperand(0);
    Width = DL->getTypeSizeInBits(V->getType());
  }

  // Cache the result for every instruction visited in this traversal.
  for (Instruction *I : Visited)
    InstrElementSize[I] = Width;

  return Width;
}
64750b57cec5SDimitry Andric 
64760b57cec5SDimitry Andric // Determine if a value V in a vectorizable expression Expr can be demoted to a
64770b57cec5SDimitry Andric // smaller type with a truncation. We collect the values that will be demoted
64780b57cec5SDimitry Andric // in ToDemote and additional roots that require investigating in Roots.
collectValuesToDemote(Value * V,SmallPtrSetImpl<Value * > & Expr,SmallVectorImpl<Value * > & ToDemote,SmallVectorImpl<Value * > & Roots)64790b57cec5SDimitry Andric static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
64800b57cec5SDimitry Andric                                   SmallVectorImpl<Value *> &ToDemote,
64810b57cec5SDimitry Andric                                   SmallVectorImpl<Value *> &Roots) {
64820b57cec5SDimitry Andric   // We can always demote constants.
64830b57cec5SDimitry Andric   if (isa<Constant>(V)) {
64840b57cec5SDimitry Andric     ToDemote.push_back(V);
64850b57cec5SDimitry Andric     return true;
64860b57cec5SDimitry Andric   }
64870b57cec5SDimitry Andric 
64880b57cec5SDimitry Andric   // If the value is not an instruction in the expression with only one use, it
64890b57cec5SDimitry Andric   // cannot be demoted.
64900b57cec5SDimitry Andric   auto *I = dyn_cast<Instruction>(V);
64910b57cec5SDimitry Andric   if (!I || !I->hasOneUse() || !Expr.count(I))
64920b57cec5SDimitry Andric     return false;
64930b57cec5SDimitry Andric 
64940b57cec5SDimitry Andric   switch (I->getOpcode()) {
64950b57cec5SDimitry Andric 
64960b57cec5SDimitry Andric   // We can always demote truncations and extensions. Since truncations can
64970b57cec5SDimitry Andric   // seed additional demotion, we save the truncated value.
64980b57cec5SDimitry Andric   case Instruction::Trunc:
64990b57cec5SDimitry Andric     Roots.push_back(I->getOperand(0));
65000b57cec5SDimitry Andric     break;
65010b57cec5SDimitry Andric   case Instruction::ZExt:
65020b57cec5SDimitry Andric   case Instruction::SExt:
65035f7ddb14SDimitry Andric     if (isa<ExtractElementInst>(I->getOperand(0)) ||
65045f7ddb14SDimitry Andric         isa<InsertElementInst>(I->getOperand(0)))
65055f7ddb14SDimitry Andric       return false;
65060b57cec5SDimitry Andric     break;
65070b57cec5SDimitry Andric 
65080b57cec5SDimitry Andric   // We can demote certain binary operations if we can demote both of their
65090b57cec5SDimitry Andric   // operands.
65100b57cec5SDimitry Andric   case Instruction::Add:
65110b57cec5SDimitry Andric   case Instruction::Sub:
65120b57cec5SDimitry Andric   case Instruction::Mul:
65130b57cec5SDimitry Andric   case Instruction::And:
65140b57cec5SDimitry Andric   case Instruction::Or:
65150b57cec5SDimitry Andric   case Instruction::Xor:
65160b57cec5SDimitry Andric     if (!collectValuesToDemote(I->getOperand(0), Expr, ToDemote, Roots) ||
65170b57cec5SDimitry Andric         !collectValuesToDemote(I->getOperand(1), Expr, ToDemote, Roots))
65180b57cec5SDimitry Andric       return false;
65190b57cec5SDimitry Andric     break;
65200b57cec5SDimitry Andric 
65210b57cec5SDimitry Andric   // We can demote selects if we can demote their true and false values.
65220b57cec5SDimitry Andric   case Instruction::Select: {
65230b57cec5SDimitry Andric     SelectInst *SI = cast<SelectInst>(I);
65240b57cec5SDimitry Andric     if (!collectValuesToDemote(SI->getTrueValue(), Expr, ToDemote, Roots) ||
65250b57cec5SDimitry Andric         !collectValuesToDemote(SI->getFalseValue(), Expr, ToDemote, Roots))
65260b57cec5SDimitry Andric       return false;
65270b57cec5SDimitry Andric     break;
65280b57cec5SDimitry Andric   }
65290b57cec5SDimitry Andric 
65300b57cec5SDimitry Andric   // We can demote phis if we can demote all their incoming operands. Note that
65310b57cec5SDimitry Andric   // we don't need to worry about cycles since we ensure single use above.
65320b57cec5SDimitry Andric   case Instruction::PHI: {
65330b57cec5SDimitry Andric     PHINode *PN = cast<PHINode>(I);
65340b57cec5SDimitry Andric     for (Value *IncValue : PN->incoming_values())
65350b57cec5SDimitry Andric       if (!collectValuesToDemote(IncValue, Expr, ToDemote, Roots))
65360b57cec5SDimitry Andric         return false;
65370b57cec5SDimitry Andric     break;
65380b57cec5SDimitry Andric   }
65390b57cec5SDimitry Andric 
65400b57cec5SDimitry Andric   // Otherwise, conservatively give up.
65410b57cec5SDimitry Andric   default:
65420b57cec5SDimitry Andric     return false;
65430b57cec5SDimitry Andric   }
65440b57cec5SDimitry Andric 
65450b57cec5SDimitry Andric   // Record the value that we can demote.
65460b57cec5SDimitry Andric   ToDemote.push_back(V);
65470b57cec5SDimitry Andric   return true;
65480b57cec5SDimitry Andric }
65490b57cec5SDimitry Andric 
// Analyze the expression tree rooted at VectorizableTree[0] and, when it is
// provably safe, record a narrower integer bit width for each demotable
// scalar in MinBWs (together with whether sign-extension is required to
// recover the original value). The actual truncation happens later during
// vectorization; this routine only performs the analysis.
void BoUpSLP::computeMinimumValueSizes() {
  // If there are no external uses, the expression tree must be rooted by a
  // store. We can't demote in-memory values, so there is nothing to do here.
  if (ExternalUses.empty())
    return;

  // We only attempt to truncate integer expressions.
  auto &TreeRoot = VectorizableTree[0]->Scalars;
  auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());
  if (!TreeRootIT)
    return;

  // If the expression is not rooted by a store, these roots should have
  // external uses. We will rely on InstCombine to rewrite the expression in
  // the narrower type. However, InstCombine only rewrites single-use values.
  // This means that if a tree entry other than a root is used externally, it
  // must have multiple uses and InstCombine will not rewrite it. The code
  // below ensures that only the roots are used externally.
  SmallPtrSet<Value *, 32> Expr(TreeRoot.begin(), TreeRoot.end());
  for (auto &EU : ExternalUses)
    if (!Expr.erase(EU.Scalar))
      return;
  if (!Expr.empty())
    return;

  // Collect the scalar values of the vectorizable expression. We will use this
  // context to determine which values can be demoted. If we see a truncation,
  // we mark it as seeding another demotion.
  for (auto &EntryPtr : VectorizableTree)
    Expr.insert(EntryPtr->Scalars.begin(), EntryPtr->Scalars.end());

  // Ensure the roots of the vectorizable tree don't form a cycle. They must
  // have a single external user that is not in the vectorizable tree.
  for (auto *Root : TreeRoot)
    if (!Root->hasOneUse() || Expr.count(*Root->user_begin()))
      return;

  // Conservatively determine if we can actually truncate the roots of the
  // expression. Collect the values that can be demoted in ToDemote and
  // additional roots that require investigating in Roots.
  SmallVector<Value *, 32> ToDemote;
  SmallVector<Value *, 4> Roots;
  for (auto *Root : TreeRoot)
    if (!collectValuesToDemote(Root, Expr, ToDemote, Roots))
      return;

  // The maximum bit width required to represent all the values that can be
  // demoted without loss of precision. It would be safe to truncate the roots
  // of the expression to this width.
  auto MaxBitWidth = 8u;

  // We first check if all the bits of the roots are demanded. If they're not,
  // we can truncate the roots to this narrower type.
  for (auto *Root : TreeRoot) {
    auto Mask = DB->getDemandedBits(cast<Instruction>(Root));
    MaxBitWidth = std::max<unsigned>(
        Mask.getBitWidth() - Mask.countLeadingZeros(), MaxBitWidth);
  }

  // True if the roots can be zero-extended back to their original type, rather
  // than sign-extended. We know that if the leading bits are not demanded, we
  // can safely zero-extend. So we initialize IsKnownPositive to True.
  bool IsKnownPositive = true;

  // If all the bits of the roots are demanded, we can try a little harder to
  // compute a narrower type. This can happen, for example, if the roots are
  // getelementptr indices. InstCombine promotes these indices to the pointer
  // width. Thus, all their bits are technically demanded even though the
  // address computation might be vectorized in a smaller type.
  //
  // We start by looking at each entry that can be demoted. We compute the
  // maximum bit width required to store the scalar by using ValueTracking to
  // compute the number of high-order bits we can truncate.
  if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType()) &&
      llvm::all_of(TreeRoot, [](Value *R) {
        assert(R->hasOneUse() && "Root should have only one use!");
        return isa<GetElementPtrInst>(R->user_back());
      })) {
    MaxBitWidth = 8u;

    // Determine if the sign bit of all the roots is known to be zero. If not,
    // IsKnownPositive is set to False.
    IsKnownPositive = llvm::all_of(TreeRoot, [&](Value *R) {
      KnownBits Known = computeKnownBits(R, *DL);
      return Known.isNonNegative();
    });

    // Determine the maximum number of bits required to store the scalar
    // values.
    for (auto *Scalar : ToDemote) {
      auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, nullptr, DT);
      auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType());
      MaxBitWidth = std::max<unsigned>(NumTypeBits - NumSignBits, MaxBitWidth);
    }

    // If we can't prove that the sign bit is zero, we must add one to the
    // maximum bit width to account for the unknown sign bit. This preserves
    // the existing sign bit so we can safely sign-extend the root back to the
    // original type. Otherwise, if we know the sign bit is zero, we will
    // zero-extend the root instead.
    //
    // FIXME: This is somewhat suboptimal, as there will be cases where adding
    //        one to the maximum bit width will yield a larger-than-necessary
    //        type. In general, we need to add an extra bit only if we can't
    //        prove that the upper bit of the original type is equal to the
    //        upper bit of the proposed smaller type. If these two bits are the
    //        same (either zero or one) we know that sign-extending from the
    //        smaller type will result in the same value. Here, since we can't
    //        yet prove this, we are just making the proposed smaller type
    //        larger to ensure correctness.
    if (!IsKnownPositive)
      ++MaxBitWidth;
  }

  // Round MaxBitWidth up to the next power-of-two.
  if (!isPowerOf2_64(MaxBitWidth))
    MaxBitWidth = NextPowerOf2(MaxBitWidth);

  // If the maximum bit width we compute is less than the width of the roots'
  // type, we can proceed with the narrowing. Otherwise, do nothing.
  if (MaxBitWidth >= TreeRootIT->getBitWidth())
    return;

  // If we can truncate the root, we must collect additional values that might
  // be demoted as a result. That is, those seeded by truncations we will
  // modify.
  while (!Roots.empty())
    collectValuesToDemote(Roots.pop_back_val(), Expr, ToDemote, Roots);

  // Finally, map the values we can demote to the maximum bit width we
  // computed.
  for (auto *Scalar : ToDemote)
    MinBWs[Scalar] = std::make_pair(MaxBitWidth, !IsKnownPositive);
}
66830b57cec5SDimitry Andric 
66840b57cec5SDimitry Andric namespace {
66850b57cec5SDimitry Andric 
66860b57cec5SDimitry Andric /// The SLPVectorizer Pass.
66870b57cec5SDimitry Andric struct SLPVectorizer : public FunctionPass {
66880b57cec5SDimitry Andric   SLPVectorizerPass Impl;
66890b57cec5SDimitry Andric 
66900b57cec5SDimitry Andric   /// Pass identification, replacement for typeid
66910b57cec5SDimitry Andric   static char ID;
66920b57cec5SDimitry Andric 
SLPVectorizer__anon75ab86282511::SLPVectorizer66930b57cec5SDimitry Andric   explicit SLPVectorizer() : FunctionPass(ID) {
66940b57cec5SDimitry Andric     initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
66950b57cec5SDimitry Andric   }
66960b57cec5SDimitry Andric 
doInitialization__anon75ab86282511::SLPVectorizer66970b57cec5SDimitry Andric   bool doInitialization(Module &M) override {
66980b57cec5SDimitry Andric     return false;
66990b57cec5SDimitry Andric   }
67000b57cec5SDimitry Andric 
runOnFunction__anon75ab86282511::SLPVectorizer67010b57cec5SDimitry Andric   bool runOnFunction(Function &F) override {
67020b57cec5SDimitry Andric     if (skipFunction(F))
67030b57cec5SDimitry Andric       return false;
67040b57cec5SDimitry Andric 
67050b57cec5SDimitry Andric     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
67060b57cec5SDimitry Andric     auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
67070b57cec5SDimitry Andric     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
67088bcb0991SDimitry Andric     auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
67090b57cec5SDimitry Andric     auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
67100b57cec5SDimitry Andric     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
67110b57cec5SDimitry Andric     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
67120b57cec5SDimitry Andric     auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
67130b57cec5SDimitry Andric     auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
67140b57cec5SDimitry Andric     auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
67150b57cec5SDimitry Andric 
67160b57cec5SDimitry Andric     return Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, ORE);
67170b57cec5SDimitry Andric   }
67180b57cec5SDimitry Andric 
getAnalysisUsage__anon75ab86282511::SLPVectorizer67190b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
67200b57cec5SDimitry Andric     FunctionPass::getAnalysisUsage(AU);
67210b57cec5SDimitry Andric     AU.addRequired<AssumptionCacheTracker>();
67220b57cec5SDimitry Andric     AU.addRequired<ScalarEvolutionWrapperPass>();
67230b57cec5SDimitry Andric     AU.addRequired<AAResultsWrapperPass>();
67240b57cec5SDimitry Andric     AU.addRequired<TargetTransformInfoWrapperPass>();
67250b57cec5SDimitry Andric     AU.addRequired<LoopInfoWrapperPass>();
67260b57cec5SDimitry Andric     AU.addRequired<DominatorTreeWrapperPass>();
67270b57cec5SDimitry Andric     AU.addRequired<DemandedBitsWrapperPass>();
67280b57cec5SDimitry Andric     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
67295ffd83dbSDimitry Andric     AU.addRequired<InjectTLIMappingsLegacy>();
67300b57cec5SDimitry Andric     AU.addPreserved<LoopInfoWrapperPass>();
67310b57cec5SDimitry Andric     AU.addPreserved<DominatorTreeWrapperPass>();
67320b57cec5SDimitry Andric     AU.addPreserved<AAResultsWrapperPass>();
67330b57cec5SDimitry Andric     AU.addPreserved<GlobalsAAWrapperPass>();
67340b57cec5SDimitry Andric     AU.setPreservesCFG();
67350b57cec5SDimitry Andric   }
67360b57cec5SDimitry Andric };
67370b57cec5SDimitry Andric 
67380b57cec5SDimitry Andric } // end anonymous namespace
67390b57cec5SDimitry Andric 
run(Function & F,FunctionAnalysisManager & AM)67400b57cec5SDimitry Andric PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) {
67410b57cec5SDimitry Andric   auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
67420b57cec5SDimitry Andric   auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
67430b57cec5SDimitry Andric   auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F);
67440b57cec5SDimitry Andric   auto *AA = &AM.getResult<AAManager>(F);
67450b57cec5SDimitry Andric   auto *LI = &AM.getResult<LoopAnalysis>(F);
67460b57cec5SDimitry Andric   auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
67470b57cec5SDimitry Andric   auto *AC = &AM.getResult<AssumptionAnalysis>(F);
67480b57cec5SDimitry Andric   auto *DB = &AM.getResult<DemandedBitsAnalysis>(F);
67490b57cec5SDimitry Andric   auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
67500b57cec5SDimitry Andric 
67510b57cec5SDimitry Andric   bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, ORE);
67520b57cec5SDimitry Andric   if (!Changed)
67530b57cec5SDimitry Andric     return PreservedAnalyses::all();
67540b57cec5SDimitry Andric 
67550b57cec5SDimitry Andric   PreservedAnalyses PA;
67560b57cec5SDimitry Andric   PA.preserveSet<CFGAnalyses>();
67570b57cec5SDimitry Andric   return PA;
67580b57cec5SDimitry Andric }
67590b57cec5SDimitry Andric 
runImpl(Function & F,ScalarEvolution * SE_,TargetTransformInfo * TTI_,TargetLibraryInfo * TLI_,AAResults * AA_,LoopInfo * LI_,DominatorTree * DT_,AssumptionCache * AC_,DemandedBits * DB_,OptimizationRemarkEmitter * ORE_)67600b57cec5SDimitry Andric bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
67610b57cec5SDimitry Andric                                 TargetTransformInfo *TTI_,
6762af732203SDimitry Andric                                 TargetLibraryInfo *TLI_, AAResults *AA_,
67630b57cec5SDimitry Andric                                 LoopInfo *LI_, DominatorTree *DT_,
67640b57cec5SDimitry Andric                                 AssumptionCache *AC_, DemandedBits *DB_,
67650b57cec5SDimitry Andric                                 OptimizationRemarkEmitter *ORE_) {
67665ffd83dbSDimitry Andric   if (!RunSLPVectorization)
67675ffd83dbSDimitry Andric     return false;
67680b57cec5SDimitry Andric   SE = SE_;
67690b57cec5SDimitry Andric   TTI = TTI_;
67700b57cec5SDimitry Andric   TLI = TLI_;
67710b57cec5SDimitry Andric   AA = AA_;
67720b57cec5SDimitry Andric   LI = LI_;
67730b57cec5SDimitry Andric   DT = DT_;
67740b57cec5SDimitry Andric   AC = AC_;
67750b57cec5SDimitry Andric   DB = DB_;
67760b57cec5SDimitry Andric   DL = &F.getParent()->getDataLayout();
67770b57cec5SDimitry Andric 
67780b57cec5SDimitry Andric   Stores.clear();
67790b57cec5SDimitry Andric   GEPs.clear();
67800b57cec5SDimitry Andric   bool Changed = false;
67810b57cec5SDimitry Andric 
67820b57cec5SDimitry Andric   // If the target claims to have no vector registers don't attempt
67830b57cec5SDimitry Andric   // vectorization.
67848bcb0991SDimitry Andric   if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)))
67850b57cec5SDimitry Andric     return false;
67860b57cec5SDimitry Andric 
67870b57cec5SDimitry Andric   // Don't vectorize when the attribute NoImplicitFloat is used.
67880b57cec5SDimitry Andric   if (F.hasFnAttribute(Attribute::NoImplicitFloat))
67890b57cec5SDimitry Andric     return false;
67900b57cec5SDimitry Andric 
67910b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");
67920b57cec5SDimitry Andric 
67930b57cec5SDimitry Andric   // Use the bottom up slp vectorizer to construct chains that start with
67940b57cec5SDimitry Andric   // store instructions.
67950b57cec5SDimitry Andric   BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL, ORE_);
67960b57cec5SDimitry Andric 
67970b57cec5SDimitry Andric   // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
67980b57cec5SDimitry Andric   // delete instructions.
67990b57cec5SDimitry Andric 
68005f7ddb14SDimitry Andric   // Update DFS numbers now so that we can use them for ordering.
68015f7ddb14SDimitry Andric   DT->updateDFSNumbers();
68025f7ddb14SDimitry Andric 
68030b57cec5SDimitry Andric   // Scan the blocks in the function in post order.
68040b57cec5SDimitry Andric   for (auto BB : post_order(&F.getEntryBlock())) {
68050b57cec5SDimitry Andric     collectSeedInstructions(BB);
68060b57cec5SDimitry Andric 
68070b57cec5SDimitry Andric     // Vectorize trees that end at stores.
68080b57cec5SDimitry Andric     if (!Stores.empty()) {
68090b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: Found stores for " << Stores.size()
68100b57cec5SDimitry Andric                         << " underlying objects.\n");
68110b57cec5SDimitry Andric       Changed |= vectorizeStoreChains(R);
68120b57cec5SDimitry Andric     }
68130b57cec5SDimitry Andric 
68140b57cec5SDimitry Andric     // Vectorize trees that end at reductions.
68150b57cec5SDimitry Andric     Changed |= vectorizeChainsInBlock(BB, R);
68160b57cec5SDimitry Andric 
68170b57cec5SDimitry Andric     // Vectorize the index computations of getelementptr instructions. This
68180b57cec5SDimitry Andric     // is primarily intended to catch gather-like idioms ending at
68190b57cec5SDimitry Andric     // non-consecutive loads.
68200b57cec5SDimitry Andric     if (!GEPs.empty()) {
68210b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: Found GEPs for " << GEPs.size()
68220b57cec5SDimitry Andric                         << " underlying objects.\n");
68230b57cec5SDimitry Andric       Changed |= vectorizeGEPIndices(BB, R);
68240b57cec5SDimitry Andric     }
68250b57cec5SDimitry Andric   }
68260b57cec5SDimitry Andric 
68270b57cec5SDimitry Andric   if (Changed) {
68280b57cec5SDimitry Andric     R.optimizeGatherSequence();
68290b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n");
68300b57cec5SDimitry Andric   }
68310b57cec5SDimitry Andric   return Changed;
68320b57cec5SDimitry Andric }
68330b57cec5SDimitry Andric 
68345f7ddb14SDimitry Andric /// Order may have elements assigned special value (size) which is out of
68355f7ddb14SDimitry Andric /// bounds. Such indices only appear on places which correspond to undef values
68365f7ddb14SDimitry Andric /// (see canReuseExtract for details) and used in order to avoid undef values
68375f7ddb14SDimitry Andric /// have effect on operands ordering.
68385f7ddb14SDimitry Andric /// The first loop below simply finds all unused indices and then the next loop
68395f7ddb14SDimitry Andric /// nest assigns these indices for undef values positions.
68405f7ddb14SDimitry Andric /// As an example below Order has two undef positions and they have assigned
68415f7ddb14SDimitry Andric /// values 3 and 7 respectively:
68425f7ddb14SDimitry Andric /// before:  6 9 5 4 9 2 1 0
68435f7ddb14SDimitry Andric /// after:   6 3 5 4 7 2 1 0
68445f7ddb14SDimitry Andric /// \returns Fixed ordering.
fixupOrderingIndices(ArrayRef<unsigned> Order)68455f7ddb14SDimitry Andric static BoUpSLP::OrdersType fixupOrderingIndices(ArrayRef<unsigned> Order) {
68465f7ddb14SDimitry Andric   BoUpSLP::OrdersType NewOrder(Order.begin(), Order.end());
68475f7ddb14SDimitry Andric   const unsigned Sz = NewOrder.size();
68485f7ddb14SDimitry Andric   SmallBitVector UsedIndices(Sz);
68495f7ddb14SDimitry Andric   SmallVector<int> MaskedIndices;
68505f7ddb14SDimitry Andric   for (int I = 0, E = NewOrder.size(); I < E; ++I) {
68515f7ddb14SDimitry Andric     if (NewOrder[I] < Sz)
68525f7ddb14SDimitry Andric       UsedIndices.set(NewOrder[I]);
68535f7ddb14SDimitry Andric     else
68545f7ddb14SDimitry Andric       MaskedIndices.push_back(I);
68555f7ddb14SDimitry Andric   }
68565f7ddb14SDimitry Andric   if (MaskedIndices.empty())
68575f7ddb14SDimitry Andric     return NewOrder;
68585f7ddb14SDimitry Andric   SmallVector<int> AvailableIndices(MaskedIndices.size());
68595f7ddb14SDimitry Andric   unsigned Cnt = 0;
68605f7ddb14SDimitry Andric   int Idx = UsedIndices.find_first();
68615f7ddb14SDimitry Andric   do {
68625f7ddb14SDimitry Andric     AvailableIndices[Cnt] = Idx;
68635f7ddb14SDimitry Andric     Idx = UsedIndices.find_next(Idx);
68645f7ddb14SDimitry Andric     ++Cnt;
68655f7ddb14SDimitry Andric   } while (Idx > 0);
68665f7ddb14SDimitry Andric   assert(Cnt == MaskedIndices.size() && "Non-synced masked/available indices.");
68675f7ddb14SDimitry Andric   for (int I = 0, E = MaskedIndices.size(); I < E; ++I)
68685f7ddb14SDimitry Andric     NewOrder[MaskedIndices[I]] = AvailableIndices[I];
68695f7ddb14SDimitry Andric   return NewOrder;
68705f7ddb14SDimitry Andric }
68715f7ddb14SDimitry Andric 
/// Try to vectorize one chain of stores that were proven consecutive in
/// memory.
///
/// \param Chain the candidate stores, in memory order.
/// \param R     the bottom-up SLP tree builder/costing engine to use.
/// \param Idx   offset of this chain within the surrounding candidate list;
///              used only for debug output.
/// \returns true if the chain was vectorized.
bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
                                            unsigned Idx) {
  LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << Chain.size()
                    << "\n");
  const unsigned Sz = R.getVectorElementSize(Chain[0]);
  // Minimum number of elements needed to fill the smallest vector register.
  const unsigned MinVF = R.getMinVecRegSize() / Sz;
  unsigned VF = Chain.size();

  // Require power-of-two element size and chain length, at least two stores,
  // and enough stores to fill the minimum-width vector register.
  if (!isPowerOf2_32(Sz) || !isPowerOf2_32(VF) || VF < 2 || VF < MinVF)
    return false;

  LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << Idx
                    << "\n");

  R.buildTree(Chain);
  Optional<ArrayRef<unsigned>> Order = R.bestOrder();
  // TODO: Handle orders of size less than number of elements in the vector.
  if (Order && Order->size() == Chain.size()) {
    // A better scalar order was found: rebuild the tree with the stores
    // permuted accordingly (undef slots in the order are patched first).
    // TODO: reorder tree nodes without tree rebuilding.
    SmallVector<Value *, 4> ReorderedOps(Chain.size());
    transform(fixupOrderingIndices(*Order), ReorderedOps.begin(),
              [Chain](const unsigned Idx) { return Chain[Idx]; });
    R.buildTree(ReorderedOps);
  }
  // Skip trees that are too small to be worth the overhead.
  if (R.isTreeTinyAndNotFullyVectorizable())
    return false;
  // Leave patterns claimed by the load-combine heuristic to other transforms.
  if (R.isLoadCombineCandidate())
    return false;

  R.computeMinimumValueSizes();

  InstructionCost Cost = R.getTreeCost();

  LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for VF =" << VF << "\n");
  // Negative cost means the vector form is cheaper; vectorize only when the
  // saving beats the user-configurable threshold.
  if (Cost < -SLPCostThreshold) {
    LLVM_DEBUG(dbgs() << "SLP: Decided to vectorize cost = " << Cost << "\n");

    using namespace ore;

    R.getORE()->emit(OptimizationRemark(SV_NAME, "StoresVectorized",
                                        cast<StoreInst>(Chain[0]))
                     << "Stores SLP vectorized with cost " << NV("Cost", Cost)
                     << " and with tree size "
                     << NV("TreeSize", R.getTreeSize()));

    R.vectorizeTree();
    return true;
  }

  return false;
}
69230b57cec5SDimitry Andric 
vectorizeStores(ArrayRef<StoreInst * > Stores,BoUpSLP & R)69240b57cec5SDimitry Andric bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
69250b57cec5SDimitry Andric                                         BoUpSLP &R) {
69260b57cec5SDimitry Andric   // We may run into multiple chains that merge into a single chain. We mark the
69270b57cec5SDimitry Andric   // stores that we vectorized so that we don't visit the same store twice.
69280b57cec5SDimitry Andric   BoUpSLP::ValueSet VectorizedStores;
69290b57cec5SDimitry Andric   bool Changed = false;
69300b57cec5SDimitry Andric 
6931480093f4SDimitry Andric   int E = Stores.size();
6932480093f4SDimitry Andric   SmallBitVector Tails(E, false);
6933480093f4SDimitry Andric   int MaxIter = MaxStoreLookup.getValue();
69345f7ddb14SDimitry Andric   SmallVector<std::pair<int, int>, 16> ConsecutiveChain(
69355f7ddb14SDimitry Andric       E, std::make_pair(E, INT_MAX));
69365f7ddb14SDimitry Andric   SmallVector<SmallBitVector, 4> CheckedPairs(E, SmallBitVector(E, false));
6937480093f4SDimitry Andric   int IterCnt;
6938480093f4SDimitry Andric   auto &&FindConsecutiveAccess = [this, &Stores, &Tails, &IterCnt, MaxIter,
69395f7ddb14SDimitry Andric                                   &CheckedPairs,
6940480093f4SDimitry Andric                                   &ConsecutiveChain](int K, int Idx) {
6941480093f4SDimitry Andric     if (IterCnt >= MaxIter)
6942480093f4SDimitry Andric       return true;
69435f7ddb14SDimitry Andric     if (CheckedPairs[Idx].test(K))
69445f7ddb14SDimitry Andric       return ConsecutiveChain[K].second == 1 &&
69455f7ddb14SDimitry Andric              ConsecutiveChain[K].first == Idx;
6946480093f4SDimitry Andric     ++IterCnt;
69475f7ddb14SDimitry Andric     CheckedPairs[Idx].set(K);
69485f7ddb14SDimitry Andric     CheckedPairs[K].set(Idx);
69495f7ddb14SDimitry Andric     Optional<int> Diff = getPointersDiff(
69505f7ddb14SDimitry Andric         Stores[K]->getValueOperand()->getType(), Stores[K]->getPointerOperand(),
69515f7ddb14SDimitry Andric         Stores[Idx]->getValueOperand()->getType(),
69525f7ddb14SDimitry Andric         Stores[Idx]->getPointerOperand(), *DL, *SE, /*StrictCheck=*/true);
69535f7ddb14SDimitry Andric     if (!Diff || *Diff == 0)
69545f7ddb14SDimitry Andric       return false;
69555f7ddb14SDimitry Andric     int Val = *Diff;
69565f7ddb14SDimitry Andric     if (Val < 0) {
69575f7ddb14SDimitry Andric       if (ConsecutiveChain[Idx].second > -Val) {
69585f7ddb14SDimitry Andric         Tails.set(K);
69595f7ddb14SDimitry Andric         ConsecutiveChain[Idx] = std::make_pair(K, -Val);
69605f7ddb14SDimitry Andric       }
69615f7ddb14SDimitry Andric       return false;
69625f7ddb14SDimitry Andric     }
69635f7ddb14SDimitry Andric     if (ConsecutiveChain[K].second <= Val)
69640b57cec5SDimitry Andric       return false;
69650b57cec5SDimitry Andric 
6966480093f4SDimitry Andric     Tails.set(Idx);
69675f7ddb14SDimitry Andric     ConsecutiveChain[K] = std::make_pair(Idx, Val);
69685f7ddb14SDimitry Andric     return Val == 1;
69690b57cec5SDimitry Andric   };
69700b57cec5SDimitry Andric   // Do a quadratic search on all of the given stores in reverse order and find
69710b57cec5SDimitry Andric   // all of the pairs of stores that follow each other.
69720b57cec5SDimitry Andric   for (int Idx = E - 1; Idx >= 0; --Idx) {
69730b57cec5SDimitry Andric     // If a store has multiple consecutive store candidates, search according
69740b57cec5SDimitry Andric     // to the sequence: Idx-1, Idx+1, Idx-2, Idx+2, ...
69750b57cec5SDimitry Andric     // This is because usually pairing with immediate succeeding or preceding
69760b57cec5SDimitry Andric     // candidate create the best chance to find slp vectorization opportunity.
6977480093f4SDimitry Andric     const int MaxLookDepth = std::max(E - Idx, Idx + 1);
6978480093f4SDimitry Andric     IterCnt = 0;
6979480093f4SDimitry Andric     for (int Offset = 1, F = MaxLookDepth; Offset < F; ++Offset)
69800b57cec5SDimitry Andric       if ((Idx >= Offset && FindConsecutiveAccess(Idx - Offset, Idx)) ||
69810b57cec5SDimitry Andric           (Idx + Offset < E && FindConsecutiveAccess(Idx + Offset, Idx)))
69820b57cec5SDimitry Andric         break;
69830b57cec5SDimitry Andric   }
69840b57cec5SDimitry Andric 
69855f7ddb14SDimitry Andric   // Tracks if we tried to vectorize stores starting from the given tail
69865f7ddb14SDimitry Andric   // already.
69875f7ddb14SDimitry Andric   SmallBitVector TriedTails(E, false);
69880b57cec5SDimitry Andric   // For stores that start but don't end a link in the chain:
6989480093f4SDimitry Andric   for (int Cnt = E; Cnt > 0; --Cnt) {
6990480093f4SDimitry Andric     int I = Cnt - 1;
69915f7ddb14SDimitry Andric     if (ConsecutiveChain[I].first == E || Tails.test(I))
69920b57cec5SDimitry Andric       continue;
69930b57cec5SDimitry Andric     // We found a store instr that starts a chain. Now follow the chain and try
69940b57cec5SDimitry Andric     // to vectorize it.
69950b57cec5SDimitry Andric     BoUpSLP::ValueList Operands;
69960b57cec5SDimitry Andric     // Collect the chain into a list.
69975f7ddb14SDimitry Andric     while (I != E && !VectorizedStores.count(Stores[I])) {
6998480093f4SDimitry Andric       Operands.push_back(Stores[I]);
69995f7ddb14SDimitry Andric       Tails.set(I);
70005f7ddb14SDimitry Andric       if (ConsecutiveChain[I].second != 1) {
70015f7ddb14SDimitry Andric         // Mark the new end in the chain and go back, if required. It might be
70025f7ddb14SDimitry Andric         // required if the original stores come in reversed order, for example.
70035f7ddb14SDimitry Andric         if (ConsecutiveChain[I].first != E &&
70045f7ddb14SDimitry Andric             Tails.test(ConsecutiveChain[I].first) && !TriedTails.test(I) &&
70055f7ddb14SDimitry Andric             !VectorizedStores.count(Stores[ConsecutiveChain[I].first])) {
70065f7ddb14SDimitry Andric           TriedTails.set(I);
70075f7ddb14SDimitry Andric           Tails.reset(ConsecutiveChain[I].first);
70085f7ddb14SDimitry Andric           if (Cnt < ConsecutiveChain[I].first + 2)
70095f7ddb14SDimitry Andric             Cnt = ConsecutiveChain[I].first + 2;
70100b57cec5SDimitry Andric         }
70115f7ddb14SDimitry Andric         break;
70125f7ddb14SDimitry Andric       }
70135f7ddb14SDimitry Andric       // Move to the next value in the chain.
70145f7ddb14SDimitry Andric       I = ConsecutiveChain[I].first;
70155f7ddb14SDimitry Andric     }
70165f7ddb14SDimitry Andric     assert(!Operands.empty() && "Expected non-empty list of stores.");
70170b57cec5SDimitry Andric 
7018480093f4SDimitry Andric     unsigned MaxVecRegSize = R.getMaxVecRegSize();
7019af732203SDimitry Andric     unsigned EltSize = R.getVectorElementSize(Operands[0]);
70205f7ddb14SDimitry Andric     unsigned MaxElts = llvm::PowerOf2Floor(MaxVecRegSize / EltSize);
7021480093f4SDimitry Andric 
70225f7ddb14SDimitry Andric     unsigned MinVF = std::max(2U, R.getMinVecRegSize() / EltSize);
70235f7ddb14SDimitry Andric     unsigned MaxVF = std::min(R.getMaximumVF(EltSize, Instruction::Store),
70245f7ddb14SDimitry Andric                               MaxElts);
70255f7ddb14SDimitry Andric 
70260b57cec5SDimitry Andric     // FIXME: Is division-by-2 the correct step? Should we assert that the
70270b57cec5SDimitry Andric     // register size is a power-of-2?
7028480093f4SDimitry Andric     unsigned StartIdx = 0;
70295f7ddb14SDimitry Andric     for (unsigned Size = MaxVF; Size >= MinVF; Size /= 2) {
7030480093f4SDimitry Andric       for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
7031480093f4SDimitry Andric         ArrayRef<Value *> Slice = makeArrayRef(Operands).slice(Cnt, Size);
7032480093f4SDimitry Andric         if (!VectorizedStores.count(Slice.front()) &&
7033480093f4SDimitry Andric             !VectorizedStores.count(Slice.back()) &&
7034480093f4SDimitry Andric             vectorizeStoreChain(Slice, R, Cnt)) {
70350b57cec5SDimitry Andric           // Mark the vectorized stores so that we don't vectorize them again.
7036480093f4SDimitry Andric           VectorizedStores.insert(Slice.begin(), Slice.end());
70370b57cec5SDimitry Andric           Changed = true;
7038480093f4SDimitry Andric           // If we vectorized initial block, no need to try to vectorize it
7039480093f4SDimitry Andric           // again.
7040480093f4SDimitry Andric           if (Cnt == StartIdx)
7041480093f4SDimitry Andric             StartIdx += Size;
7042480093f4SDimitry Andric           Cnt += Size;
7043480093f4SDimitry Andric           continue;
70440b57cec5SDimitry Andric         }
7045480093f4SDimitry Andric         ++Cnt;
7046480093f4SDimitry Andric       }
7047480093f4SDimitry Andric       // Check if the whole array was vectorized already - exit.
7048480093f4SDimitry Andric       if (StartIdx >= Operands.size())
7049480093f4SDimitry Andric         break;
70500b57cec5SDimitry Andric     }
70510b57cec5SDimitry Andric   }
70520b57cec5SDimitry Andric 
70530b57cec5SDimitry Andric   return Changed;
70540b57cec5SDimitry Andric }
70550b57cec5SDimitry Andric 
collectSeedInstructions(BasicBlock * BB)70560b57cec5SDimitry Andric void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
70570b57cec5SDimitry Andric   // Initialize the collections. We will make a single pass over the block.
70580b57cec5SDimitry Andric   Stores.clear();
70590b57cec5SDimitry Andric   GEPs.clear();
70600b57cec5SDimitry Andric 
70610b57cec5SDimitry Andric   // Visit the store and getelementptr instructions in BB and organize them in
70620b57cec5SDimitry Andric   // Stores and GEPs according to the underlying objects of their pointer
70630b57cec5SDimitry Andric   // operands.
70640b57cec5SDimitry Andric   for (Instruction &I : *BB) {
70650b57cec5SDimitry Andric     // Ignore store instructions that are volatile or have a pointer operand
70660b57cec5SDimitry Andric     // that doesn't point to a scalar type.
70670b57cec5SDimitry Andric     if (auto *SI = dyn_cast<StoreInst>(&I)) {
70680b57cec5SDimitry Andric       if (!SI->isSimple())
70690b57cec5SDimitry Andric         continue;
70700b57cec5SDimitry Andric       if (!isValidElementType(SI->getValueOperand()->getType()))
70710b57cec5SDimitry Andric         continue;
7072af732203SDimitry Andric       Stores[getUnderlyingObject(SI->getPointerOperand())].push_back(SI);
70730b57cec5SDimitry Andric     }
70740b57cec5SDimitry Andric 
70750b57cec5SDimitry Andric     // Ignore getelementptr instructions that have more than one index, a
70760b57cec5SDimitry Andric     // constant index, or a pointer operand that doesn't point to a scalar
70770b57cec5SDimitry Andric     // type.
70780b57cec5SDimitry Andric     else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
70790b57cec5SDimitry Andric       auto Idx = GEP->idx_begin()->get();
70800b57cec5SDimitry Andric       if (GEP->getNumIndices() > 1 || isa<Constant>(Idx))
70810b57cec5SDimitry Andric         continue;
70820b57cec5SDimitry Andric       if (!isValidElementType(Idx->getType()))
70830b57cec5SDimitry Andric         continue;
70840b57cec5SDimitry Andric       if (GEP->getType()->isVectorTy())
70850b57cec5SDimitry Andric         continue;
70860b57cec5SDimitry Andric       GEPs[GEP->getPointerOperand()].push_back(GEP);
70870b57cec5SDimitry Andric     }
70880b57cec5SDimitry Andric   }
70890b57cec5SDimitry Andric }
70900b57cec5SDimitry Andric 
tryToVectorizePair(Value * A,Value * B,BoUpSLP & R)70910b57cec5SDimitry Andric bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
70920b57cec5SDimitry Andric   if (!A || !B)
70930b57cec5SDimitry Andric     return false;
70940b57cec5SDimitry Andric   Value *VL[] = {A, B};
70955ffd83dbSDimitry Andric   return tryToVectorizeList(VL, R, /*AllowReorder=*/true);
70960b57cec5SDimitry Andric }
70970b57cec5SDimitry Andric 
/// Attempt to vectorize the flat list of values \p VL with the tree builder
/// \p R, trying vectorization factors from the largest power-of-2 that fits
/// down to the minimum, and sliding a window over \p VL at each factor.
/// \param VL           candidate scalar values (all must share an opcode per
///                     InstructionsState; insertelements are also accepted).
/// \param AllowReorder if true, a better lane order suggested by the tree may
///                     be applied (by rebuilding the tree on reordered ops).
/// \returns true if at least one bundle was vectorized.
bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
                                           bool AllowReorder) {
  // Nothing to pair up with fewer than two values.
  if (VL.size() < 2)
    return false;

  LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize a list of length = "
                    << VL.size() << ".\n");

  // Check that all of the parts are instructions of the same type,
  // we permit an alternate opcode via InstructionsState.
  InstructionsState S = getSameOpcode(VL);
  if (!S.getOpcode())
    return false;

  Instruction *I0 = cast<Instruction>(S.OpValue);
  // Make sure invalid types (including vector type) are rejected before
  // determining vectorization factor for scalar instructions.
  for (Value *V : VL) {
    Type *Ty = V->getType();
    if (!isa<InsertElementInst>(V) && !isValidElementType(Ty)) {
      // NOTE: the following will give user internal llvm type name, which may
      // not be useful.
      R.getORE()->emit([&]() {
        std::string type_str;
        llvm::raw_string_ostream rso(type_str);
        Ty->print(rso);
        return OptimizationRemarkMissed(SV_NAME, "UnsupportedType", I0)
               << "Cannot SLP vectorize list: type "
               << rso.str() + " is unsupported by vectorizer";
      });
      return false;
    }
  }

  // Derive the range of vectorization factors to try from the element size,
  // the register sizes, and the target's per-opcode maximum VF.
  unsigned Sz = R.getVectorElementSize(I0);
  unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
  unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
  MaxVF = std::min(R.getMaximumVF(Sz, S.getOpcode()), MaxVF);
  if (MaxVF < 2) {
    R.getORE()->emit([&]() {
      return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
             << "Cannot SLP vectorize list: vectorization factor "
             << "less than 2 is not supported";
    });
    return false;
  }

  bool Changed = false;
  bool CandidateFound = false;
  // Track the best (lowest) cost seen, for the "not beneficial" remark below.
  InstructionCost MinCost = SLPCostThreshold.getValue();
  // For insertelement seeds, the scalar type is the inserted value's type.
  Type *ScalarTy = VL[0]->getType();
  if (auto *IE = dyn_cast<InsertElementInst>(VL[0]))
    ScalarTy = IE->getOperand(1)->getType();

  // Outer loop: halve VF each round. NextInst remembers how far into VL the
  // previous round got, so already-vectorized prefixes are not re-examined.
  unsigned NextInst = 0, MaxInst = VL.size();
  for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) {
    // No actual vectorization should happen, if number of parts is the same as
    // provided vectorization factor (i.e. the scalar type is used for vector
    // code during codegen).
    auto *VecTy = FixedVectorType::get(ScalarTy, VF);
    if (TTI->getNumberOfParts(VecTy) == VF)
      continue;
    for (unsigned I = NextInst; I < MaxInst; ++I) {
      unsigned OpsWidth = 0;

      // Clamp the window at the tail of the list.
      if (I + VF > MaxInst)
        OpsWidth = MaxInst - I;
      else
        OpsWidth = VF;

      if (!isPowerOf2_32(OpsWidth))
        continue;

      // A tail narrower than VF/2 will be (or was) handled by a smaller VF
      // round; a tail of one value cannot be vectorized at all.
      if ((VF > MinVF && OpsWidth <= VF / 2) || (VF == MinVF && OpsWidth < 2))
        break;

      ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
      // Check that a previous iteration of this loop did not delete the Value.
      if (llvm::any_of(Ops, [&R](Value *V) {
            auto *I = dyn_cast<Instruction>(V);
            return I && R.isDeleted(I);
          }))
        continue;

      LLVM_DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
                        << "\n");

      R.buildTree(Ops);
      if (AllowReorder) {
        Optional<ArrayRef<unsigned>> Order = R.bestOrder();
        if (Order) {
          // TODO: reorder tree nodes without tree rebuilding.
          SmallVector<Value *, 4> ReorderedOps(Ops.size());
          transform(fixupOrderingIndices(*Order), ReorderedOps.begin(),
                    [Ops](const unsigned Idx) { return Ops[Idx]; });
          R.buildTree(ReorderedOps);
        }
      }
      if (R.isTreeTinyAndNotFullyVectorizable())
        continue;

      R.computeMinimumValueSizes();
      InstructionCost Cost = R.getTreeCost();
      CandidateFound = true;
      MinCost = std::min(MinCost, Cost);

      // Negative cost beyond the threshold means vectorizing is profitable.
      if (Cost < -SLPCostThreshold) {
        LLVM_DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
        R.getORE()->emit(OptimizationRemark(SV_NAME, "VectorizedList",
                                                    cast<Instruction>(Ops[0]))
                                 << "SLP vectorized with cost " << ore::NV("Cost", Cost)
                                 << " and with tree size "
                                 << ore::NV("TreeSize", R.getTreeSize()));

        R.vectorizeTree();
        // Move to the next bundle.
        I += VF - 1;
        NextInst = I + 1;
        Changed = true;
      }
    }
  }

  // Emit a remark explaining why nothing was vectorized, if applicable.
  if (!Changed && CandidateFound) {
    R.getORE()->emit([&]() {
      return OptimizationRemarkMissed(SV_NAME, "NotBeneficial", I0)
             << "List vectorization was possible but not beneficial with cost "
             << ore::NV("Cost", MinCost) << " >= "
             << ore::NV("Treshold", -SLPCostThreshold);
    });
  } else if (!Changed) {
    R.getORE()->emit([&]() {
      return OptimizationRemarkMissed(SV_NAME, "NotPossible", I0)
             << "Cannot SLP vectorize list: vectorization was impossible"
             << " with available vectorization factors";
    });
  }
  return Changed;
}
72370b57cec5SDimitry Andric 
tryToVectorize(Instruction * I,BoUpSLP & R)72380b57cec5SDimitry Andric bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {
72390b57cec5SDimitry Andric   if (!I)
72400b57cec5SDimitry Andric     return false;
72410b57cec5SDimitry Andric 
72420b57cec5SDimitry Andric   if (!isa<BinaryOperator>(I) && !isa<CmpInst>(I))
72430b57cec5SDimitry Andric     return false;
72440b57cec5SDimitry Andric 
72450b57cec5SDimitry Andric   Value *P = I->getParent();
72460b57cec5SDimitry Andric 
72470b57cec5SDimitry Andric   // Vectorize in current basic block only.
72480b57cec5SDimitry Andric   auto *Op0 = dyn_cast<Instruction>(I->getOperand(0));
72490b57cec5SDimitry Andric   auto *Op1 = dyn_cast<Instruction>(I->getOperand(1));
72500b57cec5SDimitry Andric   if (!Op0 || !Op1 || Op0->getParent() != P || Op1->getParent() != P)
72510b57cec5SDimitry Andric     return false;
72520b57cec5SDimitry Andric 
72530b57cec5SDimitry Andric   // Try to vectorize V.
72540b57cec5SDimitry Andric   if (tryToVectorizePair(Op0, Op1, R))
72550b57cec5SDimitry Andric     return true;
72560b57cec5SDimitry Andric 
72570b57cec5SDimitry Andric   auto *A = dyn_cast<BinaryOperator>(Op0);
72580b57cec5SDimitry Andric   auto *B = dyn_cast<BinaryOperator>(Op1);
72590b57cec5SDimitry Andric   // Try to skip B.
72600b57cec5SDimitry Andric   if (B && B->hasOneUse()) {
72610b57cec5SDimitry Andric     auto *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
72620b57cec5SDimitry Andric     auto *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
72630b57cec5SDimitry Andric     if (B0 && B0->getParent() == P && tryToVectorizePair(A, B0, R))
72640b57cec5SDimitry Andric       return true;
72650b57cec5SDimitry Andric     if (B1 && B1->getParent() == P && tryToVectorizePair(A, B1, R))
72660b57cec5SDimitry Andric       return true;
72670b57cec5SDimitry Andric   }
72680b57cec5SDimitry Andric 
72690b57cec5SDimitry Andric   // Try to skip A.
72700b57cec5SDimitry Andric   if (A && A->hasOneUse()) {
72710b57cec5SDimitry Andric     auto *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
72720b57cec5SDimitry Andric     auto *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
72730b57cec5SDimitry Andric     if (A0 && A0->getParent() == P && tryToVectorizePair(A0, B, R))
72740b57cec5SDimitry Andric       return true;
72750b57cec5SDimitry Andric     if (A1 && A1->getParent() == P && tryToVectorizePair(A1, B, R))
72760b57cec5SDimitry Andric       return true;
72770b57cec5SDimitry Andric   }
72780b57cec5SDimitry Andric   return false;
72790b57cec5SDimitry Andric }
72800b57cec5SDimitry Andric 
72810b57cec5SDimitry Andric namespace {
72820b57cec5SDimitry Andric 
72830b57cec5SDimitry Andric /// Model horizontal reductions.
72840b57cec5SDimitry Andric ///
7285af732203SDimitry Andric /// A horizontal reduction is a tree of reduction instructions that has values
7286af732203SDimitry Andric /// that can be put into a vector as its leaves. For example:
72870b57cec5SDimitry Andric ///
72880b57cec5SDimitry Andric /// mul mul mul mul
72890b57cec5SDimitry Andric ///  \  /    \  /
72900b57cec5SDimitry Andric ///   +       +
72910b57cec5SDimitry Andric ///    \     /
72920b57cec5SDimitry Andric ///       +
7293af732203SDimitry Andric /// This tree has "mul" as its leaf values and "+" as its reduction
7294af732203SDimitry Andric /// instructions. A reduction can feed into a store or a binary operation
72950b57cec5SDimitry Andric /// feeding a phi.
72960b57cec5SDimitry Andric ///    ...
72970b57cec5SDimitry Andric ///    \  /
72980b57cec5SDimitry Andric ///     +
72990b57cec5SDimitry Andric ///     |
73000b57cec5SDimitry Andric ///  phi +=
73010b57cec5SDimitry Andric ///
73020b57cec5SDimitry Andric ///  Or:
73030b57cec5SDimitry Andric ///    ...
73040b57cec5SDimitry Andric ///    \  /
73050b57cec5SDimitry Andric ///     +
73060b57cec5SDimitry Andric ///     |
73070b57cec5SDimitry Andric ///   *p =
73080b57cec5SDimitry Andric ///
73090b57cec5SDimitry Andric class HorizontalReduction {
73100b57cec5SDimitry Andric   using ReductionOpsType = SmallVector<Value *, 16>;
73110b57cec5SDimitry Andric   using ReductionOpsListType = SmallVector<ReductionOpsType, 2>;
73120b57cec5SDimitry Andric   ReductionOpsListType ReductionOps;
73130b57cec5SDimitry Andric   SmallVector<Value *, 32> ReducedVals;
73140b57cec5SDimitry Andric   // Use map vector to make stable output.
73150b57cec5SDimitry Andric   MapVector<Instruction *, Value *> ExtraArgs;
7316af732203SDimitry Andric   WeakTrackingVH ReductionRoot;
7317af732203SDimitry Andric   /// The type of reduction operation.
7318af732203SDimitry Andric   RecurKind RdxKind;
73190b57cec5SDimitry Andric 
73205f7ddb14SDimitry Andric   const unsigned INVALID_OPERAND_INDEX = std::numeric_limits<unsigned>::max();
73215f7ddb14SDimitry Andric 
isCmpSelMinMax(Instruction * I)73225f7ddb14SDimitry Andric   static bool isCmpSelMinMax(Instruction *I) {
73235f7ddb14SDimitry Andric     return match(I, m_Select(m_Cmp(), m_Value(), m_Value())) &&
73245f7ddb14SDimitry Andric            RecurrenceDescriptor::isMinMaxRecurrenceKind(getRdxKind(I));
73255f7ddb14SDimitry Andric   }
73265f7ddb14SDimitry Andric 
73275f7ddb14SDimitry Andric   // And/or are potentially poison-safe logical patterns like:
73285f7ddb14SDimitry Andric   // select x, y, false
73295f7ddb14SDimitry Andric   // select x, true, y
isBoolLogicOp(Instruction * I)73305f7ddb14SDimitry Andric   static bool isBoolLogicOp(Instruction *I) {
73315f7ddb14SDimitry Andric     return match(I, m_LogicalAnd(m_Value(), m_Value())) ||
73325f7ddb14SDimitry Andric            match(I, m_LogicalOr(m_Value(), m_Value()));
73335f7ddb14SDimitry Andric   }
73345f7ddb14SDimitry Andric 
73350b57cec5SDimitry Andric   /// Checks if instruction is associative and can be vectorized.
isVectorizable(RecurKind Kind,Instruction * I)7336af732203SDimitry Andric   static bool isVectorizable(RecurKind Kind, Instruction *I) {
7337af732203SDimitry Andric     if (Kind == RecurKind::None)
7338af732203SDimitry Andric       return false;
73395f7ddb14SDimitry Andric 
73405f7ddb14SDimitry Andric     // Integer ops that map to select instructions or intrinsics are fine.
73415f7ddb14SDimitry Andric     if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind) ||
73425f7ddb14SDimitry Andric         isBoolLogicOp(I))
73430b57cec5SDimitry Andric       return true;
7344af732203SDimitry Andric 
7345af732203SDimitry Andric     if (Kind == RecurKind::FMax || Kind == RecurKind::FMin) {
7346af732203SDimitry Andric       // FP min/max are associative except for NaN and -0.0. We do not
7347af732203SDimitry Andric       // have to rule out -0.0 here because the intrinsic semantics do not
7348af732203SDimitry Andric       // specify a fixed result for it.
7349af732203SDimitry Andric       return I->getFastMathFlags().noNaNs();
73500b57cec5SDimitry Andric     }
73510b57cec5SDimitry Andric 
7352af732203SDimitry Andric     return I->isAssociative();
73530b57cec5SDimitry Andric   }
73540b57cec5SDimitry Andric 
getRdxOperand(Instruction * I,unsigned Index)73555f7ddb14SDimitry Andric   static Value *getRdxOperand(Instruction *I, unsigned Index) {
73565f7ddb14SDimitry Andric     // Poison-safe 'or' takes the form: select X, true, Y
73575f7ddb14SDimitry Andric     // To make that work with the normal operand processing, we skip the
73585f7ddb14SDimitry Andric     // true value operand.
73595f7ddb14SDimitry Andric     // TODO: Change the code and data structures to handle this without a hack.
73605f7ddb14SDimitry Andric     if (getRdxKind(I) == RecurKind::Or && isa<SelectInst>(I) && Index == 1)
73615f7ddb14SDimitry Andric       return I->getOperand(2);
73625f7ddb14SDimitry Andric     return I->getOperand(Index);
73635f7ddb14SDimitry Andric   }
73645f7ddb14SDimitry Andric 
73650b57cec5SDimitry Andric   /// Checks if the ParentStackElem.first should be marked as a reduction
73660b57cec5SDimitry Andric   /// operation with an extra argument or as extra argument itself.
markExtraArg(std::pair<Instruction *,unsigned> & ParentStackElem,Value * ExtraArg)73670b57cec5SDimitry Andric   void markExtraArg(std::pair<Instruction *, unsigned> &ParentStackElem,
73680b57cec5SDimitry Andric                     Value *ExtraArg) {
73690b57cec5SDimitry Andric     if (ExtraArgs.count(ParentStackElem.first)) {
73700b57cec5SDimitry Andric       ExtraArgs[ParentStackElem.first] = nullptr;
73710b57cec5SDimitry Andric       // We ran into something like:
73720b57cec5SDimitry Andric       // ParentStackElem.first = ExtraArgs[ParentStackElem.first] + ExtraArg.
73730b57cec5SDimitry Andric       // The whole ParentStackElem.first should be considered as an extra value
73740b57cec5SDimitry Andric       // in this case.
73750b57cec5SDimitry Andric       // Do not perform analysis of remaining operands of ParentStackElem.first
73760b57cec5SDimitry Andric       // instruction, this whole instruction is an extra argument.
73775f7ddb14SDimitry Andric       ParentStackElem.second = INVALID_OPERAND_INDEX;
73780b57cec5SDimitry Andric     } else {
73790b57cec5SDimitry Andric       // We ran into something like:
73800b57cec5SDimitry Andric       // ParentStackElem.first += ... + ExtraArg + ...
73810b57cec5SDimitry Andric       ExtraArgs[ParentStackElem.first] = ExtraArg;
73820b57cec5SDimitry Andric     }
73830b57cec5SDimitry Andric   }
73840b57cec5SDimitry Andric 
7385af732203SDimitry Andric   /// Creates reduction operation with the current opcode.
createOp(IRBuilder<> & Builder,RecurKind Kind,Value * LHS,Value * RHS,const Twine & Name,bool UseSelect)7386af732203SDimitry Andric   static Value *createOp(IRBuilder<> &Builder, RecurKind Kind, Value *LHS,
73875f7ddb14SDimitry Andric                          Value *RHS, const Twine &Name, bool UseSelect) {
7388af732203SDimitry Andric     unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
7389af732203SDimitry Andric     switch (Kind) {
7390af732203SDimitry Andric     case RecurKind::Add:
7391af732203SDimitry Andric     case RecurKind::Mul:
7392af732203SDimitry Andric     case RecurKind::Or:
7393af732203SDimitry Andric     case RecurKind::And:
7394af732203SDimitry Andric     case RecurKind::Xor:
7395af732203SDimitry Andric     case RecurKind::FAdd:
7396af732203SDimitry Andric     case RecurKind::FMul:
7397af732203SDimitry Andric       return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
7398af732203SDimitry Andric                                  Name);
7399af732203SDimitry Andric     case RecurKind::FMax:
7400af732203SDimitry Andric       return Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, LHS, RHS);
7401af732203SDimitry Andric     case RecurKind::FMin:
7402af732203SDimitry Andric       return Builder.CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS);
74035f7ddb14SDimitry Andric     case RecurKind::SMax:
74045f7ddb14SDimitry Andric       if (UseSelect) {
7405af732203SDimitry Andric         Value *Cmp = Builder.CreateICmpSGT(LHS, RHS, Name);
7406af732203SDimitry Andric         return Builder.CreateSelect(Cmp, LHS, RHS, Name);
74070b57cec5SDimitry Andric       }
74085f7ddb14SDimitry Andric       return Builder.CreateBinaryIntrinsic(Intrinsic::smax, LHS, RHS);
74095f7ddb14SDimitry Andric     case RecurKind::SMin:
74105f7ddb14SDimitry Andric       if (UseSelect) {
7411af732203SDimitry Andric         Value *Cmp = Builder.CreateICmpSLT(LHS, RHS, Name);
7412af732203SDimitry Andric         return Builder.CreateSelect(Cmp, LHS, RHS, Name);
7413af732203SDimitry Andric       }
74145f7ddb14SDimitry Andric       return Builder.CreateBinaryIntrinsic(Intrinsic::smin, LHS, RHS);
74155f7ddb14SDimitry Andric     case RecurKind::UMax:
74165f7ddb14SDimitry Andric       if (UseSelect) {
7417af732203SDimitry Andric         Value *Cmp = Builder.CreateICmpUGT(LHS, RHS, Name);
7418af732203SDimitry Andric         return Builder.CreateSelect(Cmp, LHS, RHS, Name);
7419af732203SDimitry Andric       }
74205f7ddb14SDimitry Andric       return Builder.CreateBinaryIntrinsic(Intrinsic::umax, LHS, RHS);
74215f7ddb14SDimitry Andric     case RecurKind::UMin:
74225f7ddb14SDimitry Andric       if (UseSelect) {
7423af732203SDimitry Andric         Value *Cmp = Builder.CreateICmpULT(LHS, RHS, Name);
7424af732203SDimitry Andric         return Builder.CreateSelect(Cmp, LHS, RHS, Name);
7425af732203SDimitry Andric       }
74265f7ddb14SDimitry Andric       return Builder.CreateBinaryIntrinsic(Intrinsic::umin, LHS, RHS);
7427af732203SDimitry Andric     default:
7428af732203SDimitry Andric       llvm_unreachable("Unknown reduction operation.");
7429af732203SDimitry Andric     }
7430af732203SDimitry Andric   }
7431af732203SDimitry Andric 
7432af732203SDimitry Andric   /// Creates reduction operation with the current opcode with the IR flags
7433af732203SDimitry Andric   /// from \p ReductionOps.
createOp(IRBuilder<> & Builder,RecurKind RdxKind,Value * LHS,Value * RHS,const Twine & Name,const ReductionOpsListType & ReductionOps)7434af732203SDimitry Andric   static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS,
7435af732203SDimitry Andric                          Value *RHS, const Twine &Name,
7436af732203SDimitry Andric                          const ReductionOpsListType &ReductionOps) {
74375f7ddb14SDimitry Andric     bool UseSelect = ReductionOps.size() == 2;
74385f7ddb14SDimitry Andric     assert((!UseSelect || isa<SelectInst>(ReductionOps[1][0])) &&
74395f7ddb14SDimitry Andric            "Expected cmp + select pairs for reduction");
74405f7ddb14SDimitry Andric     Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, UseSelect);
7441af732203SDimitry Andric     if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
74425f7ddb14SDimitry Andric       if (auto *Sel = dyn_cast<SelectInst>(Op)) {
7443af732203SDimitry Andric         propagateIRFlags(Sel->getCondition(), ReductionOps[0]);
7444af732203SDimitry Andric         propagateIRFlags(Op, ReductionOps[1]);
7445af732203SDimitry Andric         return Op;
7446af732203SDimitry Andric       }
74475f7ddb14SDimitry Andric     }
7448af732203SDimitry Andric     propagateIRFlags(Op, ReductionOps[0]);
7449af732203SDimitry Andric     return Op;
7450af732203SDimitry Andric   }
74515f7ddb14SDimitry Andric 
7452af732203SDimitry Andric   /// Creates reduction operation with the current opcode with the IR flags
7453af732203SDimitry Andric   /// from \p I.
createOp(IRBuilder<> & Builder,RecurKind RdxKind,Value * LHS,Value * RHS,const Twine & Name,Instruction * I)7454af732203SDimitry Andric   static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS,
7455af732203SDimitry Andric                          Value *RHS, const Twine &Name, Instruction *I) {
74565f7ddb14SDimitry Andric     auto *SelI = dyn_cast<SelectInst>(I);
74575f7ddb14SDimitry Andric     Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, SelI != nullptr);
74585f7ddb14SDimitry Andric     if (SelI && RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
74595f7ddb14SDimitry Andric       if (auto *Sel = dyn_cast<SelectInst>(Op))
74605f7ddb14SDimitry Andric         propagateIRFlags(Sel->getCondition(), SelI->getCondition());
7461af732203SDimitry Andric     }
7462af732203SDimitry Andric     propagateIRFlags(Op, I);
7463af732203SDimitry Andric     return Op;
7464af732203SDimitry Andric   }
7465af732203SDimitry Andric 
getRdxKind(Instruction * I)7466af732203SDimitry Andric   static RecurKind getRdxKind(Instruction *I) {
7467af732203SDimitry Andric     assert(I && "Expected instruction for reduction matching");
7468af732203SDimitry Andric     TargetTransformInfo::ReductionFlags RdxFlags;
7469af732203SDimitry Andric     if (match(I, m_Add(m_Value(), m_Value())))
7470af732203SDimitry Andric       return RecurKind::Add;
7471af732203SDimitry Andric     if (match(I, m_Mul(m_Value(), m_Value())))
7472af732203SDimitry Andric       return RecurKind::Mul;
74735f7ddb14SDimitry Andric     if (match(I, m_And(m_Value(), m_Value())) ||
74745f7ddb14SDimitry Andric         match(I, m_LogicalAnd(m_Value(), m_Value())))
7475af732203SDimitry Andric       return RecurKind::And;
74765f7ddb14SDimitry Andric     if (match(I, m_Or(m_Value(), m_Value())) ||
74775f7ddb14SDimitry Andric         match(I, m_LogicalOr(m_Value(), m_Value())))
7478af732203SDimitry Andric       return RecurKind::Or;
7479af732203SDimitry Andric     if (match(I, m_Xor(m_Value(), m_Value())))
7480af732203SDimitry Andric       return RecurKind::Xor;
7481af732203SDimitry Andric     if (match(I, m_FAdd(m_Value(), m_Value())))
7482af732203SDimitry Andric       return RecurKind::FAdd;
7483af732203SDimitry Andric     if (match(I, m_FMul(m_Value(), m_Value())))
7484af732203SDimitry Andric       return RecurKind::FMul;
7485af732203SDimitry Andric 
7486af732203SDimitry Andric     if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
7487af732203SDimitry Andric       return RecurKind::FMax;
7488af732203SDimitry Andric     if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value())))
7489af732203SDimitry Andric       return RecurKind::FMin;
7490af732203SDimitry Andric 
74915f7ddb14SDimitry Andric     // This matches either cmp+select or intrinsics. SLP is expected to handle
74925f7ddb14SDimitry Andric     // either form.
74935f7ddb14SDimitry Andric     // TODO: If we are canonicalizing to intrinsics, we can remove several
74945f7ddb14SDimitry Andric     //       special-case paths that deal with selects.
7495af732203SDimitry Andric     if (match(I, m_SMax(m_Value(), m_Value())))
7496af732203SDimitry Andric       return RecurKind::SMax;
7497af732203SDimitry Andric     if (match(I, m_SMin(m_Value(), m_Value())))
7498af732203SDimitry Andric       return RecurKind::SMin;
7499af732203SDimitry Andric     if (match(I, m_UMax(m_Value(), m_Value())))
7500af732203SDimitry Andric       return RecurKind::UMax;
7501af732203SDimitry Andric     if (match(I, m_UMin(m_Value(), m_Value())))
7502af732203SDimitry Andric       return RecurKind::UMin;
7503af732203SDimitry Andric 
7504af732203SDimitry Andric     if (auto *Select = dyn_cast<SelectInst>(I)) {
75050b57cec5SDimitry Andric       // Try harder: look for min/max pattern based on instructions producing
75060b57cec5SDimitry Andric       // same values such as: select ((cmp Inst1, Inst2), Inst1, Inst2).
75070b57cec5SDimitry Andric       // During the intermediate stages of SLP, it's very common to have
75080b57cec5SDimitry Andric       // pattern like this (since optimizeGatherSequence is run only once
75090b57cec5SDimitry Andric       // at the end):
75100b57cec5SDimitry Andric       // %1 = extractelement <2 x i32> %a, i32 0
75110b57cec5SDimitry Andric       // %2 = extractelement <2 x i32> %a, i32 1
75120b57cec5SDimitry Andric       // %cond = icmp sgt i32 %1, %2
75130b57cec5SDimitry Andric       // %3 = extractelement <2 x i32> %a, i32 0
75140b57cec5SDimitry Andric       // %4 = extractelement <2 x i32> %a, i32 1
75150b57cec5SDimitry Andric       // %select = select i1 %cond, i32 %3, i32 %4
75160b57cec5SDimitry Andric       CmpInst::Predicate Pred;
75170b57cec5SDimitry Andric       Instruction *L1;
75180b57cec5SDimitry Andric       Instruction *L2;
75190b57cec5SDimitry Andric 
7520af732203SDimitry Andric       Value *LHS = Select->getTrueValue();
7521af732203SDimitry Andric       Value *RHS = Select->getFalseValue();
75220b57cec5SDimitry Andric       Value *Cond = Select->getCondition();
75230b57cec5SDimitry Andric 
75240b57cec5SDimitry Andric       // TODO: Support inverse predicates.
75250b57cec5SDimitry Andric       if (match(Cond, m_Cmp(Pred, m_Specific(LHS), m_Instruction(L2)))) {
75260b57cec5SDimitry Andric         if (!isa<ExtractElementInst>(RHS) ||
75270b57cec5SDimitry Andric             !L2->isIdenticalTo(cast<Instruction>(RHS)))
7528af732203SDimitry Andric           return RecurKind::None;
75290b57cec5SDimitry Andric       } else if (match(Cond, m_Cmp(Pred, m_Instruction(L1), m_Specific(RHS)))) {
75300b57cec5SDimitry Andric         if (!isa<ExtractElementInst>(LHS) ||
75310b57cec5SDimitry Andric             !L1->isIdenticalTo(cast<Instruction>(LHS)))
7532af732203SDimitry Andric           return RecurKind::None;
75330b57cec5SDimitry Andric       } else {
75340b57cec5SDimitry Andric         if (!isa<ExtractElementInst>(LHS) || !isa<ExtractElementInst>(RHS))
7535af732203SDimitry Andric           return RecurKind::None;
75360b57cec5SDimitry Andric         if (!match(Cond, m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2))) ||
75370b57cec5SDimitry Andric             !L1->isIdenticalTo(cast<Instruction>(LHS)) ||
75380b57cec5SDimitry Andric             !L2->isIdenticalTo(cast<Instruction>(RHS)))
7539af732203SDimitry Andric           return RecurKind::None;
75400b57cec5SDimitry Andric       }
7541af732203SDimitry Andric 
7542af732203SDimitry Andric       TargetTransformInfo::ReductionFlags RdxFlags;
75430b57cec5SDimitry Andric       switch (Pred) {
75440b57cec5SDimitry Andric       default:
7545af732203SDimitry Andric         return RecurKind::None;
75460b57cec5SDimitry Andric       case CmpInst::ICMP_SGT:
75470b57cec5SDimitry Andric       case CmpInst::ICMP_SGE:
7548af732203SDimitry Andric         return RecurKind::SMax;
7549af732203SDimitry Andric       case CmpInst::ICMP_SLT:
7550af732203SDimitry Andric       case CmpInst::ICMP_SLE:
7551af732203SDimitry Andric         return RecurKind::SMin;
7552af732203SDimitry Andric       case CmpInst::ICMP_UGT:
7553af732203SDimitry Andric       case CmpInst::ICMP_UGE:
7554af732203SDimitry Andric         return RecurKind::UMax;
7555af732203SDimitry Andric       case CmpInst::ICMP_ULT:
7556af732203SDimitry Andric       case CmpInst::ICMP_ULE:
7557af732203SDimitry Andric         return RecurKind::UMin;
7558af732203SDimitry Andric       }
7559af732203SDimitry Andric     }
7560af732203SDimitry Andric     return RecurKind::None;
7561af732203SDimitry Andric   }
75620b57cec5SDimitry Andric 
7563af732203SDimitry Andric   /// Get the index of the first operand.
getFirstOperandIndex(Instruction * I)75645f7ddb14SDimitry Andric   static unsigned getFirstOperandIndex(Instruction *I) {
75655f7ddb14SDimitry Andric     return isCmpSelMinMax(I) ? 1 : 0;
7566af732203SDimitry Andric   }
7567af732203SDimitry Andric 
7568af732203SDimitry Andric   /// Total number of operands in the reduction operation.
getNumberOfOperands(Instruction * I)75695f7ddb14SDimitry Andric   static unsigned getNumberOfOperands(Instruction *I) {
75705f7ddb14SDimitry Andric     return isCmpSelMinMax(I) ? 3 : 2;
7571af732203SDimitry Andric   }
7572af732203SDimitry Andric 
7573af732203SDimitry Andric   /// Checks if the instruction is in basic block \p BB.
75745f7ddb14SDimitry Andric   /// For a cmp+sel min/max reduction check that both ops are in \p BB.
hasSameParent(Instruction * I,BasicBlock * BB)75755f7ddb14SDimitry Andric   static bool hasSameParent(Instruction *I, BasicBlock *BB) {
75765f7ddb14SDimitry Andric     if (isCmpSelMinMax(I)) {
75775f7ddb14SDimitry Andric       auto *Sel = cast<SelectInst>(I);
75785f7ddb14SDimitry Andric       auto *Cmp = cast<Instruction>(Sel->getCondition());
75795f7ddb14SDimitry Andric       return Sel->getParent() == BB && Cmp->getParent() == BB;
7580af732203SDimitry Andric     }
7581af732203SDimitry Andric     return I->getParent() == BB;
7582af732203SDimitry Andric   }
7583af732203SDimitry Andric 
7584af732203SDimitry Andric   /// Expected number of uses for reduction operations/reduced values.
hasRequiredNumberOfUses(bool IsCmpSelMinMax,Instruction * I)75855f7ddb14SDimitry Andric   static bool hasRequiredNumberOfUses(bool IsCmpSelMinMax, Instruction *I) {
75865f7ddb14SDimitry Andric     if (IsCmpSelMinMax) {
7587af732203SDimitry Andric       // SelectInst must be used twice while the condition op must have single
7588af732203SDimitry Andric       // use only.
75895f7ddb14SDimitry Andric       if (auto *Sel = dyn_cast<SelectInst>(I))
75905f7ddb14SDimitry Andric         return Sel->hasNUses(2) && Sel->getCondition()->hasOneUse();
75915f7ddb14SDimitry Andric       return I->hasNUses(2);
75925f7ddb14SDimitry Andric     }
7593af732203SDimitry Andric 
7594af732203SDimitry Andric     // Arithmetic reduction operation must be used once only.
7595af732203SDimitry Andric     return I->hasOneUse();
7596af732203SDimitry Andric   }
7597af732203SDimitry Andric 
7598af732203SDimitry Andric   /// Initializes the list of reduction operations.
initReductionOps(Instruction * I)75995f7ddb14SDimitry Andric   void initReductionOps(Instruction *I) {
76005f7ddb14SDimitry Andric     if (isCmpSelMinMax(I))
7601af732203SDimitry Andric       ReductionOps.assign(2, ReductionOpsType());
7602af732203SDimitry Andric     else
7603af732203SDimitry Andric       ReductionOps.assign(1, ReductionOpsType());
7604af732203SDimitry Andric   }
7605af732203SDimitry Andric 
7606af732203SDimitry Andric   /// Add all reduction operations for the reduction instruction \p I.
addReductionOps(Instruction * I)76075f7ddb14SDimitry Andric   void addReductionOps(Instruction *I) {
76085f7ddb14SDimitry Andric     if (isCmpSelMinMax(I)) {
7609af732203SDimitry Andric       ReductionOps[0].emplace_back(cast<SelectInst>(I)->getCondition());
7610af732203SDimitry Andric       ReductionOps[1].emplace_back(I);
7611af732203SDimitry Andric     } else {
7612af732203SDimitry Andric       ReductionOps[0].emplace_back(I);
76130b57cec5SDimitry Andric     }
76140b57cec5SDimitry Andric   }
7615af732203SDimitry Andric 
getLHS(RecurKind Kind,Instruction * I)7616af732203SDimitry Andric   static Value *getLHS(RecurKind Kind, Instruction *I) {
7617af732203SDimitry Andric     if (Kind == RecurKind::None)
7618af732203SDimitry Andric       return nullptr;
76195f7ddb14SDimitry Andric     return I->getOperand(getFirstOperandIndex(I));
76200b57cec5SDimitry Andric   }
getRHS(RecurKind Kind,Instruction * I)7621af732203SDimitry Andric   static Value *getRHS(RecurKind Kind, Instruction *I) {
7622af732203SDimitry Andric     if (Kind == RecurKind::None)
7623af732203SDimitry Andric       return nullptr;
76245f7ddb14SDimitry Andric     return I->getOperand(getFirstOperandIndex(I) + 1);
76250b57cec5SDimitry Andric   }
76260b57cec5SDimitry Andric 
76270b57cec5SDimitry Andric public:
76280b57cec5SDimitry Andric   HorizontalReduction() = default;
76290b57cec5SDimitry Andric 
  /// Try to find a reduction tree.
  /// \param Phi the phi node that (optionally) uses the candidate root; may
  ///        be null when matching starts directly at a reduction op.
  /// \param Inst the candidate root of the reduction tree (binop, select, or
  ///        intrinsic).
  /// \returns true on success; on success this object's RdxKind,
  /// ReductionRoot, ReductionOps, ReducedVals, and ExtraArgs are populated.
  bool matchAssociativeReduction(PHINode *Phi, Instruction *Inst) {
    assert((!Phi || is_contained(Phi->operands(), Inst)) &&
           "Phi needs to use the binary operator");
    assert((isa<BinaryOperator>(Inst) || isa<SelectInst>(Inst) ||
            isa<IntrinsicInst>(Inst)) &&
           "Expected binop, select, or intrinsic for reduction matching");
    RdxKind = getRdxKind(Inst);

    // We could have a initial reductions that is not an add.
    //  r *= v1 + v2 + v3 + v4
    // In such a case start looking for a tree rooted in the first '+'.
    if (Phi) {
      if (getLHS(RdxKind, Inst) == Phi) {
        // The phi feeds the LHS; re-root the match at the RHS operand.
        Phi = nullptr;
        Inst = dyn_cast<Instruction>(getRHS(RdxKind, Inst));
        if (!Inst)
          return false;
        RdxKind = getRdxKind(Inst);
      } else if (getRHS(RdxKind, Inst) == Phi) {
        // The phi feeds the RHS; re-root the match at the LHS operand.
        Phi = nullptr;
        Inst = dyn_cast<Instruction>(getLHS(RdxKind, Inst));
        if (!Inst)
          return false;
        RdxKind = getRdxKind(Inst);
      }
    }

    if (!isVectorizable(RdxKind, Inst))
      return false;

    // Analyze "regular" integer/FP types for reductions - no target-specific
    // types or pointers.
    Type *Ty = Inst->getType();
    if (!isValidElementType(Ty) || Ty->isPointerTy())
      return false;

    // Though the ultimate reduction may have multiple uses, its condition must
    // have only single use.
    if (auto *Sel = dyn_cast<SelectInst>(Inst))
      if (!Sel->getCondition()->hasOneUse())
        return false;

    ReductionRoot = Inst;

    // The opcode for leaf values that we perform a reduction on.
    // For example: load(x) + load(y) + load(z) + fptoui(w)
    // The leaf opcode for 'w' does not match, so we don't include it as a
    // potential candidate for the reduction.
    unsigned LeafOpcode = 0;

    // Post-order traverse the reduction tree starting at Inst. We only handle
    // true trees containing binary operators or selects.
    // Each stack entry pairs a node with the index of the next operand edge
    // to visit from that node.
    SmallVector<std::pair<Instruction *, unsigned>, 32> Stack;
    Stack.push_back(std::make_pair(Inst, getFirstOperandIndex(Inst)));
    initReductionOps(Inst);
    while (!Stack.empty()) {
      Instruction *TreeN = Stack.back().first;
      unsigned EdgeToVisit = Stack.back().second++;
      const RecurKind TreeRdxKind = getRdxKind(TreeN);
      // A node whose kind differs from the root's kind is a reduced (leaf)
      // value, not an interior reduction op.
      bool IsReducedValue = TreeRdxKind != RdxKind;

      // Postorder visit.
      if (IsReducedValue || EdgeToVisit >= getNumberOfOperands(TreeN)) {
        if (IsReducedValue)
          ReducedVals.push_back(TreeN);
        else {
          auto ExtraArgsIter = ExtraArgs.find(TreeN);
          if (ExtraArgsIter != ExtraArgs.end() && !ExtraArgsIter->second) {
            // Check if TreeN is an extra argument of its parent operation.
            if (Stack.size() <= 1) {
              // TreeN can't be an extra argument as it is a root reduction
              // operation.
              return false;
            }
            // Yes, TreeN is an extra argument, do not add it to a list of
            // reduction operations.
            // Stack[Stack.size() - 2] always points to the parent operation.
            markExtraArg(Stack[Stack.size() - 2], TreeN);
            ExtraArgs.erase(TreeN);
          } else
            addReductionOps(TreeN);
        }
        // Retract.
        Stack.pop_back();
        continue;
      }

      // Visit operands.
      Value *EdgeVal = getRdxOperand(TreeN, EdgeToVisit);
      auto *EdgeInst = dyn_cast<Instruction>(EdgeVal);
      if (!EdgeInst) {
        // Edge value is not a reduction instruction or a leaf instruction.
        // (It may be a constant, function argument, or something else.)
        markExtraArg(Stack.back(), EdgeVal);
        continue;
      }
      RecurKind EdgeRdxKind = getRdxKind(EdgeInst);
      // Continue analysis if the next operand is a reduction operation or
      // (possibly) a leaf value. If the leaf value opcode is not set,
      // the first met operation != reduction operation is considered as the
      // leaf opcode.
      // Only handle trees in the current basic block.
      // Each tree node needs to have minimal number of users except for the
      // ultimate reduction.
      const bool IsRdxInst = EdgeRdxKind == RdxKind;
      if (EdgeInst != Phi && EdgeInst != Inst &&
          hasSameParent(EdgeInst, Inst->getParent()) &&
          hasRequiredNumberOfUses(isCmpSelMinMax(Inst), EdgeInst) &&
          (!LeafOpcode || LeafOpcode == EdgeInst->getOpcode() || IsRdxInst)) {
        if (IsRdxInst) {
          // We need to be able to reassociate the reduction operations.
          if (!isVectorizable(EdgeRdxKind, EdgeInst)) {
            // I is an extra argument for TreeN (its parent operation).
            markExtraArg(Stack.back(), EdgeInst);
            continue;
          }
        } else if (!LeafOpcode) {
          LeafOpcode = EdgeInst->getOpcode();
        }
        // Descend into this operand: push it with its first operand index.
        Stack.push_back(
            std::make_pair(EdgeInst, getFirstOperandIndex(EdgeInst)));
        continue;
      }
      // I is an extra argument for TreeN (its parent operation).
      markExtraArg(Stack.back(), EdgeInst);
    }
    return true;
  }
77590b57cec5SDimitry Andric 
  /// Attempt to vectorize the tree found by matchAssociativeReduction.
  /// Repeatedly vectorizes power-of-2-wide windows of ReducedVals when the
  /// SLP cost model says it is profitable, chains the partial results with
  /// scalar reduction ops, and finally replaces all uses of the reduction
  /// root. \returns true if anything was vectorized.
  bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
    // If there are a sufficient number of reduction values, reduce
    // to a nearby power-of-2. We can safely generate oversized
    // vectors and rely on the backend to split them to legal sizes.
    unsigned NumReducedVals = ReducedVals.size();
    if (NumReducedVals < 4)
      return false;

    // Intersect the fast-math-flags from all reduction operations.
    FastMathFlags RdxFMF;
    RdxFMF.set();
    for (ReductionOpsType &RdxOp : ReductionOps) {
      for (Value *RdxVal : RdxOp) {
        if (auto *FPMO = dyn_cast<FPMathOperator>(RdxVal))
          RdxFMF &= FPMO->getFastMathFlags();
      }
    }

    IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
    Builder.setFastMathFlags(RdxFMF);

    BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
    // The same extra argument may be used several times, so log each attempt
    // to use it.
    for (const std::pair<Instruction *, Value *> &Pair : ExtraArgs) {
      assert(Pair.first && "DebugLoc must be set.");
      ExternallyUsedValues[Pair.second].push_back(Pair.first);
    }

    // The compare instruction of a min/max is the insertion point for new
    // instructions and may be replaced with a new compare instruction.
    auto getCmpForMinMaxReduction = [](Instruction *RdxRootInst) {
      assert(isa<SelectInst>(RdxRootInst) &&
             "Expected min/max reduction to have select root instruction");
      Value *ScalarCond = cast<SelectInst>(RdxRootInst)->getCondition();
      assert(isa<Instruction>(ScalarCond) &&
             "Expected min/max reduction to have compare condition");
      return cast<Instruction>(ScalarCond);
    };

    // The reduction root is used as the insertion point for new instructions,
    // so set it as externally used to prevent it from being deleted.
    ExternallyUsedValues[ReductionRoot];
    SmallVector<Value *, 16> IgnoreList;
    for (ReductionOpsType &RdxOp : ReductionOps)
      IgnoreList.append(RdxOp.begin(), RdxOp.end());

    unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
    if (NumReducedVals > ReduxWidth) {
      // In the loop below, we are building a tree based on a window of
      // 'ReduxWidth' values.
      // If the operands of those values have common traits (compare predicate,
      // constant operand, etc), then we want to group those together to
      // minimize the cost of the reduction.

      // TODO: This should be extended to count common operands for
      //       compares and binops.

      // Step 1: Count the number of times each compare predicate occurs.
      SmallDenseMap<unsigned, unsigned> PredCountMap;
      for (Value *RdxVal : ReducedVals) {
        CmpInst::Predicate Pred;
        if (match(RdxVal, m_Cmp(Pred, m_Value(), m_Value())))
          ++PredCountMap[Pred];
      }
      // Step 2: Sort the values so the most common predicates come first.
      stable_sort(ReducedVals, [&PredCountMap](Value *A, Value *B) {
        CmpInst::Predicate PredA, PredB;
        if (match(A, m_Cmp(PredA, m_Value(), m_Value())) &&
            match(B, m_Cmp(PredB, m_Value(), m_Value()))) {
          return PredCountMap[PredA] > PredCountMap[PredB];
        }
        return false;
      });
    }

    // Walk ReducedVals in windows of ReduxWidth, vectorizing each profitable
    // window and accumulating the partial results in VectorizedTree.
    Value *VectorizedTree = nullptr;
    unsigned i = 0;
    while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
      ArrayRef<Value *> VL(&ReducedVals[i], ReduxWidth);
      V.buildTree(VL, ExternallyUsedValues, IgnoreList);
      Optional<ArrayRef<unsigned>> Order = V.bestOrder();
      if (Order) {
        assert(Order->size() == VL.size() &&
               "Order size must be the same as number of vectorized "
               "instructions.");
        // TODO: reorder tree nodes without tree rebuilding.
        SmallVector<Value *, 4> ReorderedOps(VL.size());
        transform(fixupOrderingIndices(*Order), ReorderedOps.begin(),
                  [VL](const unsigned Idx) { return VL[Idx]; });
        V.buildTree(ReorderedOps, ExternallyUsedValues, IgnoreList);
      }
      if (V.isTreeTinyAndNotFullyVectorizable())
        break;
      if (V.isLoadCombineReductionCandidate(RdxKind))
        break;

      // For a poison-safe boolean logic reduction, do not replace select
      // instructions with logic ops. All reduced values will be frozen (see
      // below) to prevent leaking poison.
      if (isa<SelectInst>(ReductionRoot) &&
          isBoolLogicOp(cast<Instruction>(ReductionRoot)) &&
          NumReducedVals != ReduxWidth)
        break;

      V.computeMinimumValueSizes();

      // Estimate cost.
      InstructionCost TreeCost =
          V.getTreeCost(makeArrayRef(&ReducedVals[i], ReduxWidth));
      InstructionCost ReductionCost =
          getReductionCost(TTI, ReducedVals[i], ReduxWidth, RdxFMF);
      InstructionCost Cost = TreeCost + ReductionCost;
      if (!Cost.isValid()) {
        LLVM_DEBUG(dbgs() << "Encountered invalid baseline cost.\n");
        return false;
      }
      // A non-negative cost (relative to -SLPCostThreshold) means the
      // vectorized form is not cheaper; report and stop widening.
      if (Cost >= -SLPCostThreshold) {
        V.getORE()->emit([&]() {
          return OptimizationRemarkMissed(SV_NAME, "HorSLPNotBeneficial",
                                          cast<Instruction>(VL[0]))
                 << "Vectorizing horizontal reduction is possible"
                 << "but not beneficial with cost " << ore::NV("Cost", Cost)
                 << " and threshold "
                 << ore::NV("Threshold", -SLPCostThreshold);
        });
        break;
      }

      LLVM_DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:"
                        << Cost << ". (HorRdx)\n");
      V.getORE()->emit([&]() {
        return OptimizationRemark(SV_NAME, "VectorizedHorizontalReduction",
                                  cast<Instruction>(VL[0]))
               << "Vectorized horizontal reduction with cost "
               << ore::NV("Cost", Cost) << " and with tree size "
               << ore::NV("TreeSize", V.getTreeSize());
      });

      // Vectorize a tree.
      DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
      Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);

      // Emit a reduction. If the root is a select (min/max idiom), the insert
      // point is the compare condition of that select.
      Instruction *RdxRootInst = cast<Instruction>(ReductionRoot);
      if (isCmpSelMinMax(RdxRootInst))
        Builder.SetInsertPoint(getCmpForMinMaxReduction(RdxRootInst));
      else
        Builder.SetInsertPoint(RdxRootInst);

      // To prevent poison from leaking across what used to be sequential, safe,
      // scalar boolean logic operations, the reduction operand must be frozen.
      if (isa<SelectInst>(RdxRootInst) && isBoolLogicOp(RdxRootInst))
        VectorizedRoot = Builder.CreateFreeze(VectorizedRoot);

      Value *ReducedSubTree =
          emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);

      if (!VectorizedTree) {
        // Initialize the final value in the reduction.
        VectorizedTree = ReducedSubTree;
      } else {
        // Update the final value in the reduction.
        Builder.SetCurrentDebugLocation(Loc);
        VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
                                  ReducedSubTree, "op.rdx", ReductionOps);
      }
      // Advance past this window; the next window shrinks to the largest
      // power-of-2 that fits in the remaining values.
      i += ReduxWidth;
      ReduxWidth = PowerOf2Floor(NumReducedVals - i);
    }

    if (VectorizedTree) {
      // Finish the reduction: fold any leftover scalar reduced values that
      // did not fit in a vectorized window into the result.
      for (; i < NumReducedVals; ++i) {
        auto *I = cast<Instruction>(ReducedVals[i]);
        Builder.SetCurrentDebugLocation(I->getDebugLoc());
        VectorizedTree =
            createOp(Builder, RdxKind, VectorizedTree, I, "", ReductionOps);
      }
      for (auto &Pair : ExternallyUsedValues) {
        // Add each externally used value to the final reduction.
        for (auto *I : Pair.second) {
          Builder.SetCurrentDebugLocation(I->getDebugLoc());
          VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
                                    Pair.first, "op.extra", I);
        }
      }

      ReductionRoot->replaceAllUsesWith(VectorizedTree);

      // Mark all scalar reduction ops for deletion, they are replaced by the
      // vector reductions.
      V.eraseInstructions(IgnoreList);
    }
    return VectorizedTree != nullptr;
  }
79580b57cec5SDimitry Andric 
numReductionValues() const7959af732203SDimitry Andric   unsigned numReductionValues() const { return ReducedVals.size(); }
79600b57cec5SDimitry Andric 
79610b57cec5SDimitry Andric private:
79620b57cec5SDimitry Andric   /// Calculate the cost of a reduction.
getReductionCost(TargetTransformInfo * TTI,Value * FirstReducedVal,unsigned ReduxWidth,FastMathFlags FMF)7963af732203SDimitry Andric   InstructionCost getReductionCost(TargetTransformInfo *TTI,
79645f7ddb14SDimitry Andric                                    Value *FirstReducedVal, unsigned ReduxWidth,
79655f7ddb14SDimitry Andric                                    FastMathFlags FMF) {
79660b57cec5SDimitry Andric     Type *ScalarTy = FirstReducedVal->getType();
7967af732203SDimitry Andric     FixedVectorType *VectorTy = FixedVectorType::get(ScalarTy, ReduxWidth);
7968af732203SDimitry Andric     InstructionCost VectorCost, ScalarCost;
7969af732203SDimitry Andric     switch (RdxKind) {
7970af732203SDimitry Andric     case RecurKind::Add:
7971af732203SDimitry Andric     case RecurKind::Mul:
7972af732203SDimitry Andric     case RecurKind::Or:
7973af732203SDimitry Andric     case RecurKind::And:
7974af732203SDimitry Andric     case RecurKind::Xor:
7975af732203SDimitry Andric     case RecurKind::FAdd:
7976af732203SDimitry Andric     case RecurKind::FMul: {
7977af732203SDimitry Andric       unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(RdxKind);
79785f7ddb14SDimitry Andric       VectorCost = TTI->getArithmeticReductionCost(RdxOpcode, VectorTy, FMF);
7979af732203SDimitry Andric       ScalarCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy);
79800b57cec5SDimitry Andric       break;
79810b57cec5SDimitry Andric     }
7982af732203SDimitry Andric     case RecurKind::FMax:
7983af732203SDimitry Andric     case RecurKind::FMin: {
7984af732203SDimitry Andric       auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
79855f7ddb14SDimitry Andric       VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy,
79865f7ddb14SDimitry Andric                                                /*unsigned=*/false);
7987af732203SDimitry Andric       ScalarCost =
7988af732203SDimitry Andric           TTI->getCmpSelInstrCost(Instruction::FCmp, ScalarTy) +
79890b57cec5SDimitry Andric           TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
79900b57cec5SDimitry Andric                                   CmpInst::makeCmpResultType(ScalarTy));
79910b57cec5SDimitry Andric       break;
7992af732203SDimitry Andric     }
7993af732203SDimitry Andric     case RecurKind::SMax:
7994af732203SDimitry Andric     case RecurKind::SMin:
7995af732203SDimitry Andric     case RecurKind::UMax:
7996af732203SDimitry Andric     case RecurKind::UMin: {
7997af732203SDimitry Andric       auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
7998af732203SDimitry Andric       bool IsUnsigned =
7999af732203SDimitry Andric           RdxKind == RecurKind::UMax || RdxKind == RecurKind::UMin;
80005f7ddb14SDimitry Andric       VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy, IsUnsigned);
8001af732203SDimitry Andric       ScalarCost =
8002af732203SDimitry Andric           TTI->getCmpSelInstrCost(Instruction::ICmp, ScalarTy) +
8003af732203SDimitry Andric           TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
8004af732203SDimitry Andric                                   CmpInst::makeCmpResultType(ScalarTy));
8005af732203SDimitry Andric       break;
8006af732203SDimitry Andric     }
8007af732203SDimitry Andric     default:
80080b57cec5SDimitry Andric       llvm_unreachable("Expected arithmetic or min/max reduction operation");
80090b57cec5SDimitry Andric     }
80100b57cec5SDimitry Andric 
8011af732203SDimitry Andric     // Scalar cost is repeated for N-1 elements.
8012af732203SDimitry Andric     ScalarCost *= (ReduxWidth - 1);
8013af732203SDimitry Andric     LLVM_DEBUG(dbgs() << "SLP: Adding cost " << VectorCost - ScalarCost
80140b57cec5SDimitry Andric                       << " for reduction that starts with " << *FirstReducedVal
8015af732203SDimitry Andric                       << " (It is a splitting reduction)\n");
8016af732203SDimitry Andric     return VectorCost - ScalarCost;
80170b57cec5SDimitry Andric   }
80180b57cec5SDimitry Andric 
  /// Emit a horizontal reduction of the vectorized value \p VectorizedValue,
  /// which must be \p ReduxWidth lanes wide, using the IR builder position of
  /// \p Builder. Delegates the actual sequence selection to
  /// createSimpleTargetReduction for the current reduction kind (RdxKind).
  Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder,
                       unsigned ReduxWidth, const TargetTransformInfo *TTI) {
    assert(VectorizedValue && "Need to have a vectorized tree node");
    assert(isPowerOf2_32(ReduxWidth) &&
           "We only handle power-of-two reductions for now");

    // ReductionOps.back() supplies the original scalar ops so flags/metadata
    // can be propagated onto the emitted reduction.
    return createSimpleTargetReduction(Builder, TTI, VectorizedValue, RdxKind,
                                       ReductionOps.back());
  }
80290b57cec5SDimitry Andric };
80300b57cec5SDimitry Andric 
80310b57cec5SDimitry Andric } // end anonymous namespace
80320b57cec5SDimitry Andric 
getAggregateSize(Instruction * InsertInst)8033af732203SDimitry Andric static Optional<unsigned> getAggregateSize(Instruction *InsertInst) {
8034af732203SDimitry Andric   if (auto *IE = dyn_cast<InsertElementInst>(InsertInst))
8035af732203SDimitry Andric     return cast<FixedVectorType>(IE->getType())->getNumElements();
8036af732203SDimitry Andric 
8037af732203SDimitry Andric   unsigned AggregateSize = 1;
8038af732203SDimitry Andric   auto *IV = cast<InsertValueInst>(InsertInst);
8039af732203SDimitry Andric   Type *CurrentType = IV->getType();
8040af732203SDimitry Andric   do {
8041af732203SDimitry Andric     if (auto *ST = dyn_cast<StructType>(CurrentType)) {
8042af732203SDimitry Andric       for (auto *Elt : ST->elements())
8043af732203SDimitry Andric         if (Elt != ST->getElementType(0)) // check homogeneity
8044af732203SDimitry Andric           return None;
8045af732203SDimitry Andric       AggregateSize *= ST->getNumElements();
8046af732203SDimitry Andric       CurrentType = ST->getElementType(0);
8047af732203SDimitry Andric     } else if (auto *AT = dyn_cast<ArrayType>(CurrentType)) {
8048af732203SDimitry Andric       AggregateSize *= AT->getNumElements();
8049af732203SDimitry Andric       CurrentType = AT->getElementType();
8050af732203SDimitry Andric     } else if (auto *VT = dyn_cast<FixedVectorType>(CurrentType)) {
8051af732203SDimitry Andric       AggregateSize *= VT->getNumElements();
8052af732203SDimitry Andric       return AggregateSize;
8053af732203SDimitry Andric     } else if (CurrentType->isSingleValueType()) {
8054af732203SDimitry Andric       return AggregateSize;
8055af732203SDimitry Andric     } else {
8056af732203SDimitry Andric       return None;
8057af732203SDimitry Andric     }
8058af732203SDimitry Andric   } while (true);
8059af732203SDimitry Andric }
8060af732203SDimitry Andric 
findBuildAggregate_rec(Instruction * LastInsertInst,TargetTransformInfo * TTI,SmallVectorImpl<Value * > & BuildVectorOpds,SmallVectorImpl<Value * > & InsertElts,unsigned OperandOffset)8061af732203SDimitry Andric static bool findBuildAggregate_rec(Instruction *LastInsertInst,
8062af732203SDimitry Andric                                    TargetTransformInfo *TTI,
8063af732203SDimitry Andric                                    SmallVectorImpl<Value *> &BuildVectorOpds,
8064af732203SDimitry Andric                                    SmallVectorImpl<Value *> &InsertElts,
8065af732203SDimitry Andric                                    unsigned OperandOffset) {
8066af732203SDimitry Andric   do {
8067af732203SDimitry Andric     Value *InsertedOperand = LastInsertInst->getOperand(1);
80685f7ddb14SDimitry Andric     Optional<int> OperandIndex = getInsertIndex(LastInsertInst, OperandOffset);
8069af732203SDimitry Andric     if (!OperandIndex)
8070af732203SDimitry Andric       return false;
8071af732203SDimitry Andric     if (isa<InsertElementInst>(InsertedOperand) ||
8072af732203SDimitry Andric         isa<InsertValueInst>(InsertedOperand)) {
8073af732203SDimitry Andric       if (!findBuildAggregate_rec(cast<Instruction>(InsertedOperand), TTI,
8074af732203SDimitry Andric                                   BuildVectorOpds, InsertElts, *OperandIndex))
8075af732203SDimitry Andric         return false;
8076af732203SDimitry Andric     } else {
8077af732203SDimitry Andric       BuildVectorOpds[*OperandIndex] = InsertedOperand;
8078af732203SDimitry Andric       InsertElts[*OperandIndex] = LastInsertInst;
8079af732203SDimitry Andric     }
8080af732203SDimitry Andric     LastInsertInst = dyn_cast<Instruction>(LastInsertInst->getOperand(0));
8081af732203SDimitry Andric   } while (LastInsertInst != nullptr &&
8082af732203SDimitry Andric            (isa<InsertValueInst>(LastInsertInst) ||
8083af732203SDimitry Andric             isa<InsertElementInst>(LastInsertInst)) &&
8084af732203SDimitry Andric            LastInsertInst->hasOneUse());
80855f7ddb14SDimitry Andric   return true;
8086af732203SDimitry Andric }
8087af732203SDimitry Andric 
80880b57cec5SDimitry Andric /// Recognize construction of vectors like
8089af732203SDimitry Andric ///  %ra = insertelement <4 x float> poison, float %s0, i32 0
80900b57cec5SDimitry Andric ///  %rb = insertelement <4 x float> %ra, float %s1, i32 1
80910b57cec5SDimitry Andric ///  %rc = insertelement <4 x float> %rb, float %s2, i32 2
80920b57cec5SDimitry Andric ///  %rd = insertelement <4 x float> %rc, float %s3, i32 3
8093480093f4SDimitry Andric ///  starting from the last insertelement or insertvalue instruction.
80940b57cec5SDimitry Andric ///
8095af732203SDimitry Andric /// Also recognize homogeneous aggregates like {<2 x float>, <2 x float>},
8096480093f4SDimitry Andric /// {{float, float}, {float, float}}, [2 x {float, float}] and so on.
8097480093f4SDimitry Andric /// See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples.
8098480093f4SDimitry Andric ///
8099480093f4SDimitry Andric /// Assume LastInsertInst is of InsertElementInst or InsertValueInst type.
81000b57cec5SDimitry Andric ///
81010b57cec5SDimitry Andric /// \return true if it matches.
findBuildAggregate(Instruction * LastInsertInst,TargetTransformInfo * TTI,SmallVectorImpl<Value * > & BuildVectorOpds,SmallVectorImpl<Value * > & InsertElts)8102af732203SDimitry Andric static bool findBuildAggregate(Instruction *LastInsertInst,
8103af732203SDimitry Andric                                TargetTransformInfo *TTI,
8104480093f4SDimitry Andric                                SmallVectorImpl<Value *> &BuildVectorOpds,
81055ffd83dbSDimitry Andric                                SmallVectorImpl<Value *> &InsertElts) {
8106af732203SDimitry Andric 
8107480093f4SDimitry Andric   assert((isa<InsertElementInst>(LastInsertInst) ||
8108480093f4SDimitry Andric           isa<InsertValueInst>(LastInsertInst)) &&
8109480093f4SDimitry Andric          "Expected insertelement or insertvalue instruction!");
8110af732203SDimitry Andric 
8111af732203SDimitry Andric   assert((BuildVectorOpds.empty() && InsertElts.empty()) &&
8112af732203SDimitry Andric          "Expected empty result vectors!");
8113af732203SDimitry Andric 
8114af732203SDimitry Andric   Optional<unsigned> AggregateSize = getAggregateSize(LastInsertInst);
8115af732203SDimitry Andric   if (!AggregateSize)
8116480093f4SDimitry Andric     return false;
8117af732203SDimitry Andric   BuildVectorOpds.resize(*AggregateSize);
8118af732203SDimitry Andric   InsertElts.resize(*AggregateSize);
8119af732203SDimitry Andric 
8120af732203SDimitry Andric   if (findBuildAggregate_rec(LastInsertInst, TTI, BuildVectorOpds, InsertElts,
8121af732203SDimitry Andric                              0)) {
8122af732203SDimitry Andric     llvm::erase_value(BuildVectorOpds, nullptr);
8123af732203SDimitry Andric     llvm::erase_value(InsertElts, nullptr);
8124af732203SDimitry Andric     if (BuildVectorOpds.size() >= 2)
81250b57cec5SDimitry Andric       return true;
81260b57cec5SDimitry Andric   }
81270b57cec5SDimitry Andric 
8128af732203SDimitry Andric   return false;
8129af732203SDimitry Andric }
8130af732203SDimitry Andric 
81310b57cec5SDimitry Andric /// Try and get a reduction value from a phi node.
81320b57cec5SDimitry Andric ///
81330b57cec5SDimitry Andric /// Given a phi node \p P in a block \p ParentBB, consider possible reductions
81340b57cec5SDimitry Andric /// if they come from either \p ParentBB or a containing loop latch.
81350b57cec5SDimitry Andric ///
81360b57cec5SDimitry Andric /// \returns A candidate reduction value if possible, or \code nullptr \endcode
81370b57cec5SDimitry Andric /// if not possible.
getReductionValue(const DominatorTree * DT,PHINode * P,BasicBlock * ParentBB,LoopInfo * LI)81380b57cec5SDimitry Andric static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
81390b57cec5SDimitry Andric                                 BasicBlock *ParentBB, LoopInfo *LI) {
81400b57cec5SDimitry Andric   // There are situations where the reduction value is not dominated by the
81410b57cec5SDimitry Andric   // reduction phi. Vectorizing such cases has been reported to cause
81420b57cec5SDimitry Andric   // miscompiles. See PR25787.
81430b57cec5SDimitry Andric   auto DominatedReduxValue = [&](Value *R) {
81440b57cec5SDimitry Andric     return isa<Instruction>(R) &&
81450b57cec5SDimitry Andric            DT->dominates(P->getParent(), cast<Instruction>(R)->getParent());
81460b57cec5SDimitry Andric   };
81470b57cec5SDimitry Andric 
81480b57cec5SDimitry Andric   Value *Rdx = nullptr;
81490b57cec5SDimitry Andric 
81500b57cec5SDimitry Andric   // Return the incoming value if it comes from the same BB as the phi node.
81510b57cec5SDimitry Andric   if (P->getIncomingBlock(0) == ParentBB) {
81520b57cec5SDimitry Andric     Rdx = P->getIncomingValue(0);
81530b57cec5SDimitry Andric   } else if (P->getIncomingBlock(1) == ParentBB) {
81540b57cec5SDimitry Andric     Rdx = P->getIncomingValue(1);
81550b57cec5SDimitry Andric   }
81560b57cec5SDimitry Andric 
81570b57cec5SDimitry Andric   if (Rdx && DominatedReduxValue(Rdx))
81580b57cec5SDimitry Andric     return Rdx;
81590b57cec5SDimitry Andric 
81600b57cec5SDimitry Andric   // Otherwise, check whether we have a loop latch to look at.
81610b57cec5SDimitry Andric   Loop *BBL = LI->getLoopFor(ParentBB);
81620b57cec5SDimitry Andric   if (!BBL)
81630b57cec5SDimitry Andric     return nullptr;
81640b57cec5SDimitry Andric   BasicBlock *BBLatch = BBL->getLoopLatch();
81650b57cec5SDimitry Andric   if (!BBLatch)
81660b57cec5SDimitry Andric     return nullptr;
81670b57cec5SDimitry Andric 
81680b57cec5SDimitry Andric   // There is a loop latch, return the incoming value if it comes from
81690b57cec5SDimitry Andric   // that. This reduction pattern occasionally turns up.
81700b57cec5SDimitry Andric   if (P->getIncomingBlock(0) == BBLatch) {
81710b57cec5SDimitry Andric     Rdx = P->getIncomingValue(0);
81720b57cec5SDimitry Andric   } else if (P->getIncomingBlock(1) == BBLatch) {
81730b57cec5SDimitry Andric     Rdx = P->getIncomingValue(1);
81740b57cec5SDimitry Andric   }
81750b57cec5SDimitry Andric 
81760b57cec5SDimitry Andric   if (Rdx && DominatedReduxValue(Rdx))
81770b57cec5SDimitry Andric     return Rdx;
81780b57cec5SDimitry Andric 
81790b57cec5SDimitry Andric   return nullptr;
81800b57cec5SDimitry Andric }
81810b57cec5SDimitry Andric 
matchRdxBop(Instruction * I,Value * & V0,Value * & V1)8182af732203SDimitry Andric static bool matchRdxBop(Instruction *I, Value *&V0, Value *&V1) {
8183af732203SDimitry Andric   if (match(I, m_BinOp(m_Value(V0), m_Value(V1))))
8184af732203SDimitry Andric     return true;
8185af732203SDimitry Andric   if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(V0), m_Value(V1))))
8186af732203SDimitry Andric     return true;
8187af732203SDimitry Andric   if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(V0), m_Value(V1))))
8188af732203SDimitry Andric     return true;
81895f7ddb14SDimitry Andric   if (match(I, m_Intrinsic<Intrinsic::smax>(m_Value(V0), m_Value(V1))))
81905f7ddb14SDimitry Andric     return true;
81915f7ddb14SDimitry Andric   if (match(I, m_Intrinsic<Intrinsic::smin>(m_Value(V0), m_Value(V1))))
81925f7ddb14SDimitry Andric     return true;
81935f7ddb14SDimitry Andric   if (match(I, m_Intrinsic<Intrinsic::umax>(m_Value(V0), m_Value(V1))))
81945f7ddb14SDimitry Andric     return true;
81955f7ddb14SDimitry Andric   if (match(I, m_Intrinsic<Intrinsic::umin>(m_Value(V0), m_Value(V1))))
81965f7ddb14SDimitry Andric     return true;
8197af732203SDimitry Andric   return false;
8198af732203SDimitry Andric }
8199af732203SDimitry Andric 
/// Attempt to reduce a horizontal reduction.
/// If it is legal to match a horizontal reduction feeding the phi node \a P
/// with reduction operators \a Root (or one of its operands) in a basic block
/// \a BB, then check if it can be done. If horizontal reduction is not found
/// and root instruction is a binary operation, vectorization of the operands
/// is attempted via the \a Vectorize callback.
/// \returns true if a horizontal reduction was matched and reduced or operands
/// of one of the binary instructions were vectorized.
/// \returns false if a horizontal reduction was not matched (or not possible)
/// or no vectorization of any binary operation feeding \a Root instruction was
/// performed.
static bool tryToVectorizeHorReductionOrInstOperands(
    PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R,
    TargetTransformInfo *TTI,
    const function_ref<bool(Instruction *, BoUpSLP &)> Vectorize) {
  if (!ShouldVectorizeHor)
    return false;

  if (!Root)
    return false;

  // Only consider roots inside this block; phi nodes are never roots.
  if (Root->getParent() != BB || isa<PHINode>(Root))
    return false;
  // Start analysis starting from Root instruction. If horizontal reduction is
  // found, try to vectorize it. If it is not a horizontal reduction or
  // vectorization is not possible or not effective, and currently analyzed
  // instruction is a binary operation, try to vectorize the operands, using
  // pre-order DFS traversal order. If the operands were not vectorized, repeat
  // the same procedure considering each operand as a possible root of the
  // horizontal reduction.
  // Interrupt the process if the Root instruction itself was vectorized or all
  // sub-trees not higher than RecursionMaxDepth were analyzed/vectorized.
  // Skip the analysis of CmpInsts: the compiler implements a postanalysis of
  // the CmpInsts, so we can skip extra attempts in
  // tryToVectorizeHorReductionOrInstOperands and save compile time.
  SmallVector<std::pair<Instruction *, unsigned>, 8> Stack(1, {Root, 0});
  SmallPtrSet<Value *, 8> VisitedInstrs;
  bool Res = false;
  while (!Stack.empty()) {
    Instruction *Inst;
    unsigned Level;
    std::tie(Inst, Level) = Stack.pop_back_val();
    // Do not try to analyze instruction that has already been vectorized.
    // This may happen when we vectorize instruction operands on a previous
    // iteration while stack was populated before that happened.
    if (R.isDeleted(Inst))
      continue;
    Value *B0, *B1;
    bool IsBinop = matchRdxBop(Inst, B0, B1);
    bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value()));
    if (IsBinop || IsSelect) {
      HorizontalReduction HorRdx;
      if (HorRdx.matchAssociativeReduction(P, Inst)) {
        if (HorRdx.tryToReduce(R, TTI)) {
          Res = true;
          // Set P to nullptr to avoid re-analysis of phi node in
          // matchAssociativeReduction function unless this is the root node.
          P = nullptr;
          continue;
        }
      }
      // Reduction did not match/reduce. For a binop fed by the phi, continue
      // the search from the operand that is not the phi itself.
      if (P && IsBinop) {
        Inst = dyn_cast<Instruction>(B0);
        if (Inst == P)
          Inst = dyn_cast<Instruction>(B1);
        if (!Inst) {
          // Set P to nullptr to avoid re-analysis of phi node in
          // matchAssociativeReduction function unless this is the root node.
          P = nullptr;
          continue;
        }
      }
    }
    // Set P to nullptr to avoid re-analysis of phi node in
    // matchAssociativeReduction function unless this is the root node.
    P = nullptr;
    // Do not try to vectorize CmpInst operands, this is done separately.
    if (!isa<CmpInst>(Inst) && Vectorize(Inst, R)) {
      Res = true;
      continue;
    }

    // Try to vectorize operands.
    // Continue analysis for the instruction from the same basic block only to
    // save compile time.
    if (++Level < RecursionMaxDepth)
      for (auto *Op : Inst->operand_values())
        if (VisitedInstrs.insert(Op).second)
          if (auto *I = dyn_cast<Instruction>(Op))
            // Do not try to vectorize CmpInst operands, this is done
            // separately.
            if (!isa<PHINode>(I) && !isa<CmpInst>(I) && !R.isDeleted(I) &&
                I->getParent() == BB)
              Stack.emplace_back(I, Level);
  }
  return Res;
}
82970b57cec5SDimitry Andric 
vectorizeRootInstruction(PHINode * P,Value * V,BasicBlock * BB,BoUpSLP & R,TargetTransformInfo * TTI)82980b57cec5SDimitry Andric bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
82990b57cec5SDimitry Andric                                                  BasicBlock *BB, BoUpSLP &R,
83000b57cec5SDimitry Andric                                                  TargetTransformInfo *TTI) {
8301af732203SDimitry Andric   auto *I = dyn_cast_or_null<Instruction>(V);
83020b57cec5SDimitry Andric   if (!I)
83030b57cec5SDimitry Andric     return false;
83040b57cec5SDimitry Andric 
83050b57cec5SDimitry Andric   if (!isa<BinaryOperator>(I))
83060b57cec5SDimitry Andric     P = nullptr;
83070b57cec5SDimitry Andric   // Try to match and vectorize a horizontal reduction.
83080b57cec5SDimitry Andric   auto &&ExtraVectorization = [this](Instruction *I, BoUpSLP &R) -> bool {
83090b57cec5SDimitry Andric     return tryToVectorize(I, R);
83100b57cec5SDimitry Andric   };
83110b57cec5SDimitry Andric   return tryToVectorizeHorReductionOrInstOperands(P, I, BB, R, TTI,
83120b57cec5SDimitry Andric                                                   ExtraVectorization);
83130b57cec5SDimitry Andric }
83140b57cec5SDimitry Andric 
vectorizeInsertValueInst(InsertValueInst * IVI,BasicBlock * BB,BoUpSLP & R)83150b57cec5SDimitry Andric bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
83160b57cec5SDimitry Andric                                                  BasicBlock *BB, BoUpSLP &R) {
83170b57cec5SDimitry Andric   const DataLayout &DL = BB->getModule()->getDataLayout();
83180b57cec5SDimitry Andric   if (!R.canMapToVector(IVI->getType(), DL))
83190b57cec5SDimitry Andric     return false;
83200b57cec5SDimitry Andric 
83210b57cec5SDimitry Andric   SmallVector<Value *, 16> BuildVectorOpds;
83225ffd83dbSDimitry Andric   SmallVector<Value *, 16> BuildVectorInsts;
8323af732203SDimitry Andric   if (!findBuildAggregate(IVI, TTI, BuildVectorOpds, BuildVectorInsts))
83240b57cec5SDimitry Andric     return false;
83250b57cec5SDimitry Andric 
83260b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
83270b57cec5SDimitry Andric   // Aggregate value is unlikely to be processed in vector register, we need to
83280b57cec5SDimitry Andric   // extract scalars into scalar registers, so NeedExtraction is set true.
83295f7ddb14SDimitry Andric   return tryToVectorizeList(BuildVectorOpds, R, /*AllowReorder=*/false);
83300b57cec5SDimitry Andric }
83310b57cec5SDimitry Andric 
vectorizeInsertElementInst(InsertElementInst * IEI,BasicBlock * BB,BoUpSLP & R)83320b57cec5SDimitry Andric bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
83330b57cec5SDimitry Andric                                                    BasicBlock *BB, BoUpSLP &R) {
83345ffd83dbSDimitry Andric   SmallVector<Value *, 16> BuildVectorInsts;
83350b57cec5SDimitry Andric   SmallVector<Value *, 16> BuildVectorOpds;
83365f7ddb14SDimitry Andric   SmallVector<int> Mask;
83375ffd83dbSDimitry Andric   if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, BuildVectorInsts) ||
83380b57cec5SDimitry Andric       (llvm::all_of(BuildVectorOpds,
83390b57cec5SDimitry Andric                     [](Value *V) { return isa<ExtractElementInst>(V); }) &&
83405f7ddb14SDimitry Andric        isShuffle(BuildVectorOpds, Mask)))
83410b57cec5SDimitry Andric     return false;
83420b57cec5SDimitry Andric 
83435f7ddb14SDimitry Andric   LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IEI << "\n");
83445f7ddb14SDimitry Andric   return tryToVectorizeList(BuildVectorInsts, R, /*AllowReorder=*/true);
83450b57cec5SDimitry Andric }
83460b57cec5SDimitry Andric 
vectorizeSimpleInstructions(SmallVectorImpl<Instruction * > & Instructions,BasicBlock * BB,BoUpSLP & R,bool AtTerminator)83470b57cec5SDimitry Andric bool SLPVectorizerPass::vectorizeSimpleInstructions(
83485f7ddb14SDimitry Andric     SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R,
83495f7ddb14SDimitry Andric     bool AtTerminator) {
83500b57cec5SDimitry Andric   bool OpsChanged = false;
83515f7ddb14SDimitry Andric   SmallVector<Instruction *, 4> PostponedCmps;
83528bcb0991SDimitry Andric   for (auto *I : reverse(Instructions)) {
83538bcb0991SDimitry Andric     if (R.isDeleted(I))
83540b57cec5SDimitry Andric       continue;
83550b57cec5SDimitry Andric     if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
83560b57cec5SDimitry Andric       OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
83570b57cec5SDimitry Andric     else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I))
83580b57cec5SDimitry Andric       OpsChanged |= vectorizeInsertElementInst(LastInsertElem, BB, R);
83595f7ddb14SDimitry Andric     else if (isa<CmpInst>(I))
83605f7ddb14SDimitry Andric       PostponedCmps.push_back(I);
83615f7ddb14SDimitry Andric   }
83625f7ddb14SDimitry Andric   if (AtTerminator) {
83635f7ddb14SDimitry Andric     // Try to find reductions first.
83645f7ddb14SDimitry Andric     for (Instruction *I : PostponedCmps) {
83655f7ddb14SDimitry Andric       if (R.isDeleted(I))
83665f7ddb14SDimitry Andric         continue;
83675f7ddb14SDimitry Andric       for (Value *Op : I->operands())
83685f7ddb14SDimitry Andric         OpsChanged |= vectorizeRootInstruction(nullptr, Op, BB, R, TTI);
83695f7ddb14SDimitry Andric     }
83705f7ddb14SDimitry Andric     // Try to vectorize operands as vector bundles.
83715f7ddb14SDimitry Andric     for (Instruction *I : PostponedCmps) {
83725f7ddb14SDimitry Andric       if (R.isDeleted(I))
83735f7ddb14SDimitry Andric         continue;
83745f7ddb14SDimitry Andric       OpsChanged |= tryToVectorize(I, R);
83750b57cec5SDimitry Andric     }
83760b57cec5SDimitry Andric     Instructions.clear();
83775f7ddb14SDimitry Andric   } else {
83785f7ddb14SDimitry Andric     // Insert in reverse order since the PostponedCmps vector was filled in
83795f7ddb14SDimitry Andric     // reverse order.
83805f7ddb14SDimitry Andric     Instructions.assign(PostponedCmps.rbegin(), PostponedCmps.rend());
83815f7ddb14SDimitry Andric   }
83820b57cec5SDimitry Andric   return OpsChanged;
83830b57cec5SDimitry Andric }
83840b57cec5SDimitry Andric 
/// Try to vectorize instruction chains rooted in basic block \p BB.
/// First repeatedly groups compatible PHI nodes and tries to vectorize each
/// group, then scans the block for horizontal-reduction roots (reduction
/// PHIs, stores, void calls, terminators) and post-processable instructions
/// (inserts and compares).
/// \returns true if any change was made to the IR.
bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
  bool Changed = false;
  SmallVector<Value *, 4> Incoming;
  SmallPtrSet<Value *, 16> VisitedInstrs;
  // Maps each phi node to the non-phi values reached by walking its use-def
  // tree (looking through nested PHIs). These per-phi "operand lists" make it
  // easier to identify groups of PHIs whose chains are likely to vectorize
  // well together.
  DenseMap<Value *, SmallVector<Value *, 4>> PHIToOpcodes;

  bool HaveVectorizedPhiNodes = true;
  while (HaveVectorizedPhiNodes) {
    HaveVectorizedPhiNodes = false;

    // Collect the incoming values from the PHIs.
    Incoming.clear();
    for (Instruction &I : *BB) {
      PHINode *P = dyn_cast<PHINode>(&I);
      if (!P)
        break; // PHIs are always grouped at the top of the block.

      // No need to analyze deleted, vectorized and non-vectorizable
      // instructions.
      if (!VisitedInstrs.count(P) && !R.isDeleted(P) &&
          isValidElementType(P->getType()))
        Incoming.push_back(P);
    }

    // Find the corresponding non-phi nodes for better matching when trying to
    // build the tree.
    for (Value *V : Incoming) {
      SmallVectorImpl<Value *> &Opcodes =
          PHIToOpcodes.try_emplace(V).first->getSecond();
      if (!Opcodes.empty())
        continue; // Already computed on a previous outer-loop iteration.
      SmallVector<Value *, 4> Nodes(1, V);
      SmallPtrSet<Value *, 4> Visited;
      while (!Nodes.empty()) {
        auto *PHI = cast<PHINode>(Nodes.pop_back_val());
        if (!Visited.insert(PHI).second)
          continue;
        for (Value *V : PHI->incoming_values()) {
          // Look through nested PHIs; only record non-phi leaves.
          if (auto *PHI1 = dyn_cast<PHINode>((V))) {
            Nodes.push_back(PHI1);
            continue;
          }
          Opcodes.emplace_back(V);
        }
      }
    }

    // Sort by type, parent, operands.
    stable_sort(Incoming, [this, &PHIToOpcodes](Value *V1, Value *V2) {
      assert(isValidElementType(V1->getType()) &&
             isValidElementType(V2->getType()) &&
             "Expected vectorizable types only.");
      // It is fine to compare type IDs here, since we expect only vectorizable
      // types, like ints, floats and pointers, we don't care about other type.
      if (V1->getType()->getTypeID() < V2->getType()->getTypeID())
        return true;
      if (V1->getType()->getTypeID() > V2->getType()->getTypeID())
        return false;
      ArrayRef<Value *> Opcodes1 = PHIToOpcodes[V1];
      ArrayRef<Value *> Opcodes2 = PHIToOpcodes[V2];
      if (Opcodes1.size() < Opcodes2.size())
        return true;
      if (Opcodes1.size() > Opcodes2.size())
        return false;
      for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
        // Undefs are compatible with any other value.
        if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I]))
          continue;
        if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
          if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
            // Order instructions by dominator-tree DFS-in number of their
            // parent blocks so equal-opcode chains from the same region end
            // up adjacent. Null nodes (unreachable blocks) sort first.
            DomTreeNodeBase<BasicBlock> *NodeI1 = DT->getNode(I1->getParent());
            DomTreeNodeBase<BasicBlock> *NodeI2 = DT->getNode(I2->getParent());
            if (!NodeI1)
              return NodeI2 != nullptr;
            if (!NodeI2)
              return false;
            assert((NodeI1 == NodeI2) ==
                       (NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
                   "Different nodes should have different DFS numbers");
            if (NodeI1 != NodeI2)
              return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
            InstructionsState S = getSameOpcode({I1, I2});
            if (S.getOpcode())
              continue; // Same opcode group: tie, compare the next operand.
            return I1->getOpcode() < I2->getOpcode();
          }
        if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
          continue;
        if (Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID())
          return true;
        if (Opcodes1[I]->getValueID() > Opcodes2[I]->getValueID())
          return false;
      }
      return false;
    });

    // Equivalence test matching the sort above: two PHIs are compatible when
    // their types match and each pair of corresponding operands is either
    // undef, same-opcode instructions in the same block, both constants, or
    // values of the same value ID.
    auto &&AreCompatiblePHIs = [&PHIToOpcodes](Value *V1, Value *V2) {
      if (V1 == V2)
        return true;
      if (V1->getType() != V2->getType())
        return false;
      ArrayRef<Value *> Opcodes1 = PHIToOpcodes[V1];
      ArrayRef<Value *> Opcodes2 = PHIToOpcodes[V2];
      if (Opcodes1.size() != Opcodes2.size())
        return false;
      for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
        // Undefs are compatible with any other value.
        if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I]))
          continue;
        if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
          if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
            if (I1->getParent() != I2->getParent())
              return false;
            InstructionsState S = getSameOpcode({I1, I2});
            if (S.getOpcode())
              continue;
            return false;
          }
        if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
          continue;
        if (Opcodes1[I]->getValueID() != Opcodes2[I]->getValueID())
          return false;
      }
      return true;
    };

    // Try to vectorize elements base on their type.
    SmallVector<Value *, 4> Candidates;
    for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(),
                                           E = Incoming.end();
         IncIt != E;) {

      // Look for the next elements with the same type, parent and operand
      // kinds.
      SmallVector<Value *, 4>::iterator SameTypeIt = IncIt;
      while (SameTypeIt != E && AreCompatiblePHIs(*SameTypeIt, *IncIt)) {
        VisitedInstrs.insert(*SameTypeIt);
        ++SameTypeIt;
      }

      // Try to vectorize them.
      unsigned NumElts = (SameTypeIt - IncIt);
      LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize starting at PHIs ("
                        << NumElts << ")\n");
      // The order in which the phi nodes appear in the program does not
      // matter, so allow tryToVectorizeList to reorder them if that is
      // beneficial.
      if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
                                            /*AllowReorder=*/true)) {
        // Success start over because instructions might have been changed.
        HaveVectorizedPhiNodes = true;
        Changed = true;
      } else if (NumElts < 4 &&
                 (Candidates.empty() ||
                  Candidates.front()->getType() == (*IncIt)->getType())) {
        // Group too small on its own: accumulate same-typed leftovers and
        // retry them together below.
        Candidates.append(IncIt, std::next(IncIt, NumElts));
      }
      // Final attempt to vectorize phis with the same types.
      if (SameTypeIt == E || (*SameTypeIt)->getType() != (*IncIt)->getType()) {
        if (Candidates.size() > 1 &&
            tryToVectorizeList(Candidates, R, /*AllowReorder=*/true)) {
          // Success start over because instructions might have been changed.
          HaveVectorizedPhiNodes = true;
          Changed = true;
        }
        Candidates.clear();
      }

      // Start over at the next instruction of a different type (or the end).
      IncIt = SameTypeIt;
    }
  }

  VisitedInstrs.clear();

  // Second phase: scan the whole block for reduction roots. KeyNodes tracks
  // root instructions already seen, so that re-visiting one after a restart
  // still flushes the pending PostProcessInstructions list.
  SmallVector<Instruction *, 8> PostProcessInstructions;
  SmallDenseSet<Instruction *, 4> KeyNodes;
  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
    // Skip instructions with scalable type. The num of elements is unknown at
    // compile-time for scalable type.
    if (isa<ScalableVectorType>(it->getType()))
      continue;

    // Skip instructions marked for the deletion.
    if (R.isDeleted(&*it))
      continue;
    // We may go through BB multiple times so skip the one we have checked.
    if (!VisitedInstrs.insert(&*it).second) {
      if (it->use_empty() && KeyNodes.contains(&*it) &&
          vectorizeSimpleInstructions(PostProcessInstructions, BB, R,
                                      it->isTerminator())) {
        // We would like to start over since some instructions are deleted
        // and the iterator may become invalid value.
        Changed = true;
        it = BB->begin();
        e = BB->end();
      }
      continue;
    }

    if (isa<DbgInfoIntrinsic>(it))
      continue;

    // Try to vectorize reductions that use PHINodes.
    if (PHINode *P = dyn_cast<PHINode>(it)) {
      // Check that the PHI is a reduction PHI.
      if (P->getNumIncomingValues() == 2) {
        // Try to match and vectorize a horizontal reduction.
        if (vectorizeRootInstruction(P, getReductionValue(DT, P, BB, LI), BB, R,
                                     TTI)) {
          // Restart the scan: the reduction may have deleted instructions and
          // invalidated the iterator.
          Changed = true;
          it = BB->begin();
          e = BB->end();
          continue;
        }
      }
      // Try to vectorize the incoming values of the PHI, to catch reductions
      // that feed into PHIs.
      for (unsigned I = 0, E = P->getNumIncomingValues(); I != E; I++) {
        // Skip if the incoming block is the current BB for now. Also, bypass
        // unreachable IR for efficiency and to avoid crashing.
        // TODO: Collect the skipped incoming values and try to vectorize them
        // after processing BB.
        if (BB == P->getIncomingBlock(I) ||
            !DT->isReachableFromEntry(P->getIncomingBlock(I)))
          continue;

        Changed |= vectorizeRootInstruction(nullptr, P->getIncomingValue(I),
                                            P->getIncomingBlock(I), R, TTI);
      }
      continue;
    }

    // Ran into an instruction without users, like terminator, or function call
    // with ignored return value, store. Ignore unused instructions (basing on
    // instruction type, except for CallInst and InvokeInst).
    if (it->use_empty() && (it->getType()->isVoidTy() || isa<CallInst>(it) ||
                            isa<InvokeInst>(it))) {
      KeyNodes.insert(&*it);
      bool OpsChanged = false;
      if (ShouldStartVectorizeHorAtStore || !isa<StoreInst>(it)) {
        for (auto *V : it->operand_values()) {
          // Try to match and vectorize a horizontal reduction.
          OpsChanged |= vectorizeRootInstruction(nullptr, V, BB, R, TTI);
        }
      }
      // Start vectorization of post-process list of instructions from the
      // top-tree instructions to try to vectorize as many instructions as
      // possible.
      OpsChanged |= vectorizeSimpleInstructions(PostProcessInstructions, BB, R,
                                                it->isTerminator());
      if (OpsChanged) {
        // We would like to start over since some instructions are deleted
        // and the iterator may become invalid value.
        Changed = true;
        it = BB->begin();
        e = BB->end();
        continue;
      }
    }

    // Defer inserts and compares; they are vectorized later from the nearest
    // following key node (or the terminator).
    if (isa<InsertElementInst>(it) || isa<CmpInst>(it) ||
        isa<InsertValueInst>(it))
      PostProcessInstructions.push_back(&*it);
  }

  return Changed;
}
86570b57cec5SDimitry Andric 
/// Try to vectorize the *index* computations of the getelementptrs collected
/// for basic block \p BB (in the pass-level GEPs map). Candidates with
/// constant SCEV differences or duplicate indices are pruned first, since
/// those are cheaply derivable from one another and unlikely to profit.
/// \returns true if any change was made to the IR.
bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
  auto Changed = false;
  for (auto &Entry : GEPs) {
    // If the getelementptr list has fewer than two elements, there's nothing
    // to do.
    if (Entry.second.size() < 2)
      continue;

    LLVM_DEBUG(dbgs() << "SLP: Analyzing a getelementptr list of length "
                      << Entry.second.size() << ".\n");

    // Process the GEP list in chunks suitable for the target's supported
    // vector size. If a vector register can't hold 1 element, we are done. We
    // are trying to vectorize the index computations, so the maximum number of
    // elements is based on the size of the index expression, rather than the
    // size of the GEP itself (the target's pointer size).
    unsigned MaxVecRegSize = R.getMaxVecRegSize();
    unsigned EltSize = R.getVectorElementSize(*Entry.second[0]->idx_begin());
    if (MaxVecRegSize < EltSize)
      continue;

    unsigned MaxElts = MaxVecRegSize / EltSize;
    for (unsigned BI = 0, BE = Entry.second.size(); BI < BE; BI += MaxElts) {
      auto Len = std::min<unsigned>(BE - BI, MaxElts);
      ArrayRef<GetElementPtrInst *> GEPList(&Entry.second[BI], Len);

      // Initialize a set a candidate getelementptrs. Note that we use a
      // SetVector here to preserve program order. If the index computations
      // are vectorizable and begin with loads, we want to minimize the chance
      // of having to reorder them later.
      SetVector<Value *> Candidates(GEPList.begin(), GEPList.end());

      // Some of the candidates may have already been vectorized after we
      // initially collected them. If so, they are marked as deleted, so remove
      // them from the set of candidates.
      Candidates.remove_if(
          [&R](Value *I) { return R.isDeleted(cast<Instruction>(I)); });

      // Remove from the set of candidates all pairs of getelementptrs with
      // constant differences. Such getelementptrs are likely not good
      // candidates for vectorization in a bottom-up phase since one can be
      // computed from the other. We also ensure all candidate getelementptr
      // indices are unique.
      for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1; ++I) {
        auto *GEPI = GEPList[I];
        if (!Candidates.count(GEPI))
          continue;
        auto *SCEVI = SE->getSCEV(GEPList[I]);
        for (int J = I + 1; J < E && Candidates.size() > 1; ++J) {
          auto *GEPJ = GEPList[J];
          auto *SCEVJ = SE->getSCEV(GEPList[J]);
          if (isa<SCEVConstant>(SE->getMinusSCEV(SCEVI, SCEVJ))) {
            // GEPJ differs from GEPI by a constant: neither is interesting.
            Candidates.remove(GEPI);
            Candidates.remove(GEPJ);
          } else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) {
            // Duplicate index value: keep only the first occurrence.
            Candidates.remove(GEPJ);
          }
        }
      }

      // We break out of the above computation as soon as we know there are
      // fewer than two candidates remaining.
      if (Candidates.size() < 2)
        continue;

      // Add the single, non-constant index of each candidate to the bundle. We
      // ensured the indices met these constraints when we originally collected
      // the getelementptrs.
      SmallVector<Value *, 16> Bundle(Candidates.size());
      auto BundleIndex = 0u;
      for (auto *V : Candidates) {
        auto *GEP = cast<GetElementPtrInst>(V);
        auto *GEPIdx = GEP->idx_begin()->get();
        assert(GEP->getNumIndices() == 1 || !isa<Constant>(GEPIdx));
        Bundle[BundleIndex++] = GEPIdx;
      }

      // Try and vectorize the indices. We are currently only interested in
      // gather-like cases of the form:
      //
      // ... = g[a[0] - b[0]] + g[a[1] - b[1]] + ...
      //
      // where the loads of "a", the loads of "b", and the subtractions can be
      // performed in parallel. It's likely that detecting this pattern in a
      // bottom-up phase will be simpler and less costly than building a
      // full-blown top-down phase beginning at the consecutive loads.
      Changed |= tryToVectorizeList(Bundle, R);
    }
  }
  return Changed;
}
87490b57cec5SDimitry Andric 
vectorizeStoreChains(BoUpSLP & R)87500b57cec5SDimitry Andric bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
87510b57cec5SDimitry Andric   bool Changed = false;
87525f7ddb14SDimitry Andric   // Sort by type, base pointers and values operand. Value operands must be
87535f7ddb14SDimitry Andric   // compatible (have the same opcode, same parent), otherwise it is
87545f7ddb14SDimitry Andric   // definitely not profitable to try to vectorize them.
87555f7ddb14SDimitry Andric   auto &&StoreSorter = [this](StoreInst *V, StoreInst *V2) {
87565f7ddb14SDimitry Andric     if (V->getPointerOperandType()->getTypeID() <
87575f7ddb14SDimitry Andric         V2->getPointerOperandType()->getTypeID())
87585f7ddb14SDimitry Andric       return true;
87595f7ddb14SDimitry Andric     if (V->getPointerOperandType()->getTypeID() >
87605f7ddb14SDimitry Andric         V2->getPointerOperandType()->getTypeID())
87615f7ddb14SDimitry Andric       return false;
87625f7ddb14SDimitry Andric     // UndefValues are compatible with all other values.
87635f7ddb14SDimitry Andric     if (isa<UndefValue>(V->getValueOperand()) ||
87645f7ddb14SDimitry Andric         isa<UndefValue>(V2->getValueOperand()))
87655f7ddb14SDimitry Andric       return false;
87665f7ddb14SDimitry Andric     if (auto *I1 = dyn_cast<Instruction>(V->getValueOperand()))
87675f7ddb14SDimitry Andric       if (auto *I2 = dyn_cast<Instruction>(V2->getValueOperand())) {
87685f7ddb14SDimitry Andric         DomTreeNodeBase<llvm::BasicBlock> *NodeI1 =
87695f7ddb14SDimitry Andric             DT->getNode(I1->getParent());
87705f7ddb14SDimitry Andric         DomTreeNodeBase<llvm::BasicBlock> *NodeI2 =
87715f7ddb14SDimitry Andric             DT->getNode(I2->getParent());
87725f7ddb14SDimitry Andric         assert(NodeI1 && "Should only process reachable instructions");
87735f7ddb14SDimitry Andric         assert(NodeI1 && "Should only process reachable instructions");
87745f7ddb14SDimitry Andric         assert((NodeI1 == NodeI2) ==
87755f7ddb14SDimitry Andric                    (NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
87765f7ddb14SDimitry Andric                "Different nodes should have different DFS numbers");
87775f7ddb14SDimitry Andric         if (NodeI1 != NodeI2)
87785f7ddb14SDimitry Andric           return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
87795f7ddb14SDimitry Andric         InstructionsState S = getSameOpcode({I1, I2});
87805f7ddb14SDimitry Andric         if (S.getOpcode())
87815f7ddb14SDimitry Andric           return false;
87825f7ddb14SDimitry Andric         return I1->getOpcode() < I2->getOpcode();
87835f7ddb14SDimitry Andric       }
87845f7ddb14SDimitry Andric     if (isa<Constant>(V->getValueOperand()) &&
87855f7ddb14SDimitry Andric         isa<Constant>(V2->getValueOperand()))
87865f7ddb14SDimitry Andric       return false;
87875f7ddb14SDimitry Andric     return V->getValueOperand()->getValueID() <
87885f7ddb14SDimitry Andric            V2->getValueOperand()->getValueID();
87895f7ddb14SDimitry Andric   };
87905f7ddb14SDimitry Andric 
87915f7ddb14SDimitry Andric   auto &&AreCompatibleStores = [](StoreInst *V1, StoreInst *V2) {
87925f7ddb14SDimitry Andric     if (V1 == V2)
87935f7ddb14SDimitry Andric       return true;
87945f7ddb14SDimitry Andric     if (V1->getPointerOperandType() != V2->getPointerOperandType())
87955f7ddb14SDimitry Andric       return false;
87965f7ddb14SDimitry Andric     // Undefs are compatible with any other value.
87975f7ddb14SDimitry Andric     if (isa<UndefValue>(V1->getValueOperand()) ||
87985f7ddb14SDimitry Andric         isa<UndefValue>(V2->getValueOperand()))
87995f7ddb14SDimitry Andric       return true;
88005f7ddb14SDimitry Andric     if (auto *I1 = dyn_cast<Instruction>(V1->getValueOperand()))
88015f7ddb14SDimitry Andric       if (auto *I2 = dyn_cast<Instruction>(V2->getValueOperand())) {
88025f7ddb14SDimitry Andric         if (I1->getParent() != I2->getParent())
88035f7ddb14SDimitry Andric           return false;
88045f7ddb14SDimitry Andric         InstructionsState S = getSameOpcode({I1, I2});
88055f7ddb14SDimitry Andric         return S.getOpcode() > 0;
88065f7ddb14SDimitry Andric       }
88075f7ddb14SDimitry Andric     if (isa<Constant>(V1->getValueOperand()) &&
88085f7ddb14SDimitry Andric         isa<Constant>(V2->getValueOperand()))
88095f7ddb14SDimitry Andric       return true;
88105f7ddb14SDimitry Andric     return V1->getValueOperand()->getValueID() ==
88115f7ddb14SDimitry Andric            V2->getValueOperand()->getValueID();
88125f7ddb14SDimitry Andric   };
88135f7ddb14SDimitry Andric 
88140b57cec5SDimitry Andric   // Attempt to sort and vectorize each of the store-groups.
88155f7ddb14SDimitry Andric   for (auto &Pair : Stores) {
88165f7ddb14SDimitry Andric     if (Pair.second.size() < 2)
88170b57cec5SDimitry Andric       continue;
88180b57cec5SDimitry Andric 
88190b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length "
88205f7ddb14SDimitry Andric                       << Pair.second.size() << ".\n");
88210b57cec5SDimitry Andric 
88225f7ddb14SDimitry Andric     stable_sort(Pair.second, StoreSorter);
88235f7ddb14SDimitry Andric 
88245f7ddb14SDimitry Andric     // Try to vectorize elements based on their compatibility.
88255f7ddb14SDimitry Andric     for (ArrayRef<StoreInst *>::iterator IncIt = Pair.second.begin(),
88265f7ddb14SDimitry Andric                                          E = Pair.second.end();
88275f7ddb14SDimitry Andric          IncIt != E;) {
88285f7ddb14SDimitry Andric 
88295f7ddb14SDimitry Andric       // Look for the next elements with the same type.
88305f7ddb14SDimitry Andric       ArrayRef<StoreInst *>::iterator SameTypeIt = IncIt;
88315f7ddb14SDimitry Andric       Type *EltTy = (*IncIt)->getPointerOperand()->getType();
88325f7ddb14SDimitry Andric 
88335f7ddb14SDimitry Andric       while (SameTypeIt != E && AreCompatibleStores(*SameTypeIt, *IncIt))
88345f7ddb14SDimitry Andric         ++SameTypeIt;
88355f7ddb14SDimitry Andric 
88365f7ddb14SDimitry Andric       // Try to vectorize them.
88375f7ddb14SDimitry Andric       unsigned NumElts = (SameTypeIt - IncIt);
88385f7ddb14SDimitry Andric       LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize starting at stores ("
88395f7ddb14SDimitry Andric                         << NumElts << ")\n");
88405f7ddb14SDimitry Andric       if (NumElts > 1 && !EltTy->getPointerElementType()->isVectorTy() &&
88415f7ddb14SDimitry Andric           vectorizeStores(makeArrayRef(IncIt, NumElts), R)) {
88425f7ddb14SDimitry Andric         // Success start over because instructions might have been changed.
88435f7ddb14SDimitry Andric         Changed = true;
88445f7ddb14SDimitry Andric       }
88455f7ddb14SDimitry Andric 
88465f7ddb14SDimitry Andric       // Start over at the next instruction of a different type (or the end).
88475f7ddb14SDimitry Andric       IncIt = SameTypeIt;
88485f7ddb14SDimitry Andric     }
88490b57cec5SDimitry Andric   }
88500b57cec5SDimitry Andric   return Changed;
88510b57cec5SDimitry Andric }
88520b57cec5SDimitry Andric 
// Unique address used by LLVM's legacy pass infrastructure to identify the
// SLPVectorizer pass; only the address matters, not the value.
char SLPVectorizer::ID = 0;

// Human-readable pass name shown by -debug-pass and in pass listings.
static const char lv_name[] = "SLP Vectorizer";

// Register the legacy pass and declare the analyses it depends on so the
// legacy pass manager schedules them before this pass runs.
INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_DEPENDENCY(InjectTLIMappingsLegacy)
INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)

// Factory used by clients that build pass pipelines programmatically.
Pass *llvm::createSLPVectorizerPass() { return new SLPVectorizer(); }
8869