10b57cec5SDimitry Andric //===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass implements the Bottom Up SLP vectorizer. It detects consecutive
100b57cec5SDimitry Andric // stores that can be put together into vector-stores. Next, it attempts to
110b57cec5SDimitry Andric // construct vectorizable tree using the use-def chains. If a profitable tree
120b57cec5SDimitry Andric // was found, the SLP vectorizer performs vectorization on the tree.
130b57cec5SDimitry Andric //
140b57cec5SDimitry Andric // The pass is inspired by the work described in the paper:
150b57cec5SDimitry Andric // "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
160b57cec5SDimitry Andric //
170b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
180b57cec5SDimitry Andric
190b57cec5SDimitry Andric #include "llvm/Transforms/Vectorize/SLPVectorizer.h"
200b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
210b57cec5SDimitry Andric #include "llvm/ADT/DenseSet.h"
220b57cec5SDimitry Andric #include "llvm/ADT/Optional.h"
230b57cec5SDimitry Andric #include "llvm/ADT/PostOrderIterator.h"
240b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
255f7ddb14SDimitry Andric #include "llvm/ADT/SetOperations.h"
260b57cec5SDimitry Andric #include "llvm/ADT/SetVector.h"
27480093f4SDimitry Andric #include "llvm/ADT/SmallBitVector.h"
280b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
290b57cec5SDimitry Andric #include "llvm/ADT/SmallSet.h"
30af732203SDimitry Andric #include "llvm/ADT/SmallString.h"
310b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
320b57cec5SDimitry Andric #include "llvm/ADT/iterator.h"
330b57cec5SDimitry Andric #include "llvm/ADT/iterator_range.h"
340b57cec5SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
35af732203SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
360b57cec5SDimitry Andric #include "llvm/Analysis/CodeMetrics.h"
370b57cec5SDimitry Andric #include "llvm/Analysis/DemandedBits.h"
380b57cec5SDimitry Andric #include "llvm/Analysis/GlobalsModRef.h"
39af732203SDimitry Andric #include "llvm/Analysis/IVDescriptors.h"
400b57cec5SDimitry Andric #include "llvm/Analysis/LoopAccessAnalysis.h"
410b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
420b57cec5SDimitry Andric #include "llvm/Analysis/MemoryLocation.h"
430b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
440b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
450b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h"
460b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
470b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
480b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
490b57cec5SDimitry Andric #include "llvm/Analysis/VectorUtils.h"
500b57cec5SDimitry Andric #include "llvm/IR/Attributes.h"
510b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
520b57cec5SDimitry Andric #include "llvm/IR/Constant.h"
530b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
540b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h"
550b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h"
560b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
570b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
580b57cec5SDimitry Andric #include "llvm/IR/Function.h"
590b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
600b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h"
610b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
620b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
630b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
640b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h"
650b57cec5SDimitry Andric #include "llvm/IR/Module.h"
660b57cec5SDimitry Andric #include "llvm/IR/NoFolder.h"
670b57cec5SDimitry Andric #include "llvm/IR/Operator.h"
680b57cec5SDimitry Andric #include "llvm/IR/PatternMatch.h"
690b57cec5SDimitry Andric #include "llvm/IR/Type.h"
700b57cec5SDimitry Andric #include "llvm/IR/Use.h"
710b57cec5SDimitry Andric #include "llvm/IR/User.h"
720b57cec5SDimitry Andric #include "llvm/IR/Value.h"
730b57cec5SDimitry Andric #include "llvm/IR/ValueHandle.h"
740b57cec5SDimitry Andric #include "llvm/IR/Verifier.h"
75480093f4SDimitry Andric #include "llvm/InitializePasses.h"
760b57cec5SDimitry Andric #include "llvm/Pass.h"
770b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
780b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
790b57cec5SDimitry Andric #include "llvm/Support/Compiler.h"
800b57cec5SDimitry Andric #include "llvm/Support/DOTGraphTraits.h"
810b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
820b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
830b57cec5SDimitry Andric #include "llvm/Support/GraphWriter.h"
84af732203SDimitry Andric #include "llvm/Support/InstructionCost.h"
850b57cec5SDimitry Andric #include "llvm/Support/KnownBits.h"
860b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h"
870b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
885ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/InjectTLIMappings.h"
890b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
900b57cec5SDimitry Andric #include "llvm/Transforms/Vectorize.h"
910b57cec5SDimitry Andric #include <algorithm>
920b57cec5SDimitry Andric #include <cassert>
930b57cec5SDimitry Andric #include <cstdint>
940b57cec5SDimitry Andric #include <iterator>
950b57cec5SDimitry Andric #include <memory>
960b57cec5SDimitry Andric #include <set>
970b57cec5SDimitry Andric #include <string>
980b57cec5SDimitry Andric #include <tuple>
990b57cec5SDimitry Andric #include <utility>
1000b57cec5SDimitry Andric #include <vector>
1010b57cec5SDimitry Andric
1020b57cec5SDimitry Andric using namespace llvm;
1030b57cec5SDimitry Andric using namespace llvm::PatternMatch;
1040b57cec5SDimitry Andric using namespace slpvectorizer;
1050b57cec5SDimitry Andric
1060b57cec5SDimitry Andric #define SV_NAME "slp-vectorizer"
1070b57cec5SDimitry Andric #define DEBUG_TYPE "SLP"
1080b57cec5SDimitry Andric
// Counts vector instructions emitted by this pass; reported under -stats.
STATISTIC(NumVectorInstructions, "Number of vector instructions generated");

// Master on/off switch for SLP vectorization. Note: declared with external
// linkage (no 'static'), unlike every other option below.
cl::opt<bool> RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden,
                                  cl::desc("Run the SLP vectorization passes"));

// Minimum cost-model gain required before a tree is vectorized. A negative
// threshold permits vectorization even when the model predicts a loss.
static cl::opt<int>
    SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
                     cl::desc("Only vectorize if you gain more than this "
                              "number "));

// Enables matching of horizontal reductions in addition to plain trees.
static cl::opt<bool>
    ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden,
                       cl::desc("Attempt to vectorize horizontal reductions"));

// Also try to start horizontal-reduction matching from stores (off by
// default).
static cl::opt<bool> ShouldStartVectorizeHorAtStore(
    "slp-vectorize-hor-store", cl::init(false), cl::Hidden,
    cl::desc(
        "Attempt to vectorize horizontal reductions feeding into a store"));

// Upper bound (in bits) on the vector register width to vectorize for.
static cl::opt<int>
    MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
        cl::desc("Attempt to vectorize for this register size in bits"));

// Hard cap on the vectorization factor; 0 means unlimited.
static cl::opt<unsigned>
    MaxVFOption("slp-max-vf", cl::init(0), cl::Hidden,
                cl::desc("Maximum SLP vectorization factor (0=unlimited)"));

// Bounds the search for chains of consecutive stores.
static cl::opt<int>
    MaxStoreLookup("slp-max-store-lookup", cl::init(32), cl::Hidden,
        cl::desc("Maximum depth of the lookup for consecutive stores."));

/// Limits the size of scheduling regions in a block.
/// It avoid long compile times for _very_ large blocks where vector
/// instructions are spread over a wide range.
/// This limit is way higher than needed by real-world functions.
static cl::opt<int>
    ScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000),
        cl::Hidden,
        cl::desc("Limit the size of the SLP scheduling region per block"));

// Lower bound (in bits) on the vector register width to vectorize for.
static cl::opt<int> MinVectorRegSizeOption(
    "slp-min-reg-size", cl::init(128), cl::Hidden,
    cl::desc("Attempt to vectorize for this register size in bits"));

// Bounds recursion while building the vectorizable tree.
static cl::opt<unsigned> RecursionMaxDepth(
    "slp-recursion-max-depth", cl::init(12), cl::Hidden,
    cl::desc("Limit the recursion depth when building a vectorizable tree"));

// Trees smaller than this are only vectorized when fully vectorizable.
static cl::opt<unsigned> MinTreeSize(
    "slp-min-tree-size", cl::init(3), cl::Hidden,
    cl::desc("Only vectorize small trees if they are fully vectorizable"));

// The maximum depth that the look-ahead score heuristic will explore.
// The higher this value, the higher the compilation time overhead.
static cl::opt<int> LookAheadMaxDepth(
    "slp-max-look-ahead-depth", cl::init(2), cl::Hidden,
    cl::desc("The maximum look-ahead depth for operand reordering scores"));

// The Look-ahead heuristic goes through the users of the bundle to calculate
// the users cost in getExternalUsesCost(). To avoid compilation time increase
// we limit the number of users visited to this value.
static cl::opt<unsigned> LookAheadUsersBudget(
    "slp-look-ahead-users-budget", cl::init(2), cl::Hidden,
    cl::desc("The maximum number of users to visit while visiting the "
             "predecessors. This prevents compilation time increase."));

// Debugging aid: render the SLP tree with Graphviz (off by default; note no
// cl::init, so the option defaults to false).
static cl::opt<bool>
    ViewSLPTree("view-slp-tree", cl::Hidden,
                cl::desc("Display the SLP trees with Graphviz"));

// Limit the number of alias checks. The limit is chosen so that
// it has no negative effect on the llvm benchmarks.
static const unsigned AliasedCheckLimit = 10;

// Another limit for the alias checks: The maximum distance between load/store
// instructions where alias checks are done.
// This limit is useful for very large basic blocks.
static const unsigned MaxMemDepDistance = 160;

/// If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling
/// regions to be handled.
static const int MinScheduleRegionSize = 16;
1900b57cec5SDimitry Andric
1910b57cec5SDimitry Andric /// Predicate for the element types that the SLP vectorizer supports.
1920b57cec5SDimitry Andric ///
1930b57cec5SDimitry Andric /// The most important thing to filter here are types which are invalid in LLVM
1940b57cec5SDimitry Andric /// vectors. We also filter target specific types which have absolutely no
1950b57cec5SDimitry Andric /// meaningful vectorization path such as x86_fp80 and ppc_f128. This just
1960b57cec5SDimitry Andric /// avoids spending time checking the cost model and realizing that they will
1970b57cec5SDimitry Andric /// be inevitably scalarized.
isValidElementType(Type * Ty)1980b57cec5SDimitry Andric static bool isValidElementType(Type *Ty) {
1990b57cec5SDimitry Andric return VectorType::isValidElementType(Ty) && !Ty->isX86_FP80Ty() &&
2000b57cec5SDimitry Andric !Ty->isPPC_FP128Ty();
2010b57cec5SDimitry Andric }
2020b57cec5SDimitry Andric
2030b57cec5SDimitry Andric /// \returns true if all of the instructions in \p VL are in the same block or
2040b57cec5SDimitry Andric /// false otherwise.
allSameBlock(ArrayRef<Value * > VL)2050b57cec5SDimitry Andric static bool allSameBlock(ArrayRef<Value *> VL) {
2060b57cec5SDimitry Andric Instruction *I0 = dyn_cast<Instruction>(VL[0]);
2070b57cec5SDimitry Andric if (!I0)
2080b57cec5SDimitry Andric return false;
2090b57cec5SDimitry Andric BasicBlock *BB = I0->getParent();
210af732203SDimitry Andric for (int I = 1, E = VL.size(); I < E; I++) {
211af732203SDimitry Andric auto *II = dyn_cast<Instruction>(VL[I]);
212af732203SDimitry Andric if (!II)
2130b57cec5SDimitry Andric return false;
2140b57cec5SDimitry Andric
215af732203SDimitry Andric if (BB != II->getParent())
2160b57cec5SDimitry Andric return false;
2170b57cec5SDimitry Andric }
2180b57cec5SDimitry Andric return true;
2190b57cec5SDimitry Andric }
2200b57cec5SDimitry Andric
2215f7ddb14SDimitry Andric /// \returns True if the value is a constant (but not globals/constant
2225f7ddb14SDimitry Andric /// expressions).
isConstant(Value * V)2235f7ddb14SDimitry Andric static bool isConstant(Value *V) {
2245f7ddb14SDimitry Andric return isa<Constant>(V) && !isa<ConstantExpr>(V) && !isa<GlobalValue>(V);
2255f7ddb14SDimitry Andric }
2265f7ddb14SDimitry Andric
2278bcb0991SDimitry Andric /// \returns True if all of the values in \p VL are constants (but not
2288bcb0991SDimitry Andric /// globals/constant expressions).
allConstant(ArrayRef<Value * > VL)2290b57cec5SDimitry Andric static bool allConstant(ArrayRef<Value *> VL) {
2308bcb0991SDimitry Andric // Constant expressions and globals can't be vectorized like normal integer/FP
2318bcb0991SDimitry Andric // constants.
2325f7ddb14SDimitry Andric return all_of(VL, isConstant);
2330b57cec5SDimitry Andric }
2340b57cec5SDimitry Andric
2350b57cec5SDimitry Andric /// \returns True if all of the values in \p VL are identical.
isSplat(ArrayRef<Value * > VL)2360b57cec5SDimitry Andric static bool isSplat(ArrayRef<Value *> VL) {
2370b57cec5SDimitry Andric for (unsigned i = 1, e = VL.size(); i < e; ++i)
2380b57cec5SDimitry Andric if (VL[i] != VL[0])
2390b57cec5SDimitry Andric return false;
2400b57cec5SDimitry Andric return true;
2410b57cec5SDimitry Andric }
2420b57cec5SDimitry Andric
243af732203SDimitry Andric /// \returns True if \p I is commutative, handles CmpInst and BinaryOperator.
isCommutative(Instruction * I)2440b57cec5SDimitry Andric static bool isCommutative(Instruction *I) {
245af732203SDimitry Andric if (auto *Cmp = dyn_cast<CmpInst>(I))
246af732203SDimitry Andric return Cmp->isCommutative();
247af732203SDimitry Andric if (auto *BO = dyn_cast<BinaryOperator>(I))
248af732203SDimitry Andric return BO->isCommutative();
249af732203SDimitry Andric // TODO: This should check for generic Instruction::isCommutative(), but
250af732203SDimitry Andric // we need to confirm that the caller code correctly handles Intrinsics
251af732203SDimitry Andric // for example (does not have 2 operands).
252af732203SDimitry Andric return false;
2530b57cec5SDimitry Andric }
2540b57cec5SDimitry Andric
/// Checks if the vector of instructions can be represented as a shuffle, like:
/// %x0 = extractelement <4 x i8> %x, i32 0
/// %x3 = extractelement <4 x i8> %x, i32 3
/// %y1 = extractelement <4 x i8> %y, i32 1
/// %y2 = extractelement <4 x i8> %y, i32 2
/// %x0x0 = mul i8 %x0, %x0
/// %x3x3 = mul i8 %x3, %x3
/// %y1y1 = mul i8 %y1, %y1
/// %y2y2 = mul i8 %y2, %y2
/// %ins1 = insertelement <4 x i8> poison, i8 %x0x0, i32 0
/// %ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1
/// %ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2
/// %ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3
/// ret <4 x i8> %ins4
/// can be transformed into:
/// %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> <i32 0, i32 3, i32 5,
/// i32 6>
/// %2 = mul <4 x i8> %1, %1
/// ret <4 x i8> %2
/// We convert this initially to something like:
/// %x0 = extractelement <4 x i8> %x, i32 0
/// %x3 = extractelement <4 x i8> %x, i32 3
/// %y1 = extractelement <4 x i8> %y, i32 1
/// %y2 = extractelement <4 x i8> %y, i32 2
/// %1 = insertelement <4 x i8> poison, i8 %x0, i32 0
/// %2 = insertelement <4 x i8> %1, i8 %x3, i32 1
/// %3 = insertelement <4 x i8> %2, i8 %y1, i32 2
/// %4 = insertelement <4 x i8> %3, i8 %y2, i32 3
/// %5 = mul <4 x i8> %4, %4
/// %6 = extractelement <4 x i8> %5, i32 0
/// %ins1 = insertelement <4 x i8> poison, i8 %6, i32 0
/// %7 = extractelement <4 x i8> %5, i32 1
/// %ins2 = insertelement <4 x i8> %ins1, i8 %7, i32 1
/// %8 = extractelement <4 x i8> %5, i32 2
/// %ins3 = insertelement <4 x i8> %ins2, i8 %8, i32 2
/// %9 = extractelement <4 x i8> %5, i32 3
/// %ins4 = insertelement <4 x i8> %ins3, i8 %9, i32 3
/// ret <4 x i8> %ins4
/// InstCombiner transforms this into a shuffle and vector mul
/// Mask will return the Shuffle Mask equivalent to the extracted elements.
///
/// \param VL   a list of values; every element must be an ExtractElementInst
///             (the first element is cast unconditionally, so callers must
///             guarantee this).
/// \param Mask [out] filled with one mask index per element of \p VL
///             (UndefMaskElem for out-of-range extract indices). Note the
///             mask entries are appended even on some early-None paths, so
///             callers should treat Mask as valid only on success.
/// \returns the matching TTI shuffle kind, or None if the extracts cannot be
///          represented as a single shuffle of at most two source vectors.
///
/// TODO: Can we split off and reuse the shuffle mask detection from
/// TargetTransformInfo::getInstructionThroughput?
static Optional<TargetTransformInfo::ShuffleKind>
isShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) {
  auto *EI0 = cast<ExtractElementInst>(VL[0]);
  unsigned Size =
      cast<FixedVectorType>(EI0->getVectorOperandType())->getNumElements();
  // The (at most) two distinct source vectors seen so far.
  Value *Vec1 = nullptr;
  Value *Vec2 = nullptr;
  // Classification accumulated across all lanes: Select if every in-range
  // extract reads its own lane number, Permute otherwise.
  enum ShuffleMode { Unknown, Select, Permute };
  ShuffleMode CommonShuffleMode = Unknown;
  for (unsigned I = 0, E = VL.size(); I < E; ++I) {
    auto *EI = cast<ExtractElementInst>(VL[I]);
    auto *Vec = EI->getVectorOperand();
    // All vector operands must have the same number of vector elements.
    if (cast<FixedVectorType>(Vec->getType())->getNumElements() != Size)
      return None;
    // Only constant extract indices can be turned into a shuffle mask.
    auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());
    if (!Idx)
      return None;
    // Undefined behavior if Idx is negative or >= Size.
    if (Idx->getValue().uge(Size)) {
      Mask.push_back(UndefMaskElem);
      continue;
    }
    unsigned IntIdx = Idx->getValue().getZExtValue();
    Mask.push_back(IntIdx);
    // We can extractelement from undef or poison vector.
    if (isa<UndefValue>(Vec))
      continue;
    // For correct shuffling we have to have at most 2 different vector operands
    // in all extractelement instructions.
    if (!Vec1 || Vec1 == Vec)
      Vec1 = Vec;
    else if (!Vec2 || Vec2 == Vec)
      Vec2 = Vec;
    else
      return None;
    // Once the lanes are known to be permuted, the mode can't improve; skip
    // the per-lane classification below.
    if (CommonShuffleMode == Permute)
      continue;
    // If the extract index is not the same as the operation number, it is a
    // permutation.
    if (IntIdx != I) {
      CommonShuffleMode = Permute;
      continue;
    }
    CommonShuffleMode = Select;
  }
  // If we're not crossing lanes in different vectors, consider it as blending.
  if (CommonShuffleMode == Select && Vec2)
    return TargetTransformInfo::SK_Select;
  // If Vec2 was never used, we have a permutation of a single vector, otherwise
  // we have permutation of 2 vectors.
  return Vec2 ? TargetTransformInfo::SK_PermuteTwoSrc
              : TargetTransformInfo::SK_PermuteSingleSrc;
}
3510b57cec5SDimitry Andric
namespace {

/// Main data required for vectorization of instructions.
///
/// Describes a bundle of scalars that share a "main" opcode, plus at most one
/// "alternate" opcode (e.g. a mixed add/sub bundle that will become a
/// shuffle). Produced by getSameOpcode() below; a null MainOp signals that no
/// common opcode was found.
struct InstructionsState {
  /// The very first instruction in the list with the main opcode.
  Value *OpValue = nullptr;

  /// The main/alternate instruction. Both may be null (invalid state); when
  /// there is no alternate opcode, AltOp equals MainOp.
  Instruction *MainOp = nullptr;
  Instruction *AltOp = nullptr;

  /// The main/alternate opcodes for the list of instructions.
  /// \returns 0 when the state is invalid (null MainOp).
  unsigned getOpcode() const {
    return MainOp ? MainOp->getOpcode() : 0;
  }

  /// \returns the alternate opcode, or 0 when the state is invalid.
  unsigned getAltOpcode() const {
    return AltOp ? AltOp->getOpcode() : 0;
  }

  /// Some of the instructions in the list have alternate opcodes.
  bool isAltShuffle() const { return getOpcode() != getAltOpcode(); }

  /// \returns true if \p I uses either the main or the alternate opcode.
  bool isOpcodeOrAlt(Instruction *I) const {
    unsigned CheckedOpcode = I->getOpcode();
    return getOpcode() == CheckedOpcode || getAltOpcode() == CheckedOpcode;
  }

  InstructionsState() = delete;
  InstructionsState(Value *OpValue, Instruction *MainOp, Instruction *AltOp)
      : OpValue(OpValue), MainOp(MainOp), AltOp(AltOp) {}
};

} // end anonymous namespace
3860b57cec5SDimitry Andric
3870b57cec5SDimitry Andric /// Chooses the correct key for scheduling data. If \p Op has the same (or
3880b57cec5SDimitry Andric /// alternate) opcode as \p OpValue, the key is \p Op. Otherwise the key is \p
3890b57cec5SDimitry Andric /// OpValue.
isOneOf(const InstructionsState & S,Value * Op)3900b57cec5SDimitry Andric static Value *isOneOf(const InstructionsState &S, Value *Op) {
3910b57cec5SDimitry Andric auto *I = dyn_cast<Instruction>(Op);
3920b57cec5SDimitry Andric if (I && S.isOpcodeOrAlt(I))
3930b57cec5SDimitry Andric return Op;
3940b57cec5SDimitry Andric return S.OpValue;
3950b57cec5SDimitry Andric }
3960b57cec5SDimitry Andric
39755e4f9d5SDimitry Andric /// \returns true if \p Opcode is allowed as part of of the main/alternate
39855e4f9d5SDimitry Andric /// instruction for SLP vectorization.
39955e4f9d5SDimitry Andric ///
40055e4f9d5SDimitry Andric /// Example of unsupported opcode is SDIV that can potentially cause UB if the
40155e4f9d5SDimitry Andric /// "shuffled out" lane would result in division by zero.
isValidForAlternation(unsigned Opcode)40255e4f9d5SDimitry Andric static bool isValidForAlternation(unsigned Opcode) {
40355e4f9d5SDimitry Andric if (Instruction::isIntDivRem(Opcode))
40455e4f9d5SDimitry Andric return false;
40555e4f9d5SDimitry Andric
40655e4f9d5SDimitry Andric return true;
40755e4f9d5SDimitry Andric }
40855e4f9d5SDimitry Andric
/// \returns analysis of the Instructions in \p VL described in
/// InstructionsState, the Opcode that we suppose the whole list
/// could be vectorized even if its structure is diverse.
///
/// Scans \p VL for a common ("main") opcode and at most one alternate opcode:
/// either a second BinaryOperator opcode (both must pass
/// isValidForAlternation) or a second CastInst opcode with matching source
/// type. On any other mix, an invalid state (null Main/AltOp) is returned.
/// \param BaseIndex index of the element whose opcode seeds the search and
/// which becomes OpValue in the result.
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
                                       unsigned BaseIndex = 0) {
  // Make sure these are all Instructions.
  if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
    return InstructionsState(VL[BaseIndex], nullptr, nullptr);

  bool IsCastOp = isa<CastInst>(VL[BaseIndex]);
  bool IsBinOp = isa<BinaryOperator>(VL[BaseIndex]);
  unsigned Opcode = cast<Instruction>(VL[BaseIndex])->getOpcode();
  // Until an alternate is found, AltOpcode/AltIndex mirror the main opcode;
  // "Opcode == AltOpcode" below therefore means "no alternate chosen yet".
  unsigned AltOpcode = Opcode;
  unsigned AltIndex = BaseIndex;

  // Check for one alternate opcode from another BinaryOperator.
  // TODO - generalize to support all operators (types, calls etc.).
  for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
    unsigned InstOpcode = cast<Instruction>(VL[Cnt])->getOpcode();
    if (IsBinOp && isa<BinaryOperator>(VL[Cnt])) {
      if (InstOpcode == Opcode || InstOpcode == AltOpcode)
        continue;
      // Adopt this opcode as the alternate if none is set yet and both
      // opcodes are safe to "shuffle out" (no div/rem UB).
      if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
          isValidForAlternation(Opcode)) {
        AltOpcode = InstOpcode;
        AltIndex = Cnt;
        continue;
      }
    } else if (IsCastOp && isa<CastInst>(VL[Cnt])) {
      // Casts may only alternate when they convert from the same source type.
      Type *Ty0 = cast<Instruction>(VL[BaseIndex])->getOperand(0)->getType();
      Type *Ty1 = cast<Instruction>(VL[Cnt])->getOperand(0)->getType();
      if (Ty0 == Ty1) {
        if (InstOpcode == Opcode || InstOpcode == AltOpcode)
          continue;
        if (Opcode == AltOpcode) {
          assert(isValidForAlternation(Opcode) &&
                 isValidForAlternation(InstOpcode) &&
                 "Cast isn't safe for alternation, logic needs to be updated!");
          AltOpcode = InstOpcode;
          AltIndex = Cnt;
          continue;
        }
      }
    } else if (InstOpcode == Opcode || InstOpcode == AltOpcode)
      continue;
    // A third opcode (or an incompatible instruction kind) — give up.
    return InstructionsState(VL[BaseIndex], nullptr, nullptr);
  }

  return InstructionsState(VL[BaseIndex], cast<Instruction>(VL[BaseIndex]),
                           cast<Instruction>(VL[AltIndex]));
}
4600b57cec5SDimitry Andric
4610b57cec5SDimitry Andric /// \returns true if all of the values in \p VL have the same type or false
4620b57cec5SDimitry Andric /// otherwise.
allSameType(ArrayRef<Value * > VL)4630b57cec5SDimitry Andric static bool allSameType(ArrayRef<Value *> VL) {
4640b57cec5SDimitry Andric Type *Ty = VL[0]->getType();
4650b57cec5SDimitry Andric for (int i = 1, e = VL.size(); i < e; i++)
4660b57cec5SDimitry Andric if (VL[i]->getType() != Ty)
4670b57cec5SDimitry Andric return false;
4680b57cec5SDimitry Andric
4690b57cec5SDimitry Andric return true;
4700b57cec5SDimitry Andric }
4710b57cec5SDimitry Andric
4720b57cec5SDimitry Andric /// \returns True if Extract{Value,Element} instruction extracts element Idx.
getExtractIndex(Instruction * E)4730b57cec5SDimitry Andric static Optional<unsigned> getExtractIndex(Instruction *E) {
4740b57cec5SDimitry Andric unsigned Opcode = E->getOpcode();
4750b57cec5SDimitry Andric assert((Opcode == Instruction::ExtractElement ||
4760b57cec5SDimitry Andric Opcode == Instruction::ExtractValue) &&
4770b57cec5SDimitry Andric "Expected extractelement or extractvalue instruction.");
4780b57cec5SDimitry Andric if (Opcode == Instruction::ExtractElement) {
4790b57cec5SDimitry Andric auto *CI = dyn_cast<ConstantInt>(E->getOperand(1));
4800b57cec5SDimitry Andric if (!CI)
4810b57cec5SDimitry Andric return None;
4820b57cec5SDimitry Andric return CI->getZExtValue();
4830b57cec5SDimitry Andric }
4840b57cec5SDimitry Andric ExtractValueInst *EI = cast<ExtractValueInst>(E);
4850b57cec5SDimitry Andric if (EI->getNumIndices() != 1)
4860b57cec5SDimitry Andric return None;
4870b57cec5SDimitry Andric return *EI->idx_begin();
4880b57cec5SDimitry Andric }
4890b57cec5SDimitry Andric
/// \returns True if in-tree use also needs extract. This refers to
/// possible scalar operand in vectorized instruction.
///
/// Even when \p UserInst is itself vectorized, some of its operands must stay
/// scalar: the pointer operand of a load/store, or an argument of an
/// intrinsic call that is required to be scalar. In those cases the scalar
/// \p Scalar must be re-extracted from the vector.
static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
                                    TargetLibraryInfo *TLI) {
  unsigned Opcode = UserInst->getOpcode();
  switch (Opcode) {
  case Instruction::Load: {
    // Address operands are not vectorized with the load itself.
    LoadInst *LI = cast<LoadInst>(UserInst);
    return (LI->getPointerOperand() == Scalar);
  }
  case Instruction::Store: {
    // Likewise for the store's address (the stored value is vectorized).
    StoreInst *SI = cast<StoreInst>(UserInst);
    return (SI->getPointerOperand() == Scalar);
  }
  case Instruction::Call: {
    CallInst *CI = cast<CallInst>(UserInst);
    Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
    // Check whether Scalar feeds an argument the vector intrinsic requires
    // to remain scalar (e.g. the shift amount of some target intrinsics).
    for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
      if (hasVectorInstrinsicScalarOpd(ID, i))
        return (CI->getArgOperand(i) == Scalar);
    }
    // No scalar-only argument matched: deliberately fall through to the
    // default "no extract needed" case.
    LLVM_FALLTHROUGH;
  }
  default:
    return false;
  }
}
5170b57cec5SDimitry Andric
5180b57cec5SDimitry Andric /// \returns the AA location that is being access by the instruction.
getLocation(Instruction * I,AAResults * AA)519af732203SDimitry Andric static MemoryLocation getLocation(Instruction *I, AAResults *AA) {
5200b57cec5SDimitry Andric if (StoreInst *SI = dyn_cast<StoreInst>(I))
5210b57cec5SDimitry Andric return MemoryLocation::get(SI);
5220b57cec5SDimitry Andric if (LoadInst *LI = dyn_cast<LoadInst>(I))
5230b57cec5SDimitry Andric return MemoryLocation::get(LI);
5240b57cec5SDimitry Andric return MemoryLocation();
5250b57cec5SDimitry Andric }
5260b57cec5SDimitry Andric
5270b57cec5SDimitry Andric /// \returns True if the instruction is not a volatile or atomic load/store.
isSimple(Instruction * I)5280b57cec5SDimitry Andric static bool isSimple(Instruction *I) {
5290b57cec5SDimitry Andric if (LoadInst *LI = dyn_cast<LoadInst>(I))
5300b57cec5SDimitry Andric return LI->isSimple();
5310b57cec5SDimitry Andric if (StoreInst *SI = dyn_cast<StoreInst>(I))
5320b57cec5SDimitry Andric return SI->isSimple();
5330b57cec5SDimitry Andric if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
5340b57cec5SDimitry Andric return !MI->isVolatile();
5350b57cec5SDimitry Andric return true;
5360b57cec5SDimitry Andric }
5370b57cec5SDimitry Andric
5380b57cec5SDimitry Andric namespace llvm {
5390b57cec5SDimitry Andric
inversePermutation(ArrayRef<unsigned> Indices,SmallVectorImpl<int> & Mask)540af732203SDimitry Andric static void inversePermutation(ArrayRef<unsigned> Indices,
541af732203SDimitry Andric SmallVectorImpl<int> &Mask) {
542af732203SDimitry Andric Mask.clear();
543af732203SDimitry Andric const unsigned E = Indices.size();
544af732203SDimitry Andric Mask.resize(E, E + 1);
545af732203SDimitry Andric for (unsigned I = 0; I < E; ++I)
546af732203SDimitry Andric Mask[Indices[I]] = I;
547af732203SDimitry Andric }
548af732203SDimitry Andric
/// \returns inserting index of InsertElement or InsertValue instruction,
/// using Offset as base offset for index. Returns UndefMaskElem for an
/// undef or out-of-bounds insertelement index, and None when the index
/// cannot be determined.
static Optional<int> getInsertIndex(Value *InsertInst, unsigned Offset) {
  int Index = Offset;
  if (auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
    // insertelement: the index operand must be a constant to be usable.
    if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) {
      auto *VT = cast<FixedVectorType>(IE->getType());
      // An index past the vector width cannot name a real lane.
      if (CI->getValue().uge(VT->getNumElements()))
        return UndefMaskElem;
      // Linearize: base offset scaled by the vector width plus the lane.
      Index *= VT->getNumElements();
      Index += CI->getZExtValue();
      return Index;
    }
    if (isa<UndefValue>(IE->getOperand(2)))
      return UndefMaskElem;
    return None;
  }

  // insertvalue: flatten the (possibly nested) aggregate indices into a
  // single linear index, scaling by the element count at each level.
  auto *IV = cast<InsertValueInst>(InsertInst);
  Type *CurrentType = IV->getType();
  for (unsigned I : IV->indices()) {
    if (auto *ST = dyn_cast<StructType>(CurrentType)) {
      Index *= ST->getNumElements();
      CurrentType = ST->getElementType(I);
    } else if (auto *AT = dyn_cast<ArrayType>(CurrentType)) {
      Index *= AT->getNumElements();
      CurrentType = AT->getElementType();
    } else {
      // Neither struct nor array (e.g. a vector level) - cannot linearize.
      return None;
    }
    Index += I;
  }
  return Index;
}
5835f7ddb14SDimitry Andric
5840b57cec5SDimitry Andric namespace slpvectorizer {
5850b57cec5SDimitry Andric
5860b57cec5SDimitry Andric /// Bottom Up SLP Vectorizer.
5870b57cec5SDimitry Andric class BoUpSLP {
5880b57cec5SDimitry Andric struct TreeEntry;
5898bcb0991SDimitry Andric struct ScheduleData;
5900b57cec5SDimitry Andric
5910b57cec5SDimitry Andric public:
5920b57cec5SDimitry Andric using ValueList = SmallVector<Value *, 8>;
5930b57cec5SDimitry Andric using InstrList = SmallVector<Instruction *, 16>;
5940b57cec5SDimitry Andric using ValueSet = SmallPtrSet<Value *, 16>;
5950b57cec5SDimitry Andric using StoreList = SmallVector<StoreInst *, 8>;
5960b57cec5SDimitry Andric using ExtraValueToDebugLocsMap =
5970b57cec5SDimitry Andric MapVector<Value *, SmallVector<Instruction *, 2>>;
598af732203SDimitry Andric using OrdersType = SmallVector<unsigned, 4>;
5990b57cec5SDimitry Andric
  /// Construct the vectorizer state from the per-function analyses; also
  /// collects ephemeral values and resolves the min/max vector register
  /// sizes (command-line overrides win over TTI).
  BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
          TargetLibraryInfo *TLi, AAResults *Aa, LoopInfo *Li,
          DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB,
          const DataLayout *DL, OptimizationRemarkEmitter *ORE)
      : F(Func), SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC),
        DB(DB), DL(DL), ORE(ORE), Builder(Se->getContext()) {
    CodeMetrics::collectEphemeralValues(F, AC, EphValues);
    // Use the vector register size specified by the target unless overridden
    // by a command-line option.
    // TODO: It would be better to limit the vectorization factor based on
    // data type rather than just register size. For example, x86 AVX has
    // 256-bit registers, but it does not support integer operations
    // at that width (that requires AVX2).
    if (MaxVectorRegSizeOption.getNumOccurrences())
      MaxVecRegSize = MaxVectorRegSizeOption;
    else
      MaxVecRegSize =
          TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
              .getFixedSize();

    // Likewise, the minimum register size can be forced from the command
    // line; otherwise ask the target.
    if (MinVectorRegSizeOption.getNumOccurrences())
      MinVecRegSize = MinVectorRegSizeOption;
    else
      MinVecRegSize = TTI->getMinVectorRegisterBitWidth();
  }
6250b57cec5SDimitry Andric
6260b57cec5SDimitry Andric /// Vectorize the tree that starts with the elements in \p VL.
6270b57cec5SDimitry Andric /// Returns the vectorized root.
6280b57cec5SDimitry Andric Value *vectorizeTree();
6290b57cec5SDimitry Andric
  /// Vectorize the tree but with the list of externally used values \p
  /// ExternallyUsedValues. Values in this MapVector can be replaced by the
  /// generated extractvalue instructions.
6330b57cec5SDimitry Andric Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues);
6340b57cec5SDimitry Andric
6350b57cec5SDimitry Andric /// \returns the cost incurred by unwanted spills and fills, caused by
6360b57cec5SDimitry Andric /// holding live values over call sites.
637af732203SDimitry Andric InstructionCost getSpillCost() const;
6380b57cec5SDimitry Andric
6390b57cec5SDimitry Andric /// \returns the vectorization cost of the subtree that starts at \p VL.
6400b57cec5SDimitry Andric /// A negative number means that this is profitable.
6415f7ddb14SDimitry Andric InstructionCost getTreeCost(ArrayRef<Value *> VectorizedVals = None);
6420b57cec5SDimitry Andric
6430b57cec5SDimitry Andric /// Construct a vectorizable tree that starts at \p Roots, ignoring users for
6440b57cec5SDimitry Andric /// the purpose of scheduling and extraction in the \p UserIgnoreLst.
6450b57cec5SDimitry Andric void buildTree(ArrayRef<Value *> Roots,
6460b57cec5SDimitry Andric ArrayRef<Value *> UserIgnoreLst = None);
6470b57cec5SDimitry Andric
6480b57cec5SDimitry Andric /// Construct a vectorizable tree that starts at \p Roots, ignoring users for
6490b57cec5SDimitry Andric /// the purpose of scheduling and extraction in the \p UserIgnoreLst taking
650480093f4SDimitry Andric /// into account (and updating it, if required) list of externally used
6510b57cec5SDimitry Andric /// values stored in \p ExternallyUsedValues.
6520b57cec5SDimitry Andric void buildTree(ArrayRef<Value *> Roots,
6530b57cec5SDimitry Andric ExtraValueToDebugLocsMap &ExternallyUsedValues,
6540b57cec5SDimitry Andric ArrayRef<Value *> UserIgnoreLst = None);
6550b57cec5SDimitry Andric
6560b57cec5SDimitry Andric /// Clear the internal data structures that are created by 'buildTree'.
deleteTree()6570b57cec5SDimitry Andric void deleteTree() {
6580b57cec5SDimitry Andric VectorizableTree.clear();
6590b57cec5SDimitry Andric ScalarToTreeEntry.clear();
6600b57cec5SDimitry Andric MustGather.clear();
6610b57cec5SDimitry Andric ExternalUses.clear();
6620b57cec5SDimitry Andric NumOpsWantToKeepOrder.clear();
6630b57cec5SDimitry Andric NumOpsWantToKeepOriginalOrder = 0;
6640b57cec5SDimitry Andric for (auto &Iter : BlocksSchedules) {
6650b57cec5SDimitry Andric BlockScheduling *BS = Iter.second.get();
6660b57cec5SDimitry Andric BS->clear();
6670b57cec5SDimitry Andric }
6680b57cec5SDimitry Andric MinBWs.clear();
6695f7ddb14SDimitry Andric InstrElementSize.clear();
6700b57cec5SDimitry Andric }
6710b57cec5SDimitry Andric
getTreeSize() const6720b57cec5SDimitry Andric unsigned getTreeSize() const { return VectorizableTree.size(); }
6730b57cec5SDimitry Andric
6740b57cec5SDimitry Andric /// Perform LICM and CSE on the newly generated gather sequences.
6750b57cec5SDimitry Andric void optimizeGatherSequence();
6760b57cec5SDimitry Andric
  /// \returns The best order of instructions for vectorization, or None if
  /// no candidate order beats keeping the original order.
  Optional<ArrayRef<unsigned>> bestOrder() const {
    // Every recorded candidate order must match the width of the root node.
    assert(llvm::all_of(
               NumOpsWantToKeepOrder,
               [this](const decltype(NumOpsWantToKeepOrder)::value_type &D) {
                 return D.getFirst().size() ==
                        VectorizableTree[0]->Scalars.size();
               }) &&
           "All orders must have the same size as number of instructions in "
           "tree node.");
    // Pick the order that the largest number of operations voted for.
    auto I = std::max_element(
        NumOpsWantToKeepOrder.begin(), NumOpsWantToKeepOrder.end(),
        [](const decltype(NumOpsWantToKeepOrder)::value_type &D1,
           const decltype(NumOpsWantToKeepOrder)::value_type &D2) {
          return D1.second < D2.second;
        });
    // Keeping the original order wins ties.
    if (I == NumOpsWantToKeepOrder.end() ||
        I->getSecond() <= NumOpsWantToKeepOriginalOrder)
      return None;

    return makeArrayRef(I->getFirst());
  }
6990b57cec5SDimitry Andric
  /// Builds the correct order for root instructions.
  /// If some leaves have the same instructions to be vectorized, we may
  /// incorrectly evaluate the best order for the root node (it is built for the
  /// vector of instructions without repeated instructions and, thus, has less
  /// elements than the root node). This function builds the correct order for
  /// the root node.
  /// For example, if the root node is \<a+b, a+c, a+d, f+e\>, then the leaves
  /// are \<a, a, a, f\> and \<b, c, d, e\>. When we try to vectorize the first
  /// leaf, it will be shrink to \<a, b\>. If instructions in this leaf should
  /// be reordered, the best order will be \<1, 0\>. We need to extend this
  /// order for the root node. For the root node this order should look like
  /// \<3, 0, 1, 2\>. This function extends the order for the reused
  /// instructions.
  void findRootOrder(OrdersType &Order) {
    // If the leaf has the same number of instructions to vectorize as the root
    // - order must be set already.
    unsigned RootSize = VectorizableTree[0]->Scalars.size();
    if (Order.size() == RootSize)
      return;
    // Work on the inverse permutation of the incoming order.
    SmallVector<unsigned, 4> RealOrder(Order.size());
    std::swap(Order, RealOrder);
    SmallVector<int, 4> Mask;
    inversePermutation(RealOrder, Mask);
    Order.assign(Mask.begin(), Mask.end());
    // The leaf has less number of instructions - need to find the true order of
    // the root.
    // Scan the nodes starting from the leaf back to the root.
    const TreeEntry *PNode = VectorizableTree.back().get();
    SmallVector<const TreeEntry *, 4> Nodes(1, PNode);
    SmallPtrSet<const TreeEntry *, 4> Visited;
    // Walk the user chain upwards (DFS over UserTreeIndices) until the order
    // has been widened to the root's size.
    while (!Nodes.empty() && Order.size() != RootSize) {
      const TreeEntry *PNode = Nodes.pop_back_val();
      if (!Visited.insert(PNode).second)
        continue;
      const TreeEntry &Node = *PNode;
      for (const EdgeInfo &EI : Node.UserTreeIndices)
        if (EI.UserTE)
          Nodes.push_back(EI.UserTE);
      // Only nodes with reused (shrunk) scalars require order extension.
      if (Node.ReuseShuffleIndices.empty())
        continue;
      // Build the order for the parent node.
      OrdersType NewOrder(Node.ReuseShuffleIndices.size(), RootSize);
      SmallVector<unsigned, 4> OrderCounter(Order.size(), 0);
      // The algorithm of the order extension is:
      // 1. Calculate the number of the same instructions for the order.
      // 2. Calculate the index of the new order: total number of instructions
      // with order less than the order of the current instruction + reuse
      // number of the current instruction.
      // 3. The new order is just the index of the instruction in the original
      // vector of the instructions.
      for (unsigned I : Node.ReuseShuffleIndices)
        ++OrderCounter[Order[I]];
      SmallVector<unsigned, 4> CurrentCounter(Order.size(), 0);
      for (unsigned I = 0, E = Node.ReuseShuffleIndices.size(); I < E; ++I) {
        unsigned ReusedIdx = Node.ReuseShuffleIndices[I];
        unsigned OrderIdx = Order[ReusedIdx];
        // NewIdx = (#entries ordered before OrderIdx) + (#times OrderIdx has
        // been placed already).
        unsigned NewIdx = 0;
        for (unsigned J = 0; J < OrderIdx; ++J)
          NewIdx += OrderCounter[J];
        NewIdx += CurrentCounter[OrderIdx];
        ++CurrentCounter[OrderIdx];
        assert(NewOrder[NewIdx] == RootSize &&
               "The order index should not be written already.");
        NewOrder[NewIdx] = I;
      }
      std::swap(Order, NewOrder);
    }
    assert(Order.size() == RootSize &&
           "Root node is expected or the size of the order must be the same as "
           "the number of elements in the root node.");
    assert(llvm::all_of(Order,
                        [RootSize](unsigned Val) { return Val != RootSize; }) &&
           "All indices must be initialized");
  }
774af732203SDimitry Andric
7750b57cec5SDimitry Andric /// \return The vector element size in bits to use when vectorizing the
7760b57cec5SDimitry Andric /// expression tree ending at \p V. If V is a store, the size is the width of
7770b57cec5SDimitry Andric /// the stored value. Otherwise, the size is the width of the largest loaded
7780b57cec5SDimitry Andric /// value reaching V. This method is used by the vectorizer to calculate
7790b57cec5SDimitry Andric /// vectorization factors.
7805ffd83dbSDimitry Andric unsigned getVectorElementSize(Value *V);
7810b57cec5SDimitry Andric
7820b57cec5SDimitry Andric /// Compute the minimum type sizes required to represent the entries in a
7830b57cec5SDimitry Andric /// vectorizable tree.
7840b57cec5SDimitry Andric void computeMinimumValueSizes();
7850b57cec5SDimitry Andric
7860b57cec5SDimitry Andric // \returns maximum vector register size as set by TTI or overridden by cl::opt.
getMaxVecRegSize() const7870b57cec5SDimitry Andric unsigned getMaxVecRegSize() const {
7880b57cec5SDimitry Andric return MaxVecRegSize;
7890b57cec5SDimitry Andric }
7900b57cec5SDimitry Andric
7910b57cec5SDimitry Andric // \returns minimum vector register size as set by cl::opt.
getMinVecRegSize() const7920b57cec5SDimitry Andric unsigned getMinVecRegSize() const {
7930b57cec5SDimitry Andric return MinVecRegSize;
7940b57cec5SDimitry Andric }
7950b57cec5SDimitry Andric
getMaximumVF(unsigned ElemWidth,unsigned Opcode) const796af732203SDimitry Andric unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
797af732203SDimitry Andric unsigned MaxVF = MaxVFOption.getNumOccurrences() ?
798af732203SDimitry Andric MaxVFOption : TTI->getMaximumVF(ElemWidth, Opcode);
799af732203SDimitry Andric return MaxVF ? MaxVF : UINT_MAX;
800af732203SDimitry Andric }
801af732203SDimitry Andric
802480093f4SDimitry Andric /// Check if homogeneous aggregate is isomorphic to some VectorType.
803480093f4SDimitry Andric /// Accepts homogeneous multidimensional aggregate of scalars/vectors like
804480093f4SDimitry Andric /// {[4 x i16], [4 x i16]}, { <2 x float>, <2 x float> },
805480093f4SDimitry Andric /// {{{i16, i16}, {i16, i16}}, {{i16, i16}, {i16, i16}}} and so on.
8060b57cec5SDimitry Andric ///
8070b57cec5SDimitry Andric /// \returns number of elements in vector if isomorphism exists, 0 otherwise.
8080b57cec5SDimitry Andric unsigned canMapToVector(Type *T, const DataLayout &DL) const;
8090b57cec5SDimitry Andric
8100b57cec5SDimitry Andric /// \returns True if the VectorizableTree is both tiny and not fully
8110b57cec5SDimitry Andric /// vectorizable. We do not vectorize such trees.
8120b57cec5SDimitry Andric bool isTreeTinyAndNotFullyVectorizable() const;
8130b57cec5SDimitry Andric
8148bcb0991SDimitry Andric /// Assume that a legal-sized 'or'-reduction of shifted/zexted loaded values
8158bcb0991SDimitry Andric /// can be load combined in the backend. Load combining may not be allowed in
8168bcb0991SDimitry Andric /// the IR optimizer, so we do not want to alter the pattern. For example,
8178bcb0991SDimitry Andric /// partially transforming a scalar bswap() pattern into vector code is
8188bcb0991SDimitry Andric /// effectively impossible for the backend to undo.
8198bcb0991SDimitry Andric /// TODO: If load combining is allowed in the IR optimizer, this analysis
8208bcb0991SDimitry Andric /// may not be necessary.
821af732203SDimitry Andric bool isLoadCombineReductionCandidate(RecurKind RdxKind) const;
8228bcb0991SDimitry Andric
8235ffd83dbSDimitry Andric /// Assume that a vector of stores of bitwise-or/shifted/zexted loaded values
8245ffd83dbSDimitry Andric /// can be load combined in the backend. Load combining may not be allowed in
8255ffd83dbSDimitry Andric /// the IR optimizer, so we do not want to alter the pattern. For example,
8265ffd83dbSDimitry Andric /// partially transforming a scalar bswap() pattern into vector code is
8275ffd83dbSDimitry Andric /// effectively impossible for the backend to undo.
8285ffd83dbSDimitry Andric /// TODO: If load combining is allowed in the IR optimizer, this analysis
8295ffd83dbSDimitry Andric /// may not be necessary.
8305ffd83dbSDimitry Andric bool isLoadCombineCandidate() const;
8315ffd83dbSDimitry Andric
getORE()8320b57cec5SDimitry Andric OptimizationRemarkEmitter *getORE() { return ORE; }
8330b57cec5SDimitry Andric
  /// This structure holds any data we need about the edges being traversed
  /// during buildTree_rec(). We keep track of:
  /// (i) the user TreeEntry index, and
  /// (ii) the index of the edge.
  struct EdgeInfo {
    EdgeInfo() = default;
    /// Construct an edge from \p UserTE's operand number \p EdgeIdx.
    EdgeInfo(TreeEntry *UserTE, unsigned EdgeIdx)
        : UserTE(UserTE), EdgeIdx(EdgeIdx) {}
    /// The user TreeEntry.
    TreeEntry *UserTE = nullptr;
    /// The operand index of the use.
    unsigned EdgeIdx = UINT_MAX;
#ifndef NDEBUG
    /// Stream-insertion support for debug output.
    friend inline raw_ostream &operator<<(raw_ostream &OS,
                                          const BoUpSLP::EdgeInfo &EI) {
      EI.dump(OS);
      return OS;
    }
    /// Debug print.
    void dump(raw_ostream &OS) const {
      OS << "{User:" << (UserTE ? std::to_string(UserTE->Idx) : "null")
         << " EdgeIdx:" << EdgeIdx << "}";
    }
    LLVM_DUMP_METHOD void dump() const { dump(dbgs()); }
#endif
  };
8600b57cec5SDimitry Andric
8610b57cec5SDimitry Andric /// A helper data structure to hold the operands of a vector of instructions.
8620b57cec5SDimitry Andric /// This supports a fixed vector length for all operand vectors.
8630b57cec5SDimitry Andric class VLOperands {
8640b57cec5SDimitry Andric /// For each operand we need (i) the value, and (ii) the opcode that it
8650b57cec5SDimitry Andric /// would be attached to if the expression was in a left-linearized form.
8660b57cec5SDimitry Andric /// This is required to avoid illegal operand reordering.
8670b57cec5SDimitry Andric /// For example:
8680b57cec5SDimitry Andric /// \verbatim
8690b57cec5SDimitry Andric /// 0 Op1
8700b57cec5SDimitry Andric /// |/
8710b57cec5SDimitry Andric /// Op1 Op2 Linearized + Op2
8720b57cec5SDimitry Andric /// \ / ----------> |/
8730b57cec5SDimitry Andric /// - -
8740b57cec5SDimitry Andric ///
8750b57cec5SDimitry Andric /// Op1 - Op2 (0 + Op1) - Op2
8760b57cec5SDimitry Andric /// \endverbatim
8770b57cec5SDimitry Andric ///
8780b57cec5SDimitry Andric /// Value Op1 is attached to a '+' operation, and Op2 to a '-'.
8790b57cec5SDimitry Andric ///
8800b57cec5SDimitry Andric /// Another way to think of this is to track all the operations across the
8810b57cec5SDimitry Andric /// path from the operand all the way to the root of the tree and to
8820b57cec5SDimitry Andric /// calculate the operation that corresponds to this path. For example, the
8830b57cec5SDimitry Andric /// path from Op2 to the root crosses the RHS of the '-', therefore the
8840b57cec5SDimitry Andric /// corresponding operation is a '-' (which matches the one in the
8850b57cec5SDimitry Andric /// linearized tree, as shown above).
8860b57cec5SDimitry Andric ///
8870b57cec5SDimitry Andric /// For lack of a better term, we refer to this operation as Accumulated
8880b57cec5SDimitry Andric /// Path Operation (APO).
    /// Per-lane record of one operand value plus its Accumulated Path
    /// Operation (APO); see the discussion above for what APO means.
    struct OperandData {
      OperandData() = default;
      /// Construct a fully-initialized record for value \p V.
      OperandData(Value *V, bool APO, bool IsUsed)
          : V(V), APO(APO), IsUsed(IsUsed) {}
      /// The operand value.
      Value *V = nullptr;
      /// TreeEntries only allow a single opcode, or an alternate sequence of
      /// them (e.g, +, -). Therefore, we can safely use a boolean value for the
      /// APO. It is set to 'true' if 'V' is attached to an inverse operation
      /// in the left-linearized form (e.g., Sub/Div), and 'false' otherwise
      /// (e.g., Add/Mul)
      bool APO = false;
      /// Helper data for the reordering function: marks operands already
      /// placed during the reordering pass.
      bool IsUsed = false;
    };
9040b57cec5SDimitry Andric
9050b57cec5SDimitry Andric /// During operand reordering, we are trying to select the operand at lane
9060b57cec5SDimitry Andric /// that matches best with the operand at the neighboring lane. Our
9070b57cec5SDimitry Andric /// selection is based on the type of value we are looking for. For example,
9080b57cec5SDimitry Andric /// if the neighboring lane has a load, we need to look for a load that is
9090b57cec5SDimitry Andric /// accessing a consecutive address. These strategies are summarized in the
9100b57cec5SDimitry Andric /// 'ReorderingMode' enumerator.
9110b57cec5SDimitry Andric enum class ReorderingMode {
9120b57cec5SDimitry Andric Load, ///< Matching loads to consecutive memory addresses
9130b57cec5SDimitry Andric Opcode, ///< Matching instructions based on opcode (same or alternate)
9140b57cec5SDimitry Andric Constant, ///< Matching constants
9150b57cec5SDimitry Andric Splat, ///< Matching the same instruction multiple times (broadcast)
9160b57cec5SDimitry Andric Failed, ///< We failed to create a vectorizable group
9170b57cec5SDimitry Andric };
9180b57cec5SDimitry Andric
9190b57cec5SDimitry Andric using OperandDataVec = SmallVector<OperandData, 2>;
9200b57cec5SDimitry Andric
9210b57cec5SDimitry Andric /// A vector of operand vectors.
9220b57cec5SDimitry Andric SmallVector<OperandDataVec, 4> OpsVec;
9230b57cec5SDimitry Andric
9240b57cec5SDimitry Andric const DataLayout &DL;
9250b57cec5SDimitry Andric ScalarEvolution &SE;
926480093f4SDimitry Andric const BoUpSLP &R;
9270b57cec5SDimitry Andric
    /// \returns the operand data at \p OpIdx and \p Lane.
    OperandData &getData(unsigned OpIdx, unsigned Lane) {
      return OpsVec[OpIdx][Lane];
    }
9320b57cec5SDimitry Andric
    /// \returns the operand data at \p OpIdx and \p Lane. Const version.
    const OperandData &getData(unsigned OpIdx, unsigned Lane) const {
      return OpsVec[OpIdx][Lane];
    }
9370b57cec5SDimitry Andric
9380b57cec5SDimitry Andric /// Clears the used flag for all entries.
clearUsed()9390b57cec5SDimitry Andric void clearUsed() {
9400b57cec5SDimitry Andric for (unsigned OpIdx = 0, NumOperands = getNumOperands();
9410b57cec5SDimitry Andric OpIdx != NumOperands; ++OpIdx)
9420b57cec5SDimitry Andric for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
9430b57cec5SDimitry Andric ++Lane)
9440b57cec5SDimitry Andric OpsVec[OpIdx][Lane].IsUsed = false;
9450b57cec5SDimitry Andric }
9460b57cec5SDimitry Andric
    /// Swap the operand at \p OpIdx1 with the one at \p OpIdx2, within the
    /// same \p Lane.
    void swap(unsigned OpIdx1, unsigned OpIdx2, unsigned Lane) {
      std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]);
    }
9510b57cec5SDimitry Andric
952480093f4SDimitry Andric // The hard-coded scores listed here are not very important. When computing
953480093f4SDimitry Andric // the scores of matching one sub-tree with another, we are basically
954480093f4SDimitry Andric // counting the number of values that are matching. So even if all scores
955480093f4SDimitry Andric // are set to 1, we would still get a decent matching result.
956480093f4SDimitry Andric // However, sometimes we have to break ties. For example we may have to
957480093f4SDimitry Andric // choose between matching loads vs matching opcodes. This is what these
958480093f4SDimitry Andric // scores are helping us with: they provide the order of preference.
959480093f4SDimitry Andric
960480093f4SDimitry Andric /// Loads from consecutive memory addresses, e.g. load(A[i]), load(A[i+1]).
961480093f4SDimitry Andric static const int ScoreConsecutiveLoads = 3;
962480093f4SDimitry Andric /// ExtractElementInst from same vector and consecutive indexes.
963480093f4SDimitry Andric static const int ScoreConsecutiveExtracts = 3;
964480093f4SDimitry Andric /// Constants.
965480093f4SDimitry Andric static const int ScoreConstants = 2;
966480093f4SDimitry Andric /// Instructions with the same opcode.
967480093f4SDimitry Andric static const int ScoreSameOpcode = 2;
968480093f4SDimitry Andric /// Instructions with alt opcodes (e.g, add + sub).
969480093f4SDimitry Andric static const int ScoreAltOpcodes = 1;
970480093f4SDimitry Andric /// Identical instructions (a.k.a. splat or broadcast).
971480093f4SDimitry Andric static const int ScoreSplat = 1;
972480093f4SDimitry Andric /// Matching with an undef is preferable to failing.
973480093f4SDimitry Andric static const int ScoreUndef = 1;
974480093f4SDimitry Andric /// Score for failing to find a decent match.
975480093f4SDimitry Andric static const int ScoreFail = 0;
    /// User external to the vectorized code.
977480093f4SDimitry Andric static const int ExternalUseCost = 1;
978480093f4SDimitry Andric /// The user is internal but in a different lane.
979480093f4SDimitry Andric static const int UserInDiffLaneCost = ExternalUseCost;
980480093f4SDimitry Andric
981480093f4SDimitry Andric /// \returns the score of placing \p V1 and \p V2 in consecutive lanes.
getShallowScore(Value * V1,Value * V2,const DataLayout & DL,ScalarEvolution & SE)982480093f4SDimitry Andric static int getShallowScore(Value *V1, Value *V2, const DataLayout &DL,
983480093f4SDimitry Andric ScalarEvolution &SE) {
984480093f4SDimitry Andric auto *LI1 = dyn_cast<LoadInst>(V1);
985480093f4SDimitry Andric auto *LI2 = dyn_cast<LoadInst>(V2);
9865f7ddb14SDimitry Andric if (LI1 && LI2) {
9875f7ddb14SDimitry Andric if (LI1->getParent() != LI2->getParent())
9885f7ddb14SDimitry Andric return VLOperands::ScoreFail;
9895f7ddb14SDimitry Andric
9905f7ddb14SDimitry Andric Optional<int> Dist = getPointersDiff(
9915f7ddb14SDimitry Andric LI1->getType(), LI1->getPointerOperand(), LI2->getType(),
9925f7ddb14SDimitry Andric LI2->getPointerOperand(), DL, SE, /*StrictCheck=*/true);
9935f7ddb14SDimitry Andric return (Dist && *Dist == 1) ? VLOperands::ScoreConsecutiveLoads
994480093f4SDimitry Andric : VLOperands::ScoreFail;
9955f7ddb14SDimitry Andric }
996480093f4SDimitry Andric
997480093f4SDimitry Andric auto *C1 = dyn_cast<Constant>(V1);
998480093f4SDimitry Andric auto *C2 = dyn_cast<Constant>(V2);
999480093f4SDimitry Andric if (C1 && C2)
1000480093f4SDimitry Andric return VLOperands::ScoreConstants;
1001480093f4SDimitry Andric
1002480093f4SDimitry Andric // Extracts from consecutive indexes of the same vector better score as
1003480093f4SDimitry Andric // the extracts could be optimized away.
100447395794SDimitry Andric Value *EV;
100547395794SDimitry Andric ConstantInt *Ex1Idx, *Ex2Idx;
10065ffd83dbSDimitry Andric if (match(V1, m_ExtractElt(m_Value(EV), m_ConstantInt(Ex1Idx))) &&
10075ffd83dbSDimitry Andric match(V2, m_ExtractElt(m_Deferred(EV), m_ConstantInt(Ex2Idx))) &&
100847395794SDimitry Andric Ex1Idx->getZExtValue() + 1 == Ex2Idx->getZExtValue())
1009480093f4SDimitry Andric return VLOperands::ScoreConsecutiveExtracts;
1010480093f4SDimitry Andric
1011480093f4SDimitry Andric auto *I1 = dyn_cast<Instruction>(V1);
1012480093f4SDimitry Andric auto *I2 = dyn_cast<Instruction>(V2);
1013480093f4SDimitry Andric if (I1 && I2) {
1014480093f4SDimitry Andric if (I1 == I2)
1015480093f4SDimitry Andric return VLOperands::ScoreSplat;
1016480093f4SDimitry Andric InstructionsState S = getSameOpcode({I1, I2});
1017480093f4SDimitry Andric // Note: Only consider instructions with <= 2 operands to avoid
1018480093f4SDimitry Andric // complexity explosion.
1019480093f4SDimitry Andric if (S.getOpcode() && S.MainOp->getNumOperands() <= 2)
1020480093f4SDimitry Andric return S.isAltShuffle() ? VLOperands::ScoreAltOpcodes
1021480093f4SDimitry Andric : VLOperands::ScoreSameOpcode;
1022480093f4SDimitry Andric }
1023480093f4SDimitry Andric
1024480093f4SDimitry Andric if (isa<UndefValue>(V2))
1025480093f4SDimitry Andric return VLOperands::ScoreUndef;
1026480093f4SDimitry Andric
1027480093f4SDimitry Andric return VLOperands::ScoreFail;
1028480093f4SDimitry Andric }
1029480093f4SDimitry Andric
/// Holds the values and their lane that are taking part in the look-ahead
/// score calculation. This is used in the external uses cost calculation.
/// It is cleared at the start of each top-level look-ahead query and is
/// populated as the recursive scoring descends into operands.
SmallDenseMap<Value *, int> InLookAheadValues;
1033480093f4SDimitry Andric
/// \Returns the additional cost due to uses of \p LHS and \p RHS that are
/// either external to the vectorized code, or require shuffling.
int getExternalUsesCost(const std::pair<Value *, int> &LHS,
                        const std::pair<Value *, int> &RHS) {
  int Cost = 0;
  std::array<std::pair<Value *, int>, 2> Values = {{LHS, RHS}};
  for (int Idx = 0, IdxE = Values.size(); Idx != IdxE; ++Idx) {
    Value *V = Values[Idx].first;
    if (isa<Constant>(V)) {
      // Since this is a function pass, it doesn't make semantic sense to
      // walk the users of a subclass of Constant. The users could be in
      // another function, or even another module that happens to be in
      // the same LLVMContext.
      continue;
    }

    // Calculate the absolute lane, using the minimum relative lane of LHS
    // and RHS as base and Idx as the offset.
    int Ln = std::min(LHS.second, RHS.second) + Idx;
    assert(Ln >= 0 && "Bad lane calculation");
    // Cap the number of users we visit per value to bound compile time.
    unsigned UsersBudget = LookAheadUsersBudget;
    for (User *U : V->users()) {
      if (const TreeEntry *UserTE = R.getTreeEntry(U)) {
        // The user is in the VectorizableTree. Check if we need to insert.
        auto It = llvm::find(UserTE->Scalars, U);
        assert(It != UserTE->Scalars.end() && "U is in UserTE");
        int UserLn = std::distance(UserTE->Scalars.begin(), It);
        assert(UserLn >= 0 && "Bad lane");
        // An in-tree user in a different lane would need a shuffle.
        if (UserLn != Ln)
          Cost += UserInDiffLaneCost;
      } else {
        // Check if the user is in the look-ahead code.
        auto It2 = InLookAheadValues.find(U);
        if (It2 != InLookAheadValues.end()) {
          // The user is in the look-ahead code. Check the lane.
          if (It2->second != Ln)
            Cost += UserInDiffLaneCost;
        } else {
          // The user is neither in SLP tree nor in the look-ahead code.
          Cost += ExternalUseCost;
        }
      }
      // Limit the number of visited uses to cap compilation time.
      if (--UsersBudget == 0)
        break;
    }
  }
  return Cost;
}
1083480093f4SDimitry Andric
/// Go through the operands of \p LHS and \p RHS recursively until \p
/// MaxLevel, and return the cumulative score. For example:
/// \verbatim
///  A[0]  B[0]  A[1]  B[1]  C[0] D[0]  B[1] A[1]
///     \ /         \ /         \ /        \ /
///      +           +           +          +
///     G1          G2          G3         G4
/// \endverbatim
/// The getScoreAtLevelRec(G1, G2) function will try to match the nodes at
/// each level recursively, accumulating the score. It starts from matching
/// the additions at level 0, then moves on to the loads (level 1). The
/// score of G1 and G2 is higher than G1 and G3, because {A[0],A[1]} and
/// {B[0],B[1]} match with VLOperands::ScoreConsecutiveLoads, while
/// {A[0],C[0]} has a score of VLOperands::ScoreFail.
/// Please note that the order of the operands does not matter, as we
/// evaluate the score of all profitable combinations of operands. In
/// other words the score of G1 and G4 is the same as G1 and G2. This
/// heuristic is based on ideas described in:
///   Look-ahead SLP: Auto-vectorization in the presence of commutative
///   operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
///   Luís F. W. Góes
int getScoreAtLevelRec(const std::pair<Value *, int> &LHS,
                       const std::pair<Value *, int> &RHS, int CurrLevel,
                       int MaxLevel) {

  Value *V1 = LHS.first;
  Value *V2 = RHS.first;
  // Get the shallow score of V1 and V2, discounted by the external-use
  // cost, and clamped at ScoreFail from below.
  int ShallowScoreAtThisLevel =
      std::max((int)ScoreFail, getShallowScore(V1, V2, DL, SE) -
                                   getExternalUsesCost(LHS, RHS));
  int Lane1 = LHS.second;
  int Lane2 = RHS.second;

  // If reached MaxLevel,
  // or if V1 and V2 are not instructions,
  // or if they are SPLAT,
  // or if they are not consecutive, early return the current cost.
  // A pair of loads with a non-fail score is also terminal: the shallow
  // score already captures their (memory) match, so we do not recurse
  // into their operands.
  auto *I1 = dyn_cast<Instruction>(V1);
  auto *I2 = dyn_cast<Instruction>(V2);
  if (CurrLevel == MaxLevel || !(I1 && I2) || I1 == I2 ||
      ShallowScoreAtThisLevel == VLOperands::ScoreFail ||
      (isa<LoadInst>(I1) && isa<LoadInst>(I2) && ShallowScoreAtThisLevel))
    return ShallowScoreAtThisLevel;
  assert(I1 && I2 && "Should have early exited.");

  // Keep track of in-tree values for determining the external-use cost.
  InLookAheadValues[V1] = Lane1;
  InLookAheadValues[V2] = Lane2;

  // Contains the I2 operand indexes that got matched with I1 operands.
  SmallSet<unsigned, 4> Op2Used;

  // Recursion towards the operands of I1 and I2. We are trying all possible
  // operand pairs, and keeping track of the best score.
  for (unsigned OpIdx1 = 0, NumOperands1 = I1->getNumOperands();
       OpIdx1 != NumOperands1; ++OpIdx1) {
    // Try to pair op1I with the best operand of I2.
    int MaxTmpScore = 0;
    unsigned MaxOpIdx2 = 0;
    bool FoundBest = false;
    // If I2 is commutative try all combinations.
    unsigned FromIdx = isCommutative(I2) ? 0 : OpIdx1;
    unsigned ToIdx = isCommutative(I2)
                         ? I2->getNumOperands()
                         : std::min(I2->getNumOperands(), OpIdx1 + 1);
    assert(FromIdx <= ToIdx && "Bad index");
    for (unsigned OpIdx2 = FromIdx; OpIdx2 != ToIdx; ++OpIdx2) {
      // Skip operands already paired with OpIdx1.
      if (Op2Used.count(OpIdx2))
        continue;
      // Recursively calculate the cost at each level
      int TmpScore = getScoreAtLevelRec({I1->getOperand(OpIdx1), Lane1},
                                        {I2->getOperand(OpIdx2), Lane2},
                                        CurrLevel + 1, MaxLevel);
      // Look for the best score.
      if (TmpScore > VLOperands::ScoreFail && TmpScore > MaxTmpScore) {
        MaxTmpScore = TmpScore;
        MaxOpIdx2 = OpIdx2;
        FoundBest = true;
      }
    }
    if (FoundBest) {
      // Pair {OpIdx1, MaxOpIdx2} was found to be best. Never revisit it.
      Op2Used.insert(MaxOpIdx2);
      ShallowScoreAtThisLevel += MaxTmpScore;
    }
  }
  return ShallowScoreAtThisLevel;
}
1174480093f4SDimitry Andric
1175480093f4SDimitry Andric /// \Returns the look-ahead score, which tells us how much the sub-trees
1176480093f4SDimitry Andric /// rooted at \p LHS and \p RHS match, the more they match the higher the
1177480093f4SDimitry Andric /// score. This helps break ties in an informed way when we cannot decide on
1178480093f4SDimitry Andric /// the order of the operands by just considering the immediate
1179480093f4SDimitry Andric /// predecessors.
getLookAheadScore(const std::pair<Value *,int> & LHS,const std::pair<Value *,int> & RHS)1180480093f4SDimitry Andric int getLookAheadScore(const std::pair<Value *, int> &LHS,
1181480093f4SDimitry Andric const std::pair<Value *, int> &RHS) {
1182480093f4SDimitry Andric InLookAheadValues.clear();
1183480093f4SDimitry Andric return getScoreAtLevelRec(LHS, RHS, 1, LookAheadMaxDepth);
1184480093f4SDimitry Andric }
1185480093f4SDimitry Andric
11860b57cec5SDimitry Andric // Search all operands in Ops[*][Lane] for the one that matches best
11870b57cec5SDimitry Andric // Ops[OpIdx][LastLane] and return its opreand index.
11880b57cec5SDimitry Andric // If no good match can be found, return None.
11890b57cec5SDimitry Andric Optional<unsigned>
getBestOperand(unsigned OpIdx,int Lane,int LastLane,ArrayRef<ReorderingMode> ReorderingModes)11900b57cec5SDimitry Andric getBestOperand(unsigned OpIdx, int Lane, int LastLane,
11910b57cec5SDimitry Andric ArrayRef<ReorderingMode> ReorderingModes) {
11920b57cec5SDimitry Andric unsigned NumOperands = getNumOperands();
11930b57cec5SDimitry Andric
11940b57cec5SDimitry Andric // The operand of the previous lane at OpIdx.
11950b57cec5SDimitry Andric Value *OpLastLane = getData(OpIdx, LastLane).V;
11960b57cec5SDimitry Andric
11970b57cec5SDimitry Andric // Our strategy mode for OpIdx.
11980b57cec5SDimitry Andric ReorderingMode RMode = ReorderingModes[OpIdx];
11990b57cec5SDimitry Andric
12000b57cec5SDimitry Andric // The linearized opcode of the operand at OpIdx, Lane.
12010b57cec5SDimitry Andric bool OpIdxAPO = getData(OpIdx, Lane).APO;
12020b57cec5SDimitry Andric
12030b57cec5SDimitry Andric // The best operand index and its score.
12040b57cec5SDimitry Andric // Sometimes we have more than one option (e.g., Opcode and Undefs), so we
12050b57cec5SDimitry Andric // are using the score to differentiate between the two.
12060b57cec5SDimitry Andric struct BestOpData {
12070b57cec5SDimitry Andric Optional<unsigned> Idx = None;
12080b57cec5SDimitry Andric unsigned Score = 0;
12090b57cec5SDimitry Andric } BestOp;
12100b57cec5SDimitry Andric
12110b57cec5SDimitry Andric // Iterate through all unused operands and look for the best.
12120b57cec5SDimitry Andric for (unsigned Idx = 0; Idx != NumOperands; ++Idx) {
12130b57cec5SDimitry Andric // Get the operand at Idx and Lane.
12140b57cec5SDimitry Andric OperandData &OpData = getData(Idx, Lane);
12150b57cec5SDimitry Andric Value *Op = OpData.V;
12160b57cec5SDimitry Andric bool OpAPO = OpData.APO;
12170b57cec5SDimitry Andric
12180b57cec5SDimitry Andric // Skip already selected operands.
12190b57cec5SDimitry Andric if (OpData.IsUsed)
12200b57cec5SDimitry Andric continue;
12210b57cec5SDimitry Andric
12220b57cec5SDimitry Andric // Skip if we are trying to move the operand to a position with a
12230b57cec5SDimitry Andric // different opcode in the linearized tree form. This would break the
12240b57cec5SDimitry Andric // semantics.
12250b57cec5SDimitry Andric if (OpAPO != OpIdxAPO)
12260b57cec5SDimitry Andric continue;
12270b57cec5SDimitry Andric
12280b57cec5SDimitry Andric // Look for an operand that matches the current mode.
12290b57cec5SDimitry Andric switch (RMode) {
12300b57cec5SDimitry Andric case ReorderingMode::Load:
1231480093f4SDimitry Andric case ReorderingMode::Constant:
1232480093f4SDimitry Andric case ReorderingMode::Opcode: {
12330b57cec5SDimitry Andric bool LeftToRight = Lane > LastLane;
12340b57cec5SDimitry Andric Value *OpLeft = (LeftToRight) ? OpLastLane : Op;
12350b57cec5SDimitry Andric Value *OpRight = (LeftToRight) ? Op : OpLastLane;
1236480093f4SDimitry Andric unsigned Score =
1237480093f4SDimitry Andric getLookAheadScore({OpLeft, LastLane}, {OpRight, Lane});
12380b57cec5SDimitry Andric if (Score > BestOp.Score) {
12390b57cec5SDimitry Andric BestOp.Idx = Idx;
12400b57cec5SDimitry Andric BestOp.Score = Score;
12410b57cec5SDimitry Andric }
12420b57cec5SDimitry Andric break;
12430b57cec5SDimitry Andric }
12440b57cec5SDimitry Andric case ReorderingMode::Splat:
12450b57cec5SDimitry Andric if (Op == OpLastLane)
12460b57cec5SDimitry Andric BestOp.Idx = Idx;
12470b57cec5SDimitry Andric break;
12480b57cec5SDimitry Andric case ReorderingMode::Failed:
12490b57cec5SDimitry Andric return None;
12500b57cec5SDimitry Andric }
12510b57cec5SDimitry Andric }
12520b57cec5SDimitry Andric
12530b57cec5SDimitry Andric if (BestOp.Idx) {
12540b57cec5SDimitry Andric getData(BestOp.Idx.getValue(), Lane).IsUsed = true;
12550b57cec5SDimitry Andric return BestOp.Idx;
12560b57cec5SDimitry Andric }
12570b57cec5SDimitry Andric // If we could not find a good match return None.
12580b57cec5SDimitry Andric return None;
12590b57cec5SDimitry Andric }
12600b57cec5SDimitry Andric
12610b57cec5SDimitry Andric /// Helper for reorderOperandVecs. \Returns the lane that we should start
12620b57cec5SDimitry Andric /// reordering from. This is the one which has the least number of operands
12630b57cec5SDimitry Andric /// that can freely move about.
getBestLaneToStartReordering() const12640b57cec5SDimitry Andric unsigned getBestLaneToStartReordering() const {
12650b57cec5SDimitry Andric unsigned BestLane = 0;
12660b57cec5SDimitry Andric unsigned Min = UINT_MAX;
12670b57cec5SDimitry Andric for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
12680b57cec5SDimitry Andric ++Lane) {
12690b57cec5SDimitry Andric unsigned NumFreeOps = getMaxNumOperandsThatCanBeReordered(Lane);
12700b57cec5SDimitry Andric if (NumFreeOps < Min) {
12710b57cec5SDimitry Andric Min = NumFreeOps;
12720b57cec5SDimitry Andric BestLane = Lane;
12730b57cec5SDimitry Andric }
12740b57cec5SDimitry Andric }
12750b57cec5SDimitry Andric return BestLane;
12760b57cec5SDimitry Andric }
12770b57cec5SDimitry Andric
12780b57cec5SDimitry Andric /// \Returns the maximum number of operands that are allowed to be reordered
12790b57cec5SDimitry Andric /// for \p Lane. This is used as a heuristic for selecting the first lane to
12800b57cec5SDimitry Andric /// start operand reordering.
getMaxNumOperandsThatCanBeReordered(unsigned Lane) const12810b57cec5SDimitry Andric unsigned getMaxNumOperandsThatCanBeReordered(unsigned Lane) const {
12820b57cec5SDimitry Andric unsigned CntTrue = 0;
12830b57cec5SDimitry Andric unsigned NumOperands = getNumOperands();
12840b57cec5SDimitry Andric // Operands with the same APO can be reordered. We therefore need to count
12850b57cec5SDimitry Andric // how many of them we have for each APO, like this: Cnt[APO] = x.
12860b57cec5SDimitry Andric // Since we only have two APOs, namely true and false, we can avoid using
12870b57cec5SDimitry Andric // a map. Instead we can simply count the number of operands that
12880b57cec5SDimitry Andric // correspond to one of them (in this case the 'true' APO), and calculate
12890b57cec5SDimitry Andric // the other by subtracting it from the total number of operands.
12900b57cec5SDimitry Andric for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
12910b57cec5SDimitry Andric if (getData(OpIdx, Lane).APO)
12920b57cec5SDimitry Andric ++CntTrue;
12930b57cec5SDimitry Andric unsigned CntFalse = NumOperands - CntTrue;
12940b57cec5SDimitry Andric return std::max(CntTrue, CntFalse);
12950b57cec5SDimitry Andric }
12960b57cec5SDimitry Andric
12970b57cec5SDimitry Andric /// Go through the instructions in VL and append their operands.
appendOperandsOfVL(ArrayRef<Value * > VL)12980b57cec5SDimitry Andric void appendOperandsOfVL(ArrayRef<Value *> VL) {
12990b57cec5SDimitry Andric assert(!VL.empty() && "Bad VL");
13000b57cec5SDimitry Andric assert((empty() || VL.size() == getNumLanes()) &&
13010b57cec5SDimitry Andric "Expected same number of lanes");
13020b57cec5SDimitry Andric assert(isa<Instruction>(VL[0]) && "Expected instruction");
13030b57cec5SDimitry Andric unsigned NumOperands = cast<Instruction>(VL[0])->getNumOperands();
13040b57cec5SDimitry Andric OpsVec.resize(NumOperands);
13050b57cec5SDimitry Andric unsigned NumLanes = VL.size();
13060b57cec5SDimitry Andric for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
13070b57cec5SDimitry Andric OpsVec[OpIdx].resize(NumLanes);
13080b57cec5SDimitry Andric for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
13090b57cec5SDimitry Andric assert(isa<Instruction>(VL[Lane]) && "Expected instruction");
13100b57cec5SDimitry Andric // Our tree has just 3 nodes: the root and two operands.
13110b57cec5SDimitry Andric // It is therefore trivial to get the APO. We only need to check the
13120b57cec5SDimitry Andric // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
13130b57cec5SDimitry Andric // RHS operand. The LHS operand of both add and sub is never attached
13140b57cec5SDimitry Andric // to an inversese operation in the linearized form, therefore its APO
13150b57cec5SDimitry Andric // is false. The RHS is true only if VL[Lane] is an inverse operation.
13160b57cec5SDimitry Andric
13170b57cec5SDimitry Andric // Since operand reordering is performed on groups of commutative
13180b57cec5SDimitry Andric // operations or alternating sequences (e.g., +, -), we can safely
13190b57cec5SDimitry Andric // tell the inverse operations by checking commutativity.
13200b57cec5SDimitry Andric bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
13210b57cec5SDimitry Andric bool APO = (OpIdx == 0) ? false : IsInverseOperation;
13220b57cec5SDimitry Andric OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
13230b57cec5SDimitry Andric APO, false};
13240b57cec5SDimitry Andric }
13250b57cec5SDimitry Andric }
13260b57cec5SDimitry Andric }
13270b57cec5SDimitry Andric
/// \returns the number of operands.
unsigned getNumOperands() const { return OpsVec.size(); }

/// \returns the number of lanes.
/// Note: indexes OpsVec[0], so operands must have been appended first
/// (OpsVec non-empty).
unsigned getNumLanes() const { return OpsVec[0].size(); }

/// \returns the operand value at \p OpIdx and \p Lane.
Value *getValue(unsigned OpIdx, unsigned Lane) const {
  return getData(OpIdx, Lane).V;
}

/// \returns true if the data structure is empty.
bool empty() const { return OpsVec.empty(); }

/// Clears the data.
void clear() { OpsVec.clear(); }
13440b57cec5SDimitry Andric
13450b57cec5SDimitry Andric /// \Returns true if there are enough operands identical to \p Op to fill
13460b57cec5SDimitry Andric /// the whole vector.
13470b57cec5SDimitry Andric /// Note: This modifies the 'IsUsed' flag, so a cleanUsed() must follow.
shouldBroadcast(Value * Op,unsigned OpIdx,unsigned Lane)13480b57cec5SDimitry Andric bool shouldBroadcast(Value *Op, unsigned OpIdx, unsigned Lane) {
13490b57cec5SDimitry Andric bool OpAPO = getData(OpIdx, Lane).APO;
13500b57cec5SDimitry Andric for (unsigned Ln = 0, Lns = getNumLanes(); Ln != Lns; ++Ln) {
13510b57cec5SDimitry Andric if (Ln == Lane)
13520b57cec5SDimitry Andric continue;
13530b57cec5SDimitry Andric // This is set to true if we found a candidate for broadcast at Lane.
13540b57cec5SDimitry Andric bool FoundCandidate = false;
13550b57cec5SDimitry Andric for (unsigned OpI = 0, OpE = getNumOperands(); OpI != OpE; ++OpI) {
13560b57cec5SDimitry Andric OperandData &Data = getData(OpI, Ln);
13570b57cec5SDimitry Andric if (Data.APO != OpAPO || Data.IsUsed)
13580b57cec5SDimitry Andric continue;
13590b57cec5SDimitry Andric if (Data.V == Op) {
13600b57cec5SDimitry Andric FoundCandidate = true;
13610b57cec5SDimitry Andric Data.IsUsed = true;
13620b57cec5SDimitry Andric break;
13630b57cec5SDimitry Andric }
13640b57cec5SDimitry Andric }
13650b57cec5SDimitry Andric if (!FoundCandidate)
13660b57cec5SDimitry Andric return false;
13670b57cec5SDimitry Andric }
13680b57cec5SDimitry Andric return true;
13690b57cec5SDimitry Andric }
13700b57cec5SDimitry Andric
public:
/// Initialize with all the operands of the instruction vector \p RootVL.
/// \p DL and \p SE are used for load/pointer scoring; \p R gives access to
/// the SLP tree for external-use cost queries.
VLOperands(ArrayRef<Value *> RootVL, const DataLayout &DL,
           ScalarEvolution &SE, const BoUpSLP &R)
    : DL(DL), SE(SE), R(R) {
  // Append all the operands of RootVL.
  appendOperandsOfVL(RootVL);
}
13790b57cec5SDimitry Andric
13800b57cec5SDimitry Andric /// \Returns a value vector with the operands across all lanes for the
13810b57cec5SDimitry Andric /// opearnd at \p OpIdx.
getVL(unsigned OpIdx) const13820b57cec5SDimitry Andric ValueList getVL(unsigned OpIdx) const {
13830b57cec5SDimitry Andric ValueList OpVL(OpsVec[OpIdx].size());
13840b57cec5SDimitry Andric assert(OpsVec[OpIdx].size() == getNumLanes() &&
13850b57cec5SDimitry Andric "Expected same num of lanes across all operands");
13860b57cec5SDimitry Andric for (unsigned Lane = 0, Lanes = getNumLanes(); Lane != Lanes; ++Lane)
13870b57cec5SDimitry Andric OpVL[Lane] = OpsVec[OpIdx][Lane].V;
13880b57cec5SDimitry Andric return OpVL;
13890b57cec5SDimitry Andric }
13900b57cec5SDimitry Andric
// Performs operand reordering for 2 or more operands.
// The original operands are in OrigOps[OpIdx][Lane].
// The reordered operands are returned in 'SortedOps[OpIdx][Lane]'.
void reorder() {
  unsigned NumOperands = getNumOperands();
  unsigned NumLanes = getNumLanes();
  // Each operand has its own mode. We are using this mode to help us select
  // the instructions for each lane, so that they match best with the ones
  // we have selected so far.
  SmallVector<ReorderingMode, 2> ReorderingModes(NumOperands);

  // This is a greedy single-pass algorithm. We are going over each lane
  // once and deciding on the best order right away with no back-tracking.
  // However, in order to increase its effectiveness, we start with the lane
  // that has operands that can move the least. For example, given the
  // following lanes:
  //  Lane 0 : A[0] = B[0] + C[0]   // Visited 3rd
  //  Lane 1 : A[1] = C[1] - B[1]   // Visited 1st
  //  Lane 2 : A[2] = B[2] + C[2]   // Visited 2nd
  //  Lane 3 : A[3] = C[3] - B[3]   // Visited 4th
  // we will start at Lane 1, since the operands of the subtraction cannot
  // be reordered. Then we will visit the rest of the lanes in a circular
  // fashion. That is, Lanes 2, then Lane 0, and finally Lane 3.

  // Find the first lane that we will start our search from.
  unsigned FirstLane = getBestLaneToStartReordering();

  // Initialize the modes, based on what the operand looks like in the
  // starting lane.
  for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
    Value *OpLane0 = getValue(OpIdx, FirstLane);
    // Keep track if we have instructions with all the same opcode on one
    // side.
    if (isa<LoadInst>(OpLane0))
      ReorderingModes[OpIdx] = ReorderingMode::Load;
    else if (isa<Instruction>(OpLane0)) {
      // Check if OpLane0 should be broadcast.
      if (shouldBroadcast(OpLane0, OpIdx, FirstLane))
        ReorderingModes[OpIdx] = ReorderingMode::Splat;
      else
        ReorderingModes[OpIdx] = ReorderingMode::Opcode;
    }
    else if (isa<Constant>(OpLane0))
      ReorderingModes[OpIdx] = ReorderingMode::Constant;
    else if (isa<Argument>(OpLane0))
      // Our best hope is a Splat. It may save some cost in some cases.
      ReorderingModes[OpIdx] = ReorderingMode::Splat;
    else
      // NOTE: This should be unreachable.
      ReorderingModes[OpIdx] = ReorderingMode::Failed;
  }

  // If the initial strategy fails for any of the operand indexes, then we
  // perform reordering again in a second pass. This helps avoid assigning
  // high priority to the failed strategy, and should improve reordering for
  // the non-failed operand indexes.
  for (int Pass = 0; Pass != 2; ++Pass) {
    // Skip the second pass if the first pass did not fail.
    bool StrategyFailed = false;
    // Mark all operand data as free to use.
    clearUsed();
    // We keep the original operand order for the FirstLane, so reorder the
    // rest of the lanes. We are visiting the nodes in a circular fashion,
    // using FirstLane as the center point and increasing the radius
    // distance.
    for (unsigned Distance = 1; Distance != NumLanes; ++Distance) {
      // Visit the lane on the right and then the lane on the left.
      for (int Direction : {+1, -1}) {
        int Lane = FirstLane + Direction * Distance;
        if (Lane < 0 || Lane >= (int)NumLanes)
          continue;
        int LastLane = Lane - Direction;
        assert(LastLane >= 0 && LastLane < (int)NumLanes &&
               "Out of bounds");
        // Look for a good match for each operand.
        for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
          // Search for the operand that matches SortedOps[OpIdx][Lane-1].
          Optional<unsigned> BestIdx =
              getBestOperand(OpIdx, Lane, LastLane, ReorderingModes);
          // By not selecting a value, we allow the operands that follow to
          // select a better matching value. We will get a non-null value in
          // the next run of getBestOperand().
          if (BestIdx) {
            // Swap the current operand with the one returned by
            // getBestOperand().
            swap(OpIdx, BestIdx.getValue(), Lane);
          } else {
            // We failed to find a best operand, set mode to 'Failed'.
            ReorderingModes[OpIdx] = ReorderingMode::Failed;
            // Enable the second pass.
            StrategyFailed = true;
          }
        }
      }
    }
    // Skip second pass if the strategy did not fail.
    if (!StrategyFailed)
      break;
  }
}
14900b57cec5SDimitry Andric
14910b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
getModeStr(ReorderingMode RMode)14920b57cec5SDimitry Andric LLVM_DUMP_METHOD static StringRef getModeStr(ReorderingMode RMode) {
14930b57cec5SDimitry Andric switch (RMode) {
14940b57cec5SDimitry Andric case ReorderingMode::Load:
14950b57cec5SDimitry Andric return "Load";
14960b57cec5SDimitry Andric case ReorderingMode::Opcode:
14970b57cec5SDimitry Andric return "Opcode";
14980b57cec5SDimitry Andric case ReorderingMode::Constant:
14990b57cec5SDimitry Andric return "Constant";
15000b57cec5SDimitry Andric case ReorderingMode::Splat:
15010b57cec5SDimitry Andric return "Splat";
15020b57cec5SDimitry Andric case ReorderingMode::Failed:
15030b57cec5SDimitry Andric return "Failed";
15040b57cec5SDimitry Andric }
15050b57cec5SDimitry Andric llvm_unreachable("Unimplemented Reordering Type");
15060b57cec5SDimitry Andric }
15070b57cec5SDimitry Andric
printMode(ReorderingMode RMode,raw_ostream & OS)15080b57cec5SDimitry Andric LLVM_DUMP_METHOD static raw_ostream &printMode(ReorderingMode RMode,
15090b57cec5SDimitry Andric raw_ostream &OS) {
15100b57cec5SDimitry Andric return OS << getModeStr(RMode);
15110b57cec5SDimitry Andric }
15120b57cec5SDimitry Andric
15130b57cec5SDimitry Andric /// Debug print.
dumpMode(ReorderingMode RMode)15140b57cec5SDimitry Andric LLVM_DUMP_METHOD static void dumpMode(ReorderingMode RMode) {
15150b57cec5SDimitry Andric printMode(RMode, dbgs());
15160b57cec5SDimitry Andric }
15170b57cec5SDimitry Andric
operator <<(raw_ostream & OS,ReorderingMode RMode)15180b57cec5SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, ReorderingMode RMode) {
15190b57cec5SDimitry Andric return printMode(RMode, OS);
15200b57cec5SDimitry Andric }
15210b57cec5SDimitry Andric
print(raw_ostream & OS) const15220b57cec5SDimitry Andric LLVM_DUMP_METHOD raw_ostream &print(raw_ostream &OS) const {
15230b57cec5SDimitry Andric const unsigned Indent = 2;
15240b57cec5SDimitry Andric unsigned Cnt = 0;
15250b57cec5SDimitry Andric for (const OperandDataVec &OpDataVec : OpsVec) {
15260b57cec5SDimitry Andric OS << "Operand " << Cnt++ << "\n";
15270b57cec5SDimitry Andric for (const OperandData &OpData : OpDataVec) {
15280b57cec5SDimitry Andric OS.indent(Indent) << "{";
15290b57cec5SDimitry Andric if (Value *V = OpData.V)
15300b57cec5SDimitry Andric OS << *V;
15310b57cec5SDimitry Andric else
15320b57cec5SDimitry Andric OS << "null";
15330b57cec5SDimitry Andric OS << ", APO:" << OpData.APO << "}\n";
15340b57cec5SDimitry Andric }
15350b57cec5SDimitry Andric OS << "\n";
15360b57cec5SDimitry Andric }
15370b57cec5SDimitry Andric return OS;
15380b57cec5SDimitry Andric }
15390b57cec5SDimitry Andric
15400b57cec5SDimitry Andric /// Debug print.
dump() const15410b57cec5SDimitry Andric LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
15420b57cec5SDimitry Andric #endif
15430b57cec5SDimitry Andric };
15440b57cec5SDimitry Andric
15458bcb0991SDimitry Andric /// Checks if the instruction is marked for deletion.
isDeleted(Instruction * I) const15468bcb0991SDimitry Andric bool isDeleted(Instruction *I) const { return DeletedInstructions.count(I); }
15478bcb0991SDimitry Andric
  /// Marks the operands of the given values for later deletion by replacing
  /// them with Undefs.
  void eraseInstructions(ArrayRef<Value *> AV);

  /// Performs the delayed deletion of all instructions collected in
  /// DeletedInstructions.
  ~BoUpSLP();

private:
  /// Checks if all users of \p I are the part of the vectorization tree.
  bool areAllUsersVectorized(Instruction *I,
                             ArrayRef<Value *> VectorizedVals) const;

  /// \returns the cost of the vectorizable entry.
  InstructionCost getEntryCost(const TreeEntry *E,
                               ArrayRef<Value *> VectorizedVals);

  /// This is the recursive part of buildTree.
  void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth,
                     const EdgeInfo &EI);

  /// \returns true if the ExtractElement/ExtractValue instructions in \p VL can
  /// be vectorized to use the original vector (or aggregate "bitcast" to a
  /// vector) and sets \p CurrentOrder to the identity permutation; otherwise
  /// returns false, setting \p CurrentOrder to either an empty vector or a
  /// non-identity permutation that allows to reuse extract instructions.
  bool canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
                       SmallVectorImpl<unsigned> &CurrentOrder) const;

  /// Vectorize a single entry in the tree.
  Value *vectorizeTree(TreeEntry *E);

  /// Vectorize a single entry in the tree, starting in \p VL.
  Value *vectorizeTree(ArrayRef<Value *> VL);

  /// \returns the scalarization cost for this type. Scalarization in this
  /// context means the creation of vectors from a group of scalars.
  InstructionCost
  getGatherCost(FixedVectorType *Ty,
                const DenseSet<unsigned> &ShuffledIndices) const;

  /// Checks if the gathered \p VL can be represented as shuffle(s) of previous
  /// tree entries.
  /// \returns ShuffleKind, if gathered values can be represented as shuffles of
  /// previous tree entries. \p Mask is filled with the shuffle mask.
  Optional<TargetTransformInfo::ShuffleKind>
  isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl<int> &Mask,
                        SmallVectorImpl<const TreeEntry *> &Entries);

  /// \returns the scalarization cost for this list of values. Assuming that
  /// this subtree gets vectorized, we may need to extract the values from the
  /// roots. This method calculates the cost of extracting the values.
  InstructionCost getGatherCost(ArrayRef<Value *> VL) const;

  /// Set the Builder insert point to one after the last instruction in
  /// the bundle.
  void setInsertPointAfterBundle(const TreeEntry *E);

  /// \returns a vector from a collection of scalars in \p VL.
  Value *gather(ArrayRef<Value *> VL);

  /// \returns whether the VectorizableTree is fully vectorizable and will
  /// be beneficial even if the tree height is tiny.
  bool isFullyVectorizableTinyTree() const;

  /// Reorder commutative or alt operands to get better probability of
  /// generating vectorized code.
  static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
                                             SmallVectorImpl<Value *> &Left,
                                             SmallVectorImpl<Value *> &Right,
                                             const DataLayout &DL,
                                             ScalarEvolution &SE,
                                             const BoUpSLP &R);
  /// A node of the vectorizable tree: a bundle of scalars together with the
  /// state needed to vectorize (or gather) them.
  struct TreeEntry {
    using VecTreeTy = SmallVector<std::unique_ptr<TreeEntry>, 8>;

    /// \p Container is the owning VectorizableTree (see the Container member
    /// below for why the back-reference is needed).
    TreeEntry(VecTreeTy &Container) : Container(Container) {}

    /// \returns true if the scalars in VL are equal to this entry.
    bool isSame(ArrayRef<Value *> VL) const {
      if (VL.size() == Scalars.size())
        return std::equal(VL.begin(), VL.end(), Scalars.begin());
      // Otherwise VL may still match modulo the reuse shuffle:
      // VL[I] must equal Scalars[ReuseShuffleIndices[I]].
      return VL.size() == ReuseShuffleIndices.size() &&
             std::equal(
                 VL.begin(), VL.end(), ReuseShuffleIndices.begin(),
                 [this](Value *V, int Idx) { return V == Scalars[Idx]; });
    }

    /// A vector of scalars.
    ValueList Scalars;

    /// The Scalars are vectorized into this value. It is initialized to Null.
    Value *VectorizedValue = nullptr;

    /// Do we need to gather this sequence or vectorize it
    /// (either with vector instruction or with scatter/gather
    /// intrinsics for store/load)?
    enum EntryState { Vectorize, ScatterVectorize, NeedToGather };
    EntryState State;

    /// Does this sequence require some shuffling?
    SmallVector<int, 4> ReuseShuffleIndices;

    /// Does this entry require reordering?
    SmallVector<unsigned, 4> ReorderIndices;

    /// Points back to the VectorizableTree.
    ///
    /// Only used for Graphviz right now. Unfortunately GraphTrait::NodeRef has
    /// to be a pointer and needs to be able to initialize the child iterator.
    /// Thus we need a reference back to the container to translate the indices
    /// to entries.
    VecTreeTy &Container;

    /// The TreeEntry index containing the user of this entry. We can actually
    /// have multiple users so the data structure is not truly a tree.
    SmallVector<EdgeInfo, 1> UserTreeIndices;

    /// The index of this treeEntry in VectorizableTree.
    int Idx = -1;

  private:
    /// The operands of each instruction in each lane Operands[op_index][lane].
    /// Note: This helps avoid the replication of the code that performs the
    /// reordering of operands during buildTree_rec() and vectorizeTree().
    SmallVector<ValueList, 2> Operands;

    /// The main/alternate instruction.
    Instruction *MainOp = nullptr;
    Instruction *AltOp = nullptr;

  public:
    /// Set this bundle's \p OpIdx'th operand to \p OpVL.
    /// \p OpVL must have one value per lane; the slot must not have been
    /// filled before (asserted).
    void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL) {
      if (Operands.size() < OpIdx + 1)
        Operands.resize(OpIdx + 1);
      assert(Operands[OpIdx].empty() && "Already resized?");
      Operands[OpIdx].resize(Scalars.size());
      for (unsigned Lane = 0, E = Scalars.size(); Lane != E; ++Lane)
        Operands[OpIdx][Lane] = OpVL[Lane];
    }

    /// Set the operands of this bundle in their original order, i.e.
    /// Operands[OpIdx][Lane] = Scalars[Lane]->getOperand(OpIdx). All scalars
    /// must be instructions with the same operand count (asserted).
    void setOperandsInOrder() {
      assert(Operands.empty() && "Already initialized?");
      auto *I0 = cast<Instruction>(Scalars[0]);
      Operands.resize(I0->getNumOperands());
      unsigned NumLanes = Scalars.size();
      for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
           OpIdx != NumOperands; ++OpIdx) {
        Operands[OpIdx].resize(NumLanes);
        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
          auto *I = cast<Instruction>(Scalars[Lane]);
          assert(I->getNumOperands() == NumOperands &&
                 "Expected same number of operands");
          Operands[OpIdx][Lane] = I->getOperand(OpIdx);
        }
      }
    }

    /// \returns the \p OpIdx operand of this TreeEntry.
    ValueList &getOperand(unsigned OpIdx) {
      assert(OpIdx < Operands.size() && "Off bounds");
      return Operands[OpIdx];
    }

    /// \returns the number of operands.
    unsigned getNumOperands() const { return Operands.size(); }

    /// \return the single \p OpIdx operand (the lane-0 value).
    Value *getSingleOperand(unsigned OpIdx) const {
      assert(OpIdx < Operands.size() && "Off bounds");
      assert(!Operands[OpIdx].empty() && "No operand available");
      return Operands[OpIdx][0];
    }

    /// Some of the instructions in the list have alternate opcodes.
    bool isAltShuffle() const {
      return getOpcode() != getAltOpcode();
    }

    /// \returns true if \p I's opcode matches either the main or the
    /// alternate opcode of this bundle.
    bool isOpcodeOrAlt(Instruction *I) const {
      unsigned CheckedOpcode = I->getOpcode();
      return (getOpcode() == CheckedOpcode ||
              getAltOpcode() == CheckedOpcode);
    }

    /// Chooses the correct key for scheduling data. If \p Op has the same (or
    /// alternate) opcode as \p OpValue, the key is \p Op. Otherwise the key is
    /// \p OpValue.
    Value *isOneOf(Value *Op) const {
      auto *I = dyn_cast<Instruction>(Op);
      if (I && isOpcodeOrAlt(I))
        return Op;
      return MainOp;
    }

    /// Record the main/alternate instructions of this bundle from \p S.
    void setOperations(const InstructionsState &S) {
      MainOp = S.MainOp;
      AltOp = S.AltOp;
    }

    /// \returns the main instruction, or nullptr if not set.
    Instruction *getMainOp() const {
      return MainOp;
    }

    /// \returns the alternate instruction, or nullptr if not set.
    Instruction *getAltOp() const {
      return AltOp;
    }

    /// The main/alternate opcodes for the list of instructions.
    /// \returns 0 when the corresponding instruction is not set.
    unsigned getOpcode() const {
      return MainOp ? MainOp->getOpcode() : 0;
    }

    unsigned getAltOpcode() const {
      return AltOp ? AltOp->getOpcode() : 0;
    }

    /// Update operations state of this entry if reorder occurred.
    /// \returns true if ReorderIndices is non-empty; in that case the
    /// main/alternate instructions are re-derived starting from the scalar
    /// the reordering moved to the front.
    bool updateStateIfReorder() {
      if (ReorderIndices.empty())
        return false;
      InstructionsState S = getSameOpcode(Scalars, ReorderIndices.front());
      setOperations(S);
      return true;
    }

    /// When ReuseShuffleIndices is empty it just returns position of \p V
    /// within vector of Scalars. Otherwise, try to remap on its reuse index.
    int findLaneForValue(Value *V) const {
      unsigned FoundLane = std::distance(Scalars.begin(), find(Scalars, V));
      assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
      if (!ReuseShuffleIndices.empty()) {
        FoundLane = std::distance(ReuseShuffleIndices.begin(),
                                  find(ReuseShuffleIndices, FoundLane));
      }
      return FoundLane;
    }

#ifndef NDEBUG
    /// Debug printer.
    LLVM_DUMP_METHOD void dump() const {
      dbgs() << Idx << ".\n";
      for (unsigned OpI = 0, OpE = Operands.size(); OpI != OpE; ++OpI) {
        dbgs() << "Operand " << OpI << ":\n";
        for (const Value *V : Operands[OpI])
          dbgs().indent(2) << *V << "\n";
      }
      dbgs() << "Scalars: \n";
      for (Value *V : Scalars)
        dbgs().indent(2) << *V << "\n";
      dbgs() << "State: ";
      switch (State) {
      case Vectorize:
        dbgs() << "Vectorize\n";
        break;
      case ScatterVectorize:
        dbgs() << "ScatterVectorize\n";
        break;
      case NeedToGather:
        dbgs() << "NeedToGather\n";
        break;
      }
      dbgs() << "MainOp: ";
      if (MainOp)
        dbgs() << *MainOp << "\n";
      else
        dbgs() << "NULL\n";
      dbgs() << "AltOp: ";
      if (AltOp)
        dbgs() << *AltOp << "\n";
      else
        dbgs() << "NULL\n";
      dbgs() << "VectorizedValue: ";
      if (VectorizedValue)
        dbgs() << *VectorizedValue << "\n";
      else
        dbgs() << "NULL\n";
      dbgs() << "ReuseShuffleIndices: ";
      if (ReuseShuffleIndices.empty())
        dbgs() << "Empty";
      else
        for (unsigned ReuseIdx : ReuseShuffleIndices)
          dbgs() << ReuseIdx << ", ";
      dbgs() << "\n";
      dbgs() << "ReorderIndices: ";
      for (unsigned ReorderIdx : ReorderIndices)
        dbgs() << ReorderIdx << ", ";
      dbgs() << "\n";
      dbgs() << "UserTreeIndices: ";
      for (const auto &EInfo : UserTreeIndices)
        dbgs() << EInfo << ", ";
      dbgs() << "\n";
    }
#endif
  };
18400b57cec5SDimitry Andric
1841af732203SDimitry Andric #ifndef NDEBUG
dumpTreeCosts(const TreeEntry * E,InstructionCost ReuseShuffleCost,InstructionCost VecCost,InstructionCost ScalarCost) const18425f7ddb14SDimitry Andric void dumpTreeCosts(const TreeEntry *E, InstructionCost ReuseShuffleCost,
1843af732203SDimitry Andric InstructionCost VecCost,
1844af732203SDimitry Andric InstructionCost ScalarCost) const {
1845af732203SDimitry Andric dbgs() << "SLP: Calculated costs for Tree:\n"; E->dump();
1846af732203SDimitry Andric dbgs() << "SLP: Costs:\n";
1847af732203SDimitry Andric dbgs() << "SLP: ReuseShuffleCost = " << ReuseShuffleCost << "\n";
1848af732203SDimitry Andric dbgs() << "SLP: VectorCost = " << VecCost << "\n";
1849af732203SDimitry Andric dbgs() << "SLP: ScalarCost = " << ScalarCost << "\n";
1850af732203SDimitry Andric dbgs() << "SLP: ReuseShuffleCost + VecCost - ScalarCost = " <<
1851af732203SDimitry Andric ReuseShuffleCost + VecCost - ScalarCost << "\n";
1852af732203SDimitry Andric }
1853af732203SDimitry Andric #endif
1854af732203SDimitry Andric
18550b57cec5SDimitry Andric /// Create a new VectorizableTree entry.
newTreeEntry(ArrayRef<Value * > VL,Optional<ScheduleData * > Bundle,const InstructionsState & S,const EdgeInfo & UserTreeIdx,ArrayRef<unsigned> ReuseShuffleIndices=None,ArrayRef<unsigned> ReorderIndices=None)18568bcb0991SDimitry Andric TreeEntry *newTreeEntry(ArrayRef<Value *> VL, Optional<ScheduleData *> Bundle,
18578bcb0991SDimitry Andric const InstructionsState &S,
18580b57cec5SDimitry Andric const EdgeInfo &UserTreeIdx,
18590b57cec5SDimitry Andric ArrayRef<unsigned> ReuseShuffleIndices = None,
18600b57cec5SDimitry Andric ArrayRef<unsigned> ReorderIndices = None) {
1861af732203SDimitry Andric TreeEntry::EntryState EntryState =
1862af732203SDimitry Andric Bundle ? TreeEntry::Vectorize : TreeEntry::NeedToGather;
1863af732203SDimitry Andric return newTreeEntry(VL, EntryState, Bundle, S, UserTreeIdx,
1864af732203SDimitry Andric ReuseShuffleIndices, ReorderIndices);
1865af732203SDimitry Andric }
1866af732203SDimitry Andric
newTreeEntry(ArrayRef<Value * > VL,TreeEntry::EntryState EntryState,Optional<ScheduleData * > Bundle,const InstructionsState & S,const EdgeInfo & UserTreeIdx,ArrayRef<unsigned> ReuseShuffleIndices=None,ArrayRef<unsigned> ReorderIndices=None)1867af732203SDimitry Andric TreeEntry *newTreeEntry(ArrayRef<Value *> VL,
1868af732203SDimitry Andric TreeEntry::EntryState EntryState,
1869af732203SDimitry Andric Optional<ScheduleData *> Bundle,
1870af732203SDimitry Andric const InstructionsState &S,
1871af732203SDimitry Andric const EdgeInfo &UserTreeIdx,
1872af732203SDimitry Andric ArrayRef<unsigned> ReuseShuffleIndices = None,
1873af732203SDimitry Andric ArrayRef<unsigned> ReorderIndices = None) {
1874af732203SDimitry Andric assert(((!Bundle && EntryState == TreeEntry::NeedToGather) ||
1875af732203SDimitry Andric (Bundle && EntryState != TreeEntry::NeedToGather)) &&
1876af732203SDimitry Andric "Need to vectorize gather entry?");
18778bcb0991SDimitry Andric VectorizableTree.push_back(std::make_unique<TreeEntry>(VectorizableTree));
18780b57cec5SDimitry Andric TreeEntry *Last = VectorizableTree.back().get();
18790b57cec5SDimitry Andric Last->Idx = VectorizableTree.size() - 1;
18800b57cec5SDimitry Andric Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
1881af732203SDimitry Andric Last->State = EntryState;
18820b57cec5SDimitry Andric Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
18830b57cec5SDimitry Andric ReuseShuffleIndices.end());
1884af732203SDimitry Andric Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end());
18858bcb0991SDimitry Andric Last->setOperations(S);
1886af732203SDimitry Andric if (Last->State != TreeEntry::NeedToGather) {
1887af732203SDimitry Andric for (Value *V : VL) {
1888af732203SDimitry Andric assert(!getTreeEntry(V) && "Scalar already in tree!");
1889af732203SDimitry Andric ScalarToTreeEntry[V] = Last;
18900b57cec5SDimitry Andric }
18918bcb0991SDimitry Andric // Update the scheduler bundle to point to this TreeEntry.
18928bcb0991SDimitry Andric unsigned Lane = 0;
18938bcb0991SDimitry Andric for (ScheduleData *BundleMember = Bundle.getValue(); BundleMember;
18948bcb0991SDimitry Andric BundleMember = BundleMember->NextInBundle) {
18958bcb0991SDimitry Andric BundleMember->TE = Last;
18968bcb0991SDimitry Andric BundleMember->Lane = Lane;
18978bcb0991SDimitry Andric ++Lane;
18988bcb0991SDimitry Andric }
18998bcb0991SDimitry Andric assert((!Bundle.getValue() || Lane == VL.size()) &&
19008bcb0991SDimitry Andric "Bundle and VL out of sync");
19010b57cec5SDimitry Andric } else {
19020b57cec5SDimitry Andric MustGather.insert(VL.begin(), VL.end());
19030b57cec5SDimitry Andric }
19040b57cec5SDimitry Andric
19050b57cec5SDimitry Andric if (UserTreeIdx.UserTE)
19060b57cec5SDimitry Andric Last->UserTreeIndices.push_back(UserTreeIdx);
19070b57cec5SDimitry Andric
19080b57cec5SDimitry Andric return Last;
19090b57cec5SDimitry Andric }
19100b57cec5SDimitry Andric
19110b57cec5SDimitry Andric /// -- Vectorization State --
19120b57cec5SDimitry Andric /// Holds all of the tree entries.
19130b57cec5SDimitry Andric TreeEntry::VecTreeTy VectorizableTree;
19140b57cec5SDimitry Andric
19150b57cec5SDimitry Andric #ifndef NDEBUG
19160b57cec5SDimitry Andric /// Debug printer.
dumpVectorizableTree() const19170b57cec5SDimitry Andric LLVM_DUMP_METHOD void dumpVectorizableTree() const {
19180b57cec5SDimitry Andric for (unsigned Id = 0, IdE = VectorizableTree.size(); Id != IdE; ++Id) {
19190b57cec5SDimitry Andric VectorizableTree[Id]->dump();
19200b57cec5SDimitry Andric dbgs() << "\n";
19210b57cec5SDimitry Andric }
19220b57cec5SDimitry Andric }
19230b57cec5SDimitry Andric #endif
19240b57cec5SDimitry Andric
getTreeEntry(Value * V)1925af732203SDimitry Andric TreeEntry *getTreeEntry(Value *V) { return ScalarToTreeEntry.lookup(V); }
19260b57cec5SDimitry Andric
getTreeEntry(Value * V) const19270b57cec5SDimitry Andric const TreeEntry *getTreeEntry(Value *V) const {
1928af732203SDimitry Andric return ScalarToTreeEntry.lookup(V);
19290b57cec5SDimitry Andric }
19300b57cec5SDimitry Andric
19310b57cec5SDimitry Andric /// Maps a specific scalar to its tree entry.
19328bcb0991SDimitry Andric SmallDenseMap<Value*, TreeEntry *> ScalarToTreeEntry;
19330b57cec5SDimitry Andric
19345ffd83dbSDimitry Andric /// Maps a value to the proposed vectorizable size.
19355ffd83dbSDimitry Andric SmallDenseMap<Value *, unsigned> InstrElementSize;
19365ffd83dbSDimitry Andric
19370b57cec5SDimitry Andric /// A list of scalars that we found that we need to keep as scalars.
19380b57cec5SDimitry Andric ValueSet MustGather;
19390b57cec5SDimitry Andric
  /// This POD struct describes one external user in the vectorized tree:
  /// a use of a vectorized scalar from outside the tree, which will require
  /// extracting the scalar from the vector after vectorization (see
  /// ExternalUses below).
  struct ExternalUser {
    ExternalUser(Value *S, llvm::User *U, int L)
        : Scalar(S), User(U), Lane(L) {}

    // Which scalar in our function.
    Value *Scalar;

    // Which user that uses the scalar.
    llvm::User *User;

    // Which lane does the scalar belong to.
    int Lane;
  };
19540b57cec5SDimitry Andric using UserList = SmallVector<ExternalUser, 16>;
19550b57cec5SDimitry Andric
19560b57cec5SDimitry Andric /// Checks if two instructions may access the same memory.
19570b57cec5SDimitry Andric ///
19580b57cec5SDimitry Andric /// \p Loc1 is the location of \p Inst1. It is passed explicitly because it
19590b57cec5SDimitry Andric /// is invariant in the calling loop.
isAliased(const MemoryLocation & Loc1,Instruction * Inst1,Instruction * Inst2)19600b57cec5SDimitry Andric bool isAliased(const MemoryLocation &Loc1, Instruction *Inst1,
19610b57cec5SDimitry Andric Instruction *Inst2) {
19620b57cec5SDimitry Andric // First check if the result is already in the cache.
19630b57cec5SDimitry Andric AliasCacheKey key = std::make_pair(Inst1, Inst2);
19640b57cec5SDimitry Andric Optional<bool> &result = AliasCache[key];
19650b57cec5SDimitry Andric if (result.hasValue()) {
19660b57cec5SDimitry Andric return result.getValue();
19670b57cec5SDimitry Andric }
19680b57cec5SDimitry Andric MemoryLocation Loc2 = getLocation(Inst2, AA);
19690b57cec5SDimitry Andric bool aliased = true;
19700b57cec5SDimitry Andric if (Loc1.Ptr && Loc2.Ptr && isSimple(Inst1) && isSimple(Inst2)) {
19710b57cec5SDimitry Andric // Do the alias check.
19725f7ddb14SDimitry Andric aliased = !AA->isNoAlias(Loc1, Loc2);
19730b57cec5SDimitry Andric }
19740b57cec5SDimitry Andric // Store the result in the cache.
19750b57cec5SDimitry Andric result = aliased;
19760b57cec5SDimitry Andric return aliased;
19770b57cec5SDimitry Andric }
19780b57cec5SDimitry Andric
/// Key type for the alias cache: an ordered pair of the two instructions
/// whose aliasing was queried.
using AliasCacheKey = std::pair<Instruction *, Instruction *>;

/// Cache for alias results. None means the pair has not been queried yet.
/// TODO: consider moving this to the AliasAnalysis itself.
DenseMap<AliasCacheKey, Optional<bool>> AliasCache;
19840b57cec5SDimitry Andric
19850b57cec5SDimitry Andric /// Removes an instruction from its block and eventually deletes it.
19860b57cec5SDimitry Andric /// It's like Instruction::eraseFromParent() except that the actual deletion
19870b57cec5SDimitry Andric /// is delayed until BoUpSLP is destructed.
19880b57cec5SDimitry Andric /// This is required to ensure that there are no incorrect collisions in the
19890b57cec5SDimitry Andric /// AliasCache, which can happen if a new instruction is allocated at the
19900b57cec5SDimitry Andric /// same address as a previously deleted instruction.
eraseInstruction(Instruction * I,bool ReplaceOpsWithUndef=false)19918bcb0991SDimitry Andric void eraseInstruction(Instruction *I, bool ReplaceOpsWithUndef = false) {
19928bcb0991SDimitry Andric auto It = DeletedInstructions.try_emplace(I, ReplaceOpsWithUndef).first;
19938bcb0991SDimitry Andric It->getSecond() = It->getSecond() && ReplaceOpsWithUndef;
19940b57cec5SDimitry Andric }
19950b57cec5SDimitry Andric
/// Temporary store for deleted instructions. Instructions will be deleted
/// eventually when the BoUpSLP is destructed. The mapped bool records
/// whether the instruction's operands should be replaced with undef on
/// deletion (see eraseInstruction()).
DenseMap<Instruction *, bool> DeletedInstructions;

/// A list of values that need to extracted out of the tree.
/// This list holds pairs of (Internal Scalar : External User). External User
/// can be nullptr, it means that this Internal Scalar will be used later,
/// after vectorization.
UserList ExternalUses;

/// Values used only by @llvm.assume calls.
SmallPtrSet<const Value *, 32> EphValues;

/// Holds all of the instructions that we gathered.
SetVector<Instruction *> GatherSeq;

/// A list of blocks that we are going to CSE.
SetVector<BasicBlock *> CSEBlocks;
20140b57cec5SDimitry Andric
/// Contains all scheduling relevant data for an instruction.
/// A ScheduleData either represents a single instruction or a member of an
/// instruction bundle (= a group of instructions which is combined into a
/// vector instruction).
struct ScheduleData {
  // The initial value for the dependency counters. It means that the
  // dependencies are not calculated yet.
  enum { InvalidDeps = -1 };

  ScheduleData() = default;

  /// (Re-)initializes this node for a new scheduling region.
  /// \p OpVal records the value this node stands for (stored in OpValue).
  /// Note: UnscheduledDepsInBundle must be set before clearDependencies(),
  /// which resets the dependency counters through incrementUnscheduledDeps.
  void init(int BlockSchedulingRegionID, Value *OpVal) {
    FirstInBundle = this;
    NextInBundle = nullptr;
    NextLoadStore = nullptr;
    IsScheduled = false;
    SchedulingRegionID = BlockSchedulingRegionID;
    UnscheduledDepsInBundle = UnscheduledDeps;
    clearDependencies();
    OpValue = OpVal;
    TE = nullptr;
    Lane = -1;
  }

  /// Returns true if the dependency information has been calculated.
  bool hasValidDependencies() const { return Dependencies != InvalidDeps; }

  /// Returns true for single instructions and for bundle representatives
  /// (= the head of a bundle).
  bool isSchedulingEntity() const { return FirstInBundle == this; }

  /// Returns true if it represents an instruction bundle and not only a
  /// single instruction.
  bool isPartOfBundle() const {
    return NextInBundle != nullptr || FirstInBundle != this;
  }

  /// Returns true if it is ready for scheduling, i.e. it has no more
  /// unscheduled depending instructions/bundles.
  bool isReady() const {
    assert(isSchedulingEntity() &&
           "can't consider non-scheduling entity for ready list");
    return UnscheduledDepsInBundle == 0 && !IsScheduled;
  }

  /// Modifies the number of unscheduled dependencies, also updating it for
  /// the whole bundle.
  /// \returns the bundle's updated unscheduled-dependency count.
  int incrementUnscheduledDeps(int Incr) {
    UnscheduledDeps += Incr;
    return FirstInBundle->UnscheduledDepsInBundle += Incr;
  }

  /// Sets the number of unscheduled dependencies to the number of
  /// dependencies.
  void resetUnscheduledDeps() {
    incrementUnscheduledDeps(Dependencies - UnscheduledDeps);
  }

  /// Clears all dependency information.
  void clearDependencies() {
    Dependencies = InvalidDeps;
    resetUnscheduledDeps();
    MemoryDependencies.clear();
  }

  /// Debug printing: a non-head bundle member prints as "/ <inst>", a
  /// bundle head prints all members as "[a;b;c]", and a stand-alone
  /// instruction prints plainly.
  void dump(raw_ostream &os) const {
    if (!isSchedulingEntity()) {
      os << "/ " << *Inst;
    } else if (NextInBundle) {
      os << '[' << *Inst;
      ScheduleData *SD = NextInBundle;
      while (SD) {
        os << ';' << *SD->Inst;
        SD = SD->NextInBundle;
      }
      os << ']';
    } else {
      os << *Inst;
    }
  }

  /// The instruction this node represents.
  Instruction *Inst = nullptr;

  /// Points to the head in an instruction bundle (and always to this for
  /// single instructions).
  ScheduleData *FirstInBundle = nullptr;

  /// Single linked list of all instructions in a bundle. Null if it is a
  /// single instruction.
  ScheduleData *NextInBundle = nullptr;

  /// Single linked list of all memory instructions (e.g. load, store, call)
  /// in the block - until the end of the scheduling region.
  ScheduleData *NextLoadStore = nullptr;

  /// The dependent memory instructions.
  /// This list is derived on demand in calculateDependencies().
  SmallVector<ScheduleData *, 4> MemoryDependencies;

  /// This ScheduleData is in the current scheduling region if this matches
  /// the current SchedulingRegionID of BlockScheduling.
  int SchedulingRegionID = 0;

  /// Used for getting a "good" final ordering of instructions.
  int SchedulingPriority = 0;

  /// The number of dependencies. Constitutes of the number of users of the
  /// instruction plus the number of dependent memory instructions (if any).
  /// This value is calculated on demand.
  /// If InvalidDeps, the number of dependencies is not calculated yet.
  int Dependencies = InvalidDeps;

  /// The number of dependencies minus the number of dependencies of scheduled
  /// instructions. As soon as this is zero, the instruction/bundle gets ready
  /// for scheduling.
  /// Note that this is negative as long as Dependencies is not calculated.
  int UnscheduledDeps = InvalidDeps;

  /// The sum of UnscheduledDeps in a bundle. Equals to UnscheduledDeps for
  /// single instructions.
  int UnscheduledDepsInBundle = InvalidDeps;

  /// True if this instruction is scheduled (or considered as scheduled in the
  /// dry-run).
  bool IsScheduled = false;

  /// Opcode of the current instruction in the schedule data.
  Value *OpValue = nullptr;

  /// The TreeEntry that this instruction corresponds to.
  TreeEntry *TE = nullptr;

  /// The lane of this node in the TreeEntry.
  int Lane = -1;
};
21500b57cec5SDimitry Andric
#ifndef NDEBUG
/// Debug support: stream a ScheduleData (or whole bundle) via its dump().
friend inline raw_ostream &operator<<(raw_ostream &os,
                                      const BoUpSLP::ScheduleData &SD) {
  SD.dump(os);
  return os;
}
#endif

// Allow the graph-traits helpers (defined below) access to BoUpSLP
// internals such as VectorizableTree.
friend struct GraphTraits<BoUpSLP *>;
friend struct DOTGraphTraits<BoUpSLP *>;
21610b57cec5SDimitry Andric
21620b57cec5SDimitry Andric /// Contains all scheduling data for a basic block.
21630b57cec5SDimitry Andric struct BlockScheduling {
BlockSchedulingllvm::slpvectorizer::BoUpSLP::BlockScheduling21640b57cec5SDimitry Andric BlockScheduling(BasicBlock *BB)
21650b57cec5SDimitry Andric : BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize) {}
21660b57cec5SDimitry Andric
clearllvm::slpvectorizer::BoUpSLP::BlockScheduling21670b57cec5SDimitry Andric void clear() {
21680b57cec5SDimitry Andric ReadyInsts.clear();
21690b57cec5SDimitry Andric ScheduleStart = nullptr;
21700b57cec5SDimitry Andric ScheduleEnd = nullptr;
21710b57cec5SDimitry Andric FirstLoadStoreInRegion = nullptr;
21720b57cec5SDimitry Andric LastLoadStoreInRegion = nullptr;
21730b57cec5SDimitry Andric
21740b57cec5SDimitry Andric // Reduce the maximum schedule region size by the size of the
21750b57cec5SDimitry Andric // previous scheduling run.
21760b57cec5SDimitry Andric ScheduleRegionSizeLimit -= ScheduleRegionSize;
21770b57cec5SDimitry Andric if (ScheduleRegionSizeLimit < MinScheduleRegionSize)
21780b57cec5SDimitry Andric ScheduleRegionSizeLimit = MinScheduleRegionSize;
21790b57cec5SDimitry Andric ScheduleRegionSize = 0;
21800b57cec5SDimitry Andric
21810b57cec5SDimitry Andric // Make a new scheduling region, i.e. all existing ScheduleData is not
21820b57cec5SDimitry Andric // in the new region yet.
21830b57cec5SDimitry Andric ++SchedulingRegionID;
21840b57cec5SDimitry Andric }
21850b57cec5SDimitry Andric
getScheduleDatallvm::slpvectorizer::BoUpSLP::BlockScheduling21860b57cec5SDimitry Andric ScheduleData *getScheduleData(Value *V) {
21870b57cec5SDimitry Andric ScheduleData *SD = ScheduleDataMap[V];
21880b57cec5SDimitry Andric if (SD && SD->SchedulingRegionID == SchedulingRegionID)
21890b57cec5SDimitry Andric return SD;
21900b57cec5SDimitry Andric return nullptr;
21910b57cec5SDimitry Andric }
21920b57cec5SDimitry Andric
getScheduleDatallvm::slpvectorizer::BoUpSLP::BlockScheduling21930b57cec5SDimitry Andric ScheduleData *getScheduleData(Value *V, Value *Key) {
21940b57cec5SDimitry Andric if (V == Key)
21950b57cec5SDimitry Andric return getScheduleData(V);
21960b57cec5SDimitry Andric auto I = ExtraScheduleDataMap.find(V);
21970b57cec5SDimitry Andric if (I != ExtraScheduleDataMap.end()) {
21980b57cec5SDimitry Andric ScheduleData *SD = I->second[Key];
21990b57cec5SDimitry Andric if (SD && SD->SchedulingRegionID == SchedulingRegionID)
22000b57cec5SDimitry Andric return SD;
22010b57cec5SDimitry Andric }
22020b57cec5SDimitry Andric return nullptr;
22030b57cec5SDimitry Andric }
22040b57cec5SDimitry Andric
isInSchedulingRegionllvm::slpvectorizer::BoUpSLP::BlockScheduling2205480093f4SDimitry Andric bool isInSchedulingRegion(ScheduleData *SD) const {
22060b57cec5SDimitry Andric return SD->SchedulingRegionID == SchedulingRegionID;
22070b57cec5SDimitry Andric }
22080b57cec5SDimitry Andric
22090b57cec5SDimitry Andric /// Marks an instruction as scheduled and puts all dependent ready
22100b57cec5SDimitry Andric /// instructions into the ready-list.
22110b57cec5SDimitry Andric template <typename ReadyListType>
schedulellvm::slpvectorizer::BoUpSLP::BlockScheduling22120b57cec5SDimitry Andric void schedule(ScheduleData *SD, ReadyListType &ReadyList) {
22130b57cec5SDimitry Andric SD->IsScheduled = true;
22140b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: schedule " << *SD << "\n");
22150b57cec5SDimitry Andric
22160b57cec5SDimitry Andric ScheduleData *BundleMember = SD;
22170b57cec5SDimitry Andric while (BundleMember) {
22180b57cec5SDimitry Andric if (BundleMember->Inst != BundleMember->OpValue) {
22190b57cec5SDimitry Andric BundleMember = BundleMember->NextInBundle;
22200b57cec5SDimitry Andric continue;
22210b57cec5SDimitry Andric }
22220b57cec5SDimitry Andric // Handle the def-use chain dependencies.
22238bcb0991SDimitry Andric
22248bcb0991SDimitry Andric // Decrement the unscheduled counter and insert to ready list if ready.
22258bcb0991SDimitry Andric auto &&DecrUnsched = [this, &ReadyList](Instruction *I) {
22260b57cec5SDimitry Andric doForAllOpcodes(I, [&ReadyList](ScheduleData *OpDef) {
22270b57cec5SDimitry Andric if (OpDef && OpDef->hasValidDependencies() &&
22280b57cec5SDimitry Andric OpDef->incrementUnscheduledDeps(-1) == 0) {
22290b57cec5SDimitry Andric // There are no more unscheduled dependencies after
22300b57cec5SDimitry Andric // decrementing, so we can put the dependent instruction
22310b57cec5SDimitry Andric // into the ready list.
22320b57cec5SDimitry Andric ScheduleData *DepBundle = OpDef->FirstInBundle;
22330b57cec5SDimitry Andric assert(!DepBundle->IsScheduled &&
22340b57cec5SDimitry Andric "already scheduled bundle gets ready");
22350b57cec5SDimitry Andric ReadyList.insert(DepBundle);
22360b57cec5SDimitry Andric LLVM_DEBUG(dbgs()
22370b57cec5SDimitry Andric << "SLP: gets ready (def): " << *DepBundle << "\n");
22380b57cec5SDimitry Andric }
22390b57cec5SDimitry Andric });
22408bcb0991SDimitry Andric };
22418bcb0991SDimitry Andric
22428bcb0991SDimitry Andric // If BundleMember is a vector bundle, its operands may have been
22438bcb0991SDimitry Andric // reordered duiring buildTree(). We therefore need to get its operands
22448bcb0991SDimitry Andric // through the TreeEntry.
22458bcb0991SDimitry Andric if (TreeEntry *TE = BundleMember->TE) {
22468bcb0991SDimitry Andric int Lane = BundleMember->Lane;
22478bcb0991SDimitry Andric assert(Lane >= 0 && "Lane not set");
22485ffd83dbSDimitry Andric
22495ffd83dbSDimitry Andric // Since vectorization tree is being built recursively this assertion
22505ffd83dbSDimitry Andric // ensures that the tree entry has all operands set before reaching
22515ffd83dbSDimitry Andric // this code. Couple of exceptions known at the moment are extracts
22525ffd83dbSDimitry Andric // where their second (immediate) operand is not added. Since
22535ffd83dbSDimitry Andric // immediates do not affect scheduler behavior this is considered
22545ffd83dbSDimitry Andric // okay.
22555ffd83dbSDimitry Andric auto *In = TE->getMainOp();
22565ffd83dbSDimitry Andric assert(In &&
22575ffd83dbSDimitry Andric (isa<ExtractValueInst>(In) || isa<ExtractElementInst>(In) ||
22585ffd83dbSDimitry Andric In->getNumOperands() == TE->getNumOperands()) &&
22595ffd83dbSDimitry Andric "Missed TreeEntry operands?");
22605ffd83dbSDimitry Andric (void)In; // fake use to avoid build failure when assertions disabled
22615ffd83dbSDimitry Andric
22628bcb0991SDimitry Andric for (unsigned OpIdx = 0, NumOperands = TE->getNumOperands();
22638bcb0991SDimitry Andric OpIdx != NumOperands; ++OpIdx)
22648bcb0991SDimitry Andric if (auto *I = dyn_cast<Instruction>(TE->getOperand(OpIdx)[Lane]))
22658bcb0991SDimitry Andric DecrUnsched(I);
22668bcb0991SDimitry Andric } else {
22678bcb0991SDimitry Andric // If BundleMember is a stand-alone instruction, no operand reordering
22688bcb0991SDimitry Andric // has taken place, so we directly access its operands.
22698bcb0991SDimitry Andric for (Use &U : BundleMember->Inst->operands())
22708bcb0991SDimitry Andric if (auto *I = dyn_cast<Instruction>(U.get()))
22718bcb0991SDimitry Andric DecrUnsched(I);
22720b57cec5SDimitry Andric }
22730b57cec5SDimitry Andric // Handle the memory dependencies.
22740b57cec5SDimitry Andric for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) {
22750b57cec5SDimitry Andric if (MemoryDepSD->incrementUnscheduledDeps(-1) == 0) {
22760b57cec5SDimitry Andric // There are no more unscheduled dependencies after decrementing,
22770b57cec5SDimitry Andric // so we can put the dependent instruction into the ready list.
22780b57cec5SDimitry Andric ScheduleData *DepBundle = MemoryDepSD->FirstInBundle;
22790b57cec5SDimitry Andric assert(!DepBundle->IsScheduled &&
22800b57cec5SDimitry Andric "already scheduled bundle gets ready");
22810b57cec5SDimitry Andric ReadyList.insert(DepBundle);
22820b57cec5SDimitry Andric LLVM_DEBUG(dbgs()
22830b57cec5SDimitry Andric << "SLP: gets ready (mem): " << *DepBundle << "\n");
22840b57cec5SDimitry Andric }
22850b57cec5SDimitry Andric }
22860b57cec5SDimitry Andric BundleMember = BundleMember->NextInBundle;
22870b57cec5SDimitry Andric }
22880b57cec5SDimitry Andric }
22890b57cec5SDimitry Andric
doForAllOpcodesllvm::slpvectorizer::BoUpSLP::BlockScheduling22900b57cec5SDimitry Andric void doForAllOpcodes(Value *V,
22910b57cec5SDimitry Andric function_ref<void(ScheduleData *SD)> Action) {
22920b57cec5SDimitry Andric if (ScheduleData *SD = getScheduleData(V))
22930b57cec5SDimitry Andric Action(SD);
22940b57cec5SDimitry Andric auto I = ExtraScheduleDataMap.find(V);
22950b57cec5SDimitry Andric if (I != ExtraScheduleDataMap.end())
22960b57cec5SDimitry Andric for (auto &P : I->second)
22970b57cec5SDimitry Andric if (P.second->SchedulingRegionID == SchedulingRegionID)
22980b57cec5SDimitry Andric Action(P.second);
22990b57cec5SDimitry Andric }
23000b57cec5SDimitry Andric
23010b57cec5SDimitry Andric /// Put all instructions into the ReadyList which are ready for scheduling.
23020b57cec5SDimitry Andric template <typename ReadyListType>
initialFillReadyListllvm::slpvectorizer::BoUpSLP::BlockScheduling23030b57cec5SDimitry Andric void initialFillReadyList(ReadyListType &ReadyList) {
23040b57cec5SDimitry Andric for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
23050b57cec5SDimitry Andric doForAllOpcodes(I, [&](ScheduleData *SD) {
23060b57cec5SDimitry Andric if (SD->isSchedulingEntity() && SD->isReady()) {
23070b57cec5SDimitry Andric ReadyList.insert(SD);
23080b57cec5SDimitry Andric LLVM_DEBUG(dbgs()
23090b57cec5SDimitry Andric << "SLP: initially in ready list: " << *I << "\n");
23100b57cec5SDimitry Andric }
23110b57cec5SDimitry Andric });
23120b57cec5SDimitry Andric }
23130b57cec5SDimitry Andric }
23140b57cec5SDimitry Andric
23150b57cec5SDimitry Andric /// Checks if a bundle of instructions can be scheduled, i.e. has no
23160b57cec5SDimitry Andric /// cyclic dependencies. This is only a dry-run, no instructions are
23170b57cec5SDimitry Andric /// actually moved at this stage.
23188bcb0991SDimitry Andric /// \returns the scheduling bundle. The returned Optional value is non-None
23198bcb0991SDimitry Andric /// if \p VL is allowed to be scheduled.
23208bcb0991SDimitry Andric Optional<ScheduleData *>
23218bcb0991SDimitry Andric tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
23220b57cec5SDimitry Andric const InstructionsState &S);
23230b57cec5SDimitry Andric
23240b57cec5SDimitry Andric /// Un-bundles a group of instructions.
23250b57cec5SDimitry Andric void cancelScheduling(ArrayRef<Value *> VL, Value *OpValue);
23260b57cec5SDimitry Andric
23270b57cec5SDimitry Andric /// Allocates schedule data chunk.
23280b57cec5SDimitry Andric ScheduleData *allocateScheduleDataChunks();
23290b57cec5SDimitry Andric
23300b57cec5SDimitry Andric /// Extends the scheduling region so that V is inside the region.
23310b57cec5SDimitry Andric /// \returns true if the region size is within the limit.
23320b57cec5SDimitry Andric bool extendSchedulingRegion(Value *V, const InstructionsState &S);
23330b57cec5SDimitry Andric
23340b57cec5SDimitry Andric /// Initialize the ScheduleData structures for new instructions in the
23350b57cec5SDimitry Andric /// scheduling region.
23360b57cec5SDimitry Andric void initScheduleData(Instruction *FromI, Instruction *ToI,
23370b57cec5SDimitry Andric ScheduleData *PrevLoadStore,
23380b57cec5SDimitry Andric ScheduleData *NextLoadStore);
23390b57cec5SDimitry Andric
23400b57cec5SDimitry Andric /// Updates the dependency information of a bundle and of all instructions/
23410b57cec5SDimitry Andric /// bundles which depend on the original bundle.
23420b57cec5SDimitry Andric void calculateDependencies(ScheduleData *SD, bool InsertInReadyList,
23430b57cec5SDimitry Andric BoUpSLP *SLP);
23440b57cec5SDimitry Andric
23450b57cec5SDimitry Andric /// Sets all instruction in the scheduling region to un-scheduled.
23460b57cec5SDimitry Andric void resetSchedule();
23470b57cec5SDimitry Andric
23480b57cec5SDimitry Andric BasicBlock *BB;
23490b57cec5SDimitry Andric
23500b57cec5SDimitry Andric /// Simple memory allocation for ScheduleData.
23510b57cec5SDimitry Andric std::vector<std::unique_ptr<ScheduleData[]>> ScheduleDataChunks;
23520b57cec5SDimitry Andric
23530b57cec5SDimitry Andric /// The size of a ScheduleData array in ScheduleDataChunks.
23540b57cec5SDimitry Andric int ChunkSize;
23550b57cec5SDimitry Andric
23560b57cec5SDimitry Andric /// The allocator position in the current chunk, which is the last entry
23570b57cec5SDimitry Andric /// of ScheduleDataChunks.
23580b57cec5SDimitry Andric int ChunkPos;
23590b57cec5SDimitry Andric
23600b57cec5SDimitry Andric /// Attaches ScheduleData to Instruction.
23610b57cec5SDimitry Andric /// Note that the mapping survives during all vectorization iterations, i.e.
23620b57cec5SDimitry Andric /// ScheduleData structures are recycled.
23630b57cec5SDimitry Andric DenseMap<Value *, ScheduleData *> ScheduleDataMap;
23640b57cec5SDimitry Andric
23650b57cec5SDimitry Andric /// Attaches ScheduleData to Instruction with the leading key.
23660b57cec5SDimitry Andric DenseMap<Value *, SmallDenseMap<Value *, ScheduleData *>>
23670b57cec5SDimitry Andric ExtraScheduleDataMap;
23680b57cec5SDimitry Andric
23690b57cec5SDimitry Andric struct ReadyList : SmallVector<ScheduleData *, 8> {
insertllvm::slpvectorizer::BoUpSLP::BlockScheduling::ReadyList23700b57cec5SDimitry Andric void insert(ScheduleData *SD) { push_back(SD); }
23710b57cec5SDimitry Andric };
23720b57cec5SDimitry Andric
23730b57cec5SDimitry Andric /// The ready-list for scheduling (only used for the dry-run).
23740b57cec5SDimitry Andric ReadyList ReadyInsts;
23750b57cec5SDimitry Andric
23760b57cec5SDimitry Andric /// The first instruction of the scheduling region.
23770b57cec5SDimitry Andric Instruction *ScheduleStart = nullptr;
23780b57cec5SDimitry Andric
23790b57cec5SDimitry Andric /// The first instruction _after_ the scheduling region.
23800b57cec5SDimitry Andric Instruction *ScheduleEnd = nullptr;
23810b57cec5SDimitry Andric
23820b57cec5SDimitry Andric /// The first memory accessing instruction in the scheduling region
23830b57cec5SDimitry Andric /// (can be null).
23840b57cec5SDimitry Andric ScheduleData *FirstLoadStoreInRegion = nullptr;
23850b57cec5SDimitry Andric
23860b57cec5SDimitry Andric /// The last memory accessing instruction in the scheduling region
23870b57cec5SDimitry Andric /// (can be null).
23880b57cec5SDimitry Andric ScheduleData *LastLoadStoreInRegion = nullptr;
23890b57cec5SDimitry Andric
23900b57cec5SDimitry Andric /// The current size of the scheduling region.
23910b57cec5SDimitry Andric int ScheduleRegionSize = 0;
23920b57cec5SDimitry Andric
23930b57cec5SDimitry Andric /// The maximum size allowed for the scheduling region.
23940b57cec5SDimitry Andric int ScheduleRegionSizeLimit = ScheduleRegionSizeBudget;
23950b57cec5SDimitry Andric
23960b57cec5SDimitry Andric /// The ID of the scheduling region. For a new vectorization iteration this
23970b57cec5SDimitry Andric /// is incremented which "removes" all ScheduleData from the region.
23980b57cec5SDimitry Andric // Make sure that the initial SchedulingRegionID is greater than the
23990b57cec5SDimitry Andric // initial SchedulingRegionID in ScheduleData (which is 0).
24000b57cec5SDimitry Andric int SchedulingRegionID = 1;
24010b57cec5SDimitry Andric };
24020b57cec5SDimitry Andric
/// Attaches the BlockScheduling structures to basic blocks.
MapVector<BasicBlock *, std::unique_ptr<BlockScheduling>> BlocksSchedules;

/// Performs the "real" scheduling. Done before vectorization is actually
/// performed in a basic block. Defined out-of-line.
void scheduleBlock(BlockScheduling *BS);

/// List of users to ignore during scheduling and that don't need extracting.
ArrayRef<Value *> UserIgnoreList;
24120b57cec5SDimitry Andric
24130b57cec5SDimitry Andric /// A DenseMapInfo implementation for holding DenseMaps and DenseSets of
24140b57cec5SDimitry Andric /// sorted SmallVectors of unsigned.
24150b57cec5SDimitry Andric struct OrdersTypeDenseMapInfo {
getEmptyKeyllvm::slpvectorizer::BoUpSLP::OrdersTypeDenseMapInfo24160b57cec5SDimitry Andric static OrdersType getEmptyKey() {
24170b57cec5SDimitry Andric OrdersType V;
24180b57cec5SDimitry Andric V.push_back(~1U);
24190b57cec5SDimitry Andric return V;
24200b57cec5SDimitry Andric }
24210b57cec5SDimitry Andric
getTombstoneKeyllvm::slpvectorizer::BoUpSLP::OrdersTypeDenseMapInfo24220b57cec5SDimitry Andric static OrdersType getTombstoneKey() {
24230b57cec5SDimitry Andric OrdersType V;
24240b57cec5SDimitry Andric V.push_back(~2U);
24250b57cec5SDimitry Andric return V;
24260b57cec5SDimitry Andric }
24270b57cec5SDimitry Andric
getHashValuellvm::slpvectorizer::BoUpSLP::OrdersTypeDenseMapInfo24280b57cec5SDimitry Andric static unsigned getHashValue(const OrdersType &V) {
24290b57cec5SDimitry Andric return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
24300b57cec5SDimitry Andric }
24310b57cec5SDimitry Andric
isEqualllvm::slpvectorizer::BoUpSLP::OrdersTypeDenseMapInfo24320b57cec5SDimitry Andric static bool isEqual(const OrdersType &LHS, const OrdersType &RHS) {
24330b57cec5SDimitry Andric return LHS == RHS;
24340b57cec5SDimitry Andric }
24350b57cec5SDimitry Andric };
24360b57cec5SDimitry Andric
/// Contains orders of operations along with the number of bundles that have
/// operations in this order. It stores only those orders that require
/// reordering, if reordering is not required it is counted using \a
/// NumOpsWantToKeepOriginalOrder.
DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo> NumOpsWantToKeepOrder;
/// Number of bundles that do not require reordering.
unsigned NumOpsWantToKeepOriginalOrder = 0;

// Analysis and block reference (non-owning pointers).
Function *F;
ScalarEvolution *SE;
TargetTransformInfo *TTI;
TargetLibraryInfo *TLI;
AAResults *AA;
LoopInfo *LI;
DominatorTree *DT;
AssumptionCache *AC;
DemandedBits *DB;
const DataLayout *DL;
OptimizationRemarkEmitter *ORE;

unsigned MaxVecRegSize; // This is set by TTI or overridden by cl::opt.
unsigned MinVecRegSize; // Set by cl::opt (default: 128).

/// Instruction builder to construct the vectorized tree.
IRBuilder<> Builder;

/// A map of scalar integer values to the smallest bit width with which they
/// can legally be represented. The values map to (width, signed) pairs,
/// where "width" indicates the minimum bit width and "signed" is True if the
/// value must be signed-extended, rather than zero-extended, back to its
/// original width.
MapVector<Value *, std::pair<uint64_t, bool>> MinBWs;
24700b57cec5SDimitry Andric };
24710b57cec5SDimitry Andric
24720b57cec5SDimitry Andric } // end namespace slpvectorizer
24730b57cec5SDimitry Andric
/// GraphTraits specialization so generic graph utilities (e.g. GraphWriter)
/// can walk the SLP vectorizable tree.
template <> struct GraphTraits<BoUpSLP *> {
  using TreeEntry = BoUpSLP::TreeEntry;

  /// NodeRef has to be a pointer per the GraphWriter.
  using NodeRef = TreeEntry *;

  /// The container that owns all TreeEntry nodes.
  using ContainerTy = BoUpSLP::TreeEntry::VecTreeTy;
24810b57cec5SDimitry Andric
  /// Add the VectorizableTree to the index iterator to be able to return
  /// TreeEntry pointers.
  struct ChildIteratorType
      : public iterator_adaptor_base<
            ChildIteratorType, SmallVector<BoUpSLP::EdgeInfo, 1>::iterator> {
    /// The tree the iterated edges point into.
    ContainerTy &VectorizableTree;

    ChildIteratorType(SmallVector<BoUpSLP::EdgeInfo, 1>::iterator W,
                      ContainerTy &VT)
        : ChildIteratorType::iterator_adaptor_base(W), VectorizableTree(VT) {}

    /// Dereferences to the user TreeEntry of the current edge.
    NodeRef operator*() { return I->UserTE; }
  };
24950b57cec5SDimitry Andric
getEntryNodellvm::GraphTraits24960b57cec5SDimitry Andric static NodeRef getEntryNode(BoUpSLP &R) {
24970b57cec5SDimitry Andric return R.VectorizableTree[0].get();
24980b57cec5SDimitry Andric }
24990b57cec5SDimitry Andric
child_beginllvm::GraphTraits25000b57cec5SDimitry Andric static ChildIteratorType child_begin(NodeRef N) {
25010b57cec5SDimitry Andric return {N->UserTreeIndices.begin(), N->Container};
25020b57cec5SDimitry Andric }
25030b57cec5SDimitry Andric
child_endllvm::GraphTraits25040b57cec5SDimitry Andric static ChildIteratorType child_end(NodeRef N) {
25050b57cec5SDimitry Andric return {N->UserTreeIndices.end(), N->Container};
25060b57cec5SDimitry Andric }
25070b57cec5SDimitry Andric
25080b57cec5SDimitry Andric /// For the node iterator we just need to turn the TreeEntry iterator into a
25090b57cec5SDimitry Andric /// TreeEntry* iterator so that it dereferences to NodeRef.
25100b57cec5SDimitry Andric class nodes_iterator {
25110b57cec5SDimitry Andric using ItTy = ContainerTy::iterator;
25120b57cec5SDimitry Andric ItTy It;
25130b57cec5SDimitry Andric
25140b57cec5SDimitry Andric public:
nodes_iterator(const ItTy & It2)25150b57cec5SDimitry Andric nodes_iterator(const ItTy &It2) : It(It2) {}
operator *()25160b57cec5SDimitry Andric NodeRef operator*() { return It->get(); }
operator ++()25170b57cec5SDimitry Andric nodes_iterator operator++() {
25180b57cec5SDimitry Andric ++It;
25190b57cec5SDimitry Andric return *this;
25200b57cec5SDimitry Andric }
operator !=(const nodes_iterator & N2) const25210b57cec5SDimitry Andric bool operator!=(const nodes_iterator &N2) const { return N2.It != It; }
25220b57cec5SDimitry Andric };
25230b57cec5SDimitry Andric
nodes_beginllvm::GraphTraits25240b57cec5SDimitry Andric static nodes_iterator nodes_begin(BoUpSLP *R) {
25250b57cec5SDimitry Andric return nodes_iterator(R->VectorizableTree.begin());
25260b57cec5SDimitry Andric }
25270b57cec5SDimitry Andric
nodes_endllvm::GraphTraits25280b57cec5SDimitry Andric static nodes_iterator nodes_end(BoUpSLP *R) {
25290b57cec5SDimitry Andric return nodes_iterator(R->VectorizableTree.end());
25300b57cec5SDimitry Andric }
25310b57cec5SDimitry Andric
sizellvm::GraphTraits25320b57cec5SDimitry Andric static unsigned size(BoUpSLP *R) { return R->VectorizableTree.size(); }
25330b57cec5SDimitry Andric };
25340b57cec5SDimitry Andric
25350b57cec5SDimitry Andric template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
25360b57cec5SDimitry Andric using TreeEntry = BoUpSLP::TreeEntry;
25370b57cec5SDimitry Andric
DOTGraphTraitsllvm::DOTGraphTraits25380b57cec5SDimitry Andric DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
25390b57cec5SDimitry Andric
getNodeLabelllvm::DOTGraphTraits25400b57cec5SDimitry Andric std::string getNodeLabel(const TreeEntry *Entry, const BoUpSLP *R) {
25410b57cec5SDimitry Andric std::string Str;
25420b57cec5SDimitry Andric raw_string_ostream OS(Str);
25430b57cec5SDimitry Andric if (isSplat(Entry->Scalars)) {
25440b57cec5SDimitry Andric OS << "<splat> " << *Entry->Scalars[0];
25450b57cec5SDimitry Andric return Str;
25460b57cec5SDimitry Andric }
25470b57cec5SDimitry Andric for (auto V : Entry->Scalars) {
25480b57cec5SDimitry Andric OS << *V;
2549af732203SDimitry Andric if (llvm::any_of(R->ExternalUses, [&](const BoUpSLP::ExternalUser &EU) {
2550af732203SDimitry Andric return EU.Scalar == V;
2551af732203SDimitry Andric }))
25520b57cec5SDimitry Andric OS << " <extract>";
25530b57cec5SDimitry Andric OS << "\n";
25540b57cec5SDimitry Andric }
25550b57cec5SDimitry Andric return Str;
25560b57cec5SDimitry Andric }
25570b57cec5SDimitry Andric
getNodeAttributesllvm::DOTGraphTraits25580b57cec5SDimitry Andric static std::string getNodeAttributes(const TreeEntry *Entry,
25590b57cec5SDimitry Andric const BoUpSLP *) {
2560480093f4SDimitry Andric if (Entry->State == TreeEntry::NeedToGather)
25610b57cec5SDimitry Andric return "color=red";
25620b57cec5SDimitry Andric return "";
25630b57cec5SDimitry Andric }
25640b57cec5SDimitry Andric };
25650b57cec5SDimitry Andric
25660b57cec5SDimitry Andric } // end namespace llvm
25670b57cec5SDimitry Andric
/// Destructor: physically removes all instructions that were scheduled for
/// deletion during vectorization. Runs in two passes so that no instruction
/// is erased while another deleted instruction still references it.
BoUpSLP::~BoUpSLP() {
  // Pass 1: detach every deleted instruction from its uses and operands.
  for (const auto &Pair : DeletedInstructions) {
    // Replace operands of ignored instructions with Undefs in case if they were
    // marked for deletion.
    if (Pair.getSecond()) {
      Value *Undef = UndefValue::get(Pair.getFirst()->getType());
      Pair.getFirst()->replaceAllUsesWith(Undef);
    }
    Pair.getFirst()->dropAllReferences();
  }
  // Pass 2: now that nothing references them, erase them from the IR.
  for (const auto &Pair : DeletedInstructions) {
    assert(Pair.getFirst()->use_empty() &&
           "trying to erase instruction with users.");
    Pair.getFirst()->eraseFromParent();
  }
#ifdef EXPENSIVE_CHECKS
  // If we could guarantee that this call is not extremely slow, we could
  // remove the ifdef limitation (see PR47712).
  assert(!verifyFunction(*F, &dbgs()));
#endif
}
25898bcb0991SDimitry Andric
eraseInstructions(ArrayRef<Value * > AV)25908bcb0991SDimitry Andric void BoUpSLP::eraseInstructions(ArrayRef<Value *> AV) {
25918bcb0991SDimitry Andric for (auto *V : AV) {
25928bcb0991SDimitry Andric if (auto *I = dyn_cast<Instruction>(V))
2593af732203SDimitry Andric eraseInstruction(I, /*ReplaceOpsWithUndef=*/true);
25948bcb0991SDimitry Andric };
25958bcb0991SDimitry Andric }
25968bcb0991SDimitry Andric
buildTree(ArrayRef<Value * > Roots,ArrayRef<Value * > UserIgnoreLst)25970b57cec5SDimitry Andric void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
25980b57cec5SDimitry Andric ArrayRef<Value *> UserIgnoreLst) {
25990b57cec5SDimitry Andric ExtraValueToDebugLocsMap ExternallyUsedValues;
26000b57cec5SDimitry Andric buildTree(Roots, ExternallyUsedValues, UserIgnoreLst);
26010b57cec5SDimitry Andric }
26020b57cec5SDimitry Andric
/// Build the vectorizable tree rooted at \p Roots, then walk every vectorized
/// entry to record which scalars must be extracted from the vectors because
/// they have users outside the tree (or are listed as extra externally used
/// values in \p ExternallyUsedValues). Results accumulate in ExternalUses.
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
                        ExtraValueToDebugLocsMap &ExternallyUsedValues,
                        ArrayRef<Value *> UserIgnoreLst) {
  // Discard any previously built tree before starting over.
  deleteTree();
  UserIgnoreList = UserIgnoreLst;
  if (!allSameType(Roots))
    return;
  buildTree_rec(Roots, 0, EdgeInfo());

  // Collect the values that we need to extract from the tree.
  for (auto &TEPtr : VectorizableTree) {
    TreeEntry *Entry = TEPtr.get();

    // No need to handle users of gathered values.
    if (Entry->State == TreeEntry::NeedToGather)
      continue;

    // For each lane:
    for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
      Value *Scalar = Entry->Scalars[Lane];
      int FoundLane = Entry->findLaneForValue(Scalar);

      // Check if the scalar is externally used as an extra arg.
      auto ExtI = ExternallyUsedValues.find(Scalar);
      if (ExtI != ExternallyUsedValues.end()) {
        LLVM_DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane "
                          << Lane << " from " << *Scalar << ".\n");
        // nullptr user marks an extra-arg use rather than an IR user.
        ExternalUses.emplace_back(Scalar, nullptr, FoundLane);
      }
      for (User *U : Scalar->users()) {
        LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");

        // Only instructions can be users needing an extract.
        Instruction *UserInst = dyn_cast<Instruction>(U);
        if (!UserInst)
          continue;

        // Skip in-tree scalars that become vectors
        if (TreeEntry *UseEntry = getTreeEntry(U)) {
          Value *UseScalar = UseEntry->Scalars[0];
          // Some in-tree scalars will remain as scalar in vectorized
          // instructions. If that is the case, the one in Lane 0 will
          // be used.
          if (UseScalar != U ||
              UseEntry->State == TreeEntry::ScatterVectorize ||
              !InTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
            LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
                              << ".\n");
            assert(UseEntry->State != TreeEntry::NeedToGather && "Bad state");
            continue;
          }
        }

        // Ignore users in the user ignore list.
        if (is_contained(UserIgnoreList, UserInst))
          continue;

        LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane "
                          << Lane << " from " << *Scalar << ".\n");
        ExternalUses.push_back(ExternalUser(Scalar, U, FoundLane));
      }
    }
  }
}
26660b57cec5SDimitry Andric
buildTree_rec(ArrayRef<Value * > VL,unsigned Depth,const EdgeInfo & UserTreeIdx)26670b57cec5SDimitry Andric void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
26680b57cec5SDimitry Andric const EdgeInfo &UserTreeIdx) {
26690b57cec5SDimitry Andric assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
26700b57cec5SDimitry Andric
26710b57cec5SDimitry Andric InstructionsState S = getSameOpcode(VL);
26720b57cec5SDimitry Andric if (Depth == RecursionMaxDepth) {
26730b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
26748bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
26750b57cec5SDimitry Andric return;
26760b57cec5SDimitry Andric }
26770b57cec5SDimitry Andric
26785f7ddb14SDimitry Andric // Don't handle scalable vectors
26795f7ddb14SDimitry Andric if (S.getOpcode() == Instruction::ExtractElement &&
26805f7ddb14SDimitry Andric isa<ScalableVectorType>(
26815f7ddb14SDimitry Andric cast<ExtractElementInst>(S.OpValue)->getVectorOperandType())) {
26825f7ddb14SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
26835f7ddb14SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
26845f7ddb14SDimitry Andric return;
26855f7ddb14SDimitry Andric }
26865f7ddb14SDimitry Andric
26870b57cec5SDimitry Andric // Don't handle vectors.
26885f7ddb14SDimitry Andric if (S.OpValue->getType()->isVectorTy() &&
26895f7ddb14SDimitry Andric !isa<InsertElementInst>(S.OpValue)) {
26900b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
26918bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
26920b57cec5SDimitry Andric return;
26930b57cec5SDimitry Andric }
26940b57cec5SDimitry Andric
26950b57cec5SDimitry Andric if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
26960b57cec5SDimitry Andric if (SI->getValueOperand()->getType()->isVectorTy()) {
26970b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
26988bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
26990b57cec5SDimitry Andric return;
27000b57cec5SDimitry Andric }
27010b57cec5SDimitry Andric
27020b57cec5SDimitry Andric // If all of the operands are identical or constant we have a simple solution.
27030b57cec5SDimitry Andric if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode()) {
27040b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
27058bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
27060b57cec5SDimitry Andric return;
27070b57cec5SDimitry Andric }
27080b57cec5SDimitry Andric
27090b57cec5SDimitry Andric // We now know that this is a vector of instructions of the same type from
27100b57cec5SDimitry Andric // the same block.
27110b57cec5SDimitry Andric
27120b57cec5SDimitry Andric // Don't vectorize ephemeral values.
27138bcb0991SDimitry Andric for (Value *V : VL) {
27148bcb0991SDimitry Andric if (EphValues.count(V)) {
27158bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
27160b57cec5SDimitry Andric << ") is ephemeral.\n");
27178bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
27180b57cec5SDimitry Andric return;
27190b57cec5SDimitry Andric }
27200b57cec5SDimitry Andric }
27210b57cec5SDimitry Andric
27220b57cec5SDimitry Andric // Check if this is a duplicate of another entry.
27230b57cec5SDimitry Andric if (TreeEntry *E = getTreeEntry(S.OpValue)) {
27240b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
27250b57cec5SDimitry Andric if (!E->isSame(VL)) {
27260b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
27278bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
27280b57cec5SDimitry Andric return;
27290b57cec5SDimitry Andric }
27300b57cec5SDimitry Andric // Record the reuse of the tree node. FIXME, currently this is only used to
27310b57cec5SDimitry Andric // properly draw the graph rather than for the actual vectorization.
27320b57cec5SDimitry Andric E->UserTreeIndices.push_back(UserTreeIdx);
27330b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
27340b57cec5SDimitry Andric << ".\n");
27350b57cec5SDimitry Andric return;
27360b57cec5SDimitry Andric }
27370b57cec5SDimitry Andric
27380b57cec5SDimitry Andric // Check that none of the instructions in the bundle are already in the tree.
27398bcb0991SDimitry Andric for (Value *V : VL) {
27408bcb0991SDimitry Andric auto *I = dyn_cast<Instruction>(V);
27410b57cec5SDimitry Andric if (!I)
27420b57cec5SDimitry Andric continue;
27430b57cec5SDimitry Andric if (getTreeEntry(I)) {
27448bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
27450b57cec5SDimitry Andric << ") is already in tree.\n");
27468bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
27470b57cec5SDimitry Andric return;
27480b57cec5SDimitry Andric }
27490b57cec5SDimitry Andric }
27500b57cec5SDimitry Andric
27510b57cec5SDimitry Andric // If any of the scalars is marked as a value that needs to stay scalar, then
27520b57cec5SDimitry Andric // we need to gather the scalars.
27530b57cec5SDimitry Andric // The reduction nodes (stored in UserIgnoreList) also should stay scalar.
27548bcb0991SDimitry Andric for (Value *V : VL) {
27558bcb0991SDimitry Andric if (MustGather.count(V) || is_contained(UserIgnoreList, V)) {
27560b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
27578bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
27580b57cec5SDimitry Andric return;
27590b57cec5SDimitry Andric }
27600b57cec5SDimitry Andric }
27610b57cec5SDimitry Andric
27620b57cec5SDimitry Andric // Check that all of the users of the scalars that we want to vectorize are
27630b57cec5SDimitry Andric // schedulable.
27640b57cec5SDimitry Andric auto *VL0 = cast<Instruction>(S.OpValue);
27650b57cec5SDimitry Andric BasicBlock *BB = VL0->getParent();
27660b57cec5SDimitry Andric
27670b57cec5SDimitry Andric if (!DT->isReachableFromEntry(BB)) {
27680b57cec5SDimitry Andric // Don't go into unreachable blocks. They may contain instructions with
27690b57cec5SDimitry Andric // dependency cycles which confuse the final scheduling.
27700b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
27718bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
27720b57cec5SDimitry Andric return;
27730b57cec5SDimitry Andric }
27740b57cec5SDimitry Andric
27750b57cec5SDimitry Andric // Check that every instruction appears once in this bundle.
27760b57cec5SDimitry Andric SmallVector<unsigned, 4> ReuseShuffleIndicies;
27770b57cec5SDimitry Andric SmallVector<Value *, 4> UniqueValues;
27780b57cec5SDimitry Andric DenseMap<Value *, unsigned> UniquePositions;
27790b57cec5SDimitry Andric for (Value *V : VL) {
27800b57cec5SDimitry Andric auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
27810b57cec5SDimitry Andric ReuseShuffleIndicies.emplace_back(Res.first->second);
27820b57cec5SDimitry Andric if (Res.second)
27830b57cec5SDimitry Andric UniqueValues.emplace_back(V);
27840b57cec5SDimitry Andric }
27858bcb0991SDimitry Andric size_t NumUniqueScalarValues = UniqueValues.size();
27868bcb0991SDimitry Andric if (NumUniqueScalarValues == VL.size()) {
27870b57cec5SDimitry Andric ReuseShuffleIndicies.clear();
27880b57cec5SDimitry Andric } else {
27890b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
27908bcb0991SDimitry Andric if (NumUniqueScalarValues <= 1 ||
27918bcb0991SDimitry Andric !llvm::isPowerOf2_32(NumUniqueScalarValues)) {
27920b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
27938bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
27940b57cec5SDimitry Andric return;
27950b57cec5SDimitry Andric }
27960b57cec5SDimitry Andric VL = UniqueValues;
27970b57cec5SDimitry Andric }
27980b57cec5SDimitry Andric
27990b57cec5SDimitry Andric auto &BSRef = BlocksSchedules[BB];
28000b57cec5SDimitry Andric if (!BSRef)
28018bcb0991SDimitry Andric BSRef = std::make_unique<BlockScheduling>(BB);
28020b57cec5SDimitry Andric
28030b57cec5SDimitry Andric BlockScheduling &BS = *BSRef.get();
28040b57cec5SDimitry Andric
28058bcb0991SDimitry Andric Optional<ScheduleData *> Bundle = BS.tryScheduleBundle(VL, this, S);
28068bcb0991SDimitry Andric if (!Bundle) {
28070b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
28080b57cec5SDimitry Andric assert((!BS.getScheduleData(VL0) ||
28090b57cec5SDimitry Andric !BS.getScheduleData(VL0)->isPartOfBundle()) &&
28100b57cec5SDimitry Andric "tryScheduleBundle should cancelScheduling on failure");
28118bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
28128bcb0991SDimitry Andric ReuseShuffleIndicies);
28130b57cec5SDimitry Andric return;
28140b57cec5SDimitry Andric }
28150b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
28160b57cec5SDimitry Andric
28170b57cec5SDimitry Andric unsigned ShuffleOrOp = S.isAltShuffle() ?
28180b57cec5SDimitry Andric (unsigned) Instruction::ShuffleVector : S.getOpcode();
28190b57cec5SDimitry Andric switch (ShuffleOrOp) {
28200b57cec5SDimitry Andric case Instruction::PHI: {
28218bcb0991SDimitry Andric auto *PH = cast<PHINode>(VL0);
28220b57cec5SDimitry Andric
28230b57cec5SDimitry Andric // Check for terminator values (e.g. invoke).
2824af732203SDimitry Andric for (Value *V : VL)
2825af732203SDimitry Andric for (unsigned I = 0, E = PH->getNumIncomingValues(); I < E; ++I) {
28260b57cec5SDimitry Andric Instruction *Term = dyn_cast<Instruction>(
2827af732203SDimitry Andric cast<PHINode>(V)->getIncomingValueForBlock(
2828af732203SDimitry Andric PH->getIncomingBlock(I)));
28290b57cec5SDimitry Andric if (Term && Term->isTerminator()) {
28300b57cec5SDimitry Andric LLVM_DEBUG(dbgs()
28310b57cec5SDimitry Andric << "SLP: Need to swizzle PHINodes (terminator use).\n");
28320b57cec5SDimitry Andric BS.cancelScheduling(VL, VL0);
28338bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
28348bcb0991SDimitry Andric ReuseShuffleIndicies);
28350b57cec5SDimitry Andric return;
28360b57cec5SDimitry Andric }
28370b57cec5SDimitry Andric }
28380b57cec5SDimitry Andric
28398bcb0991SDimitry Andric TreeEntry *TE =
28408bcb0991SDimitry Andric newTreeEntry(VL, Bundle, S, UserTreeIdx, ReuseShuffleIndicies);
28410b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
28420b57cec5SDimitry Andric
28438bcb0991SDimitry Andric // Keeps the reordered operands to avoid code duplication.
28448bcb0991SDimitry Andric SmallVector<ValueList, 2> OperandsVec;
2845af732203SDimitry Andric for (unsigned I = 0, E = PH->getNumIncomingValues(); I < E; ++I) {
28465f7ddb14SDimitry Andric if (!DT->isReachableFromEntry(PH->getIncomingBlock(I))) {
28475f7ddb14SDimitry Andric ValueList Operands(VL.size(), PoisonValue::get(PH->getType()));
28485f7ddb14SDimitry Andric TE->setOperand(I, Operands);
28495f7ddb14SDimitry Andric OperandsVec.push_back(Operands);
28505f7ddb14SDimitry Andric continue;
28515f7ddb14SDimitry Andric }
28520b57cec5SDimitry Andric ValueList Operands;
28530b57cec5SDimitry Andric // Prepare the operand vector.
2854af732203SDimitry Andric for (Value *V : VL)
2855af732203SDimitry Andric Operands.push_back(cast<PHINode>(V)->getIncomingValueForBlock(
2856af732203SDimitry Andric PH->getIncomingBlock(I)));
2857af732203SDimitry Andric TE->setOperand(I, Operands);
28588bcb0991SDimitry Andric OperandsVec.push_back(Operands);
28590b57cec5SDimitry Andric }
28608bcb0991SDimitry Andric for (unsigned OpIdx = 0, OpE = OperandsVec.size(); OpIdx != OpE; ++OpIdx)
28618bcb0991SDimitry Andric buildTree_rec(OperandsVec[OpIdx], Depth + 1, {TE, OpIdx});
28620b57cec5SDimitry Andric return;
28630b57cec5SDimitry Andric }
28640b57cec5SDimitry Andric case Instruction::ExtractValue:
28650b57cec5SDimitry Andric case Instruction::ExtractElement: {
28660b57cec5SDimitry Andric OrdersType CurrentOrder;
28670b57cec5SDimitry Andric bool Reuse = canReuseExtract(VL, VL0, CurrentOrder);
28680b57cec5SDimitry Andric if (Reuse) {
28690b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n");
28700b57cec5SDimitry Andric ++NumOpsWantToKeepOriginalOrder;
28718bcb0991SDimitry Andric newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
28720b57cec5SDimitry Andric ReuseShuffleIndicies);
28730b57cec5SDimitry Andric // This is a special case, as it does not gather, but at the same time
28740b57cec5SDimitry Andric // we are not extending buildTree_rec() towards the operands.
28750b57cec5SDimitry Andric ValueList Op0;
28760b57cec5SDimitry Andric Op0.assign(VL.size(), VL0->getOperand(0));
28778bcb0991SDimitry Andric VectorizableTree.back()->setOperand(0, Op0);
28780b57cec5SDimitry Andric return;
28790b57cec5SDimitry Andric }
28800b57cec5SDimitry Andric if (!CurrentOrder.empty()) {
28810b57cec5SDimitry Andric LLVM_DEBUG({
28820b57cec5SDimitry Andric dbgs() << "SLP: Reusing or shuffling of reordered extract sequence "
28830b57cec5SDimitry Andric "with order";
28840b57cec5SDimitry Andric for (unsigned Idx : CurrentOrder)
28850b57cec5SDimitry Andric dbgs() << " " << Idx;
28860b57cec5SDimitry Andric dbgs() << "\n";
28870b57cec5SDimitry Andric });
28880b57cec5SDimitry Andric // Insert new order with initial value 0, if it does not exist,
28890b57cec5SDimitry Andric // otherwise return the iterator to the existing one.
28908bcb0991SDimitry Andric newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
2891af732203SDimitry Andric ReuseShuffleIndicies, CurrentOrder);
2892af732203SDimitry Andric findRootOrder(CurrentOrder);
2893af732203SDimitry Andric ++NumOpsWantToKeepOrder[CurrentOrder];
28940b57cec5SDimitry Andric // This is a special case, as it does not gather, but at the same time
28950b57cec5SDimitry Andric // we are not extending buildTree_rec() towards the operands.
28960b57cec5SDimitry Andric ValueList Op0;
28970b57cec5SDimitry Andric Op0.assign(VL.size(), VL0->getOperand(0));
28988bcb0991SDimitry Andric VectorizableTree.back()->setOperand(0, Op0);
28990b57cec5SDimitry Andric return;
29000b57cec5SDimitry Andric }
29010b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
29028bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
29038bcb0991SDimitry Andric ReuseShuffleIndicies);
29040b57cec5SDimitry Andric BS.cancelScheduling(VL, VL0);
29050b57cec5SDimitry Andric return;
29060b57cec5SDimitry Andric }
29075f7ddb14SDimitry Andric case Instruction::InsertElement: {
29085f7ddb14SDimitry Andric assert(ReuseShuffleIndicies.empty() && "All inserts should be unique");
29095f7ddb14SDimitry Andric
29105f7ddb14SDimitry Andric // Check that we have a buildvector and not a shuffle of 2 or more
29115f7ddb14SDimitry Andric // different vectors.
29125f7ddb14SDimitry Andric ValueSet SourceVectors;
29135f7ddb14SDimitry Andric for (Value *V : VL)
29145f7ddb14SDimitry Andric SourceVectors.insert(cast<Instruction>(V)->getOperand(0));
29155f7ddb14SDimitry Andric
29165f7ddb14SDimitry Andric if (count_if(VL, [&SourceVectors](Value *V) {
29175f7ddb14SDimitry Andric return !SourceVectors.contains(V);
29185f7ddb14SDimitry Andric }) >= 2) {
29195f7ddb14SDimitry Andric // Found 2nd source vector - cancel.
29205f7ddb14SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Gather of insertelement vectors with "
29215f7ddb14SDimitry Andric "different source vectors.\n");
29225f7ddb14SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
29235f7ddb14SDimitry Andric ReuseShuffleIndicies);
29245f7ddb14SDimitry Andric BS.cancelScheduling(VL, VL0);
29255f7ddb14SDimitry Andric return;
29265f7ddb14SDimitry Andric }
29275f7ddb14SDimitry Andric
29285f7ddb14SDimitry Andric TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx);
29295f7ddb14SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
29305f7ddb14SDimitry Andric
29315f7ddb14SDimitry Andric constexpr int NumOps = 2;
29325f7ddb14SDimitry Andric ValueList VectorOperands[NumOps];
29335f7ddb14SDimitry Andric for (int I = 0; I < NumOps; ++I) {
29345f7ddb14SDimitry Andric for (Value *V : VL)
29355f7ddb14SDimitry Andric VectorOperands[I].push_back(cast<Instruction>(V)->getOperand(I));
29365f7ddb14SDimitry Andric
29375f7ddb14SDimitry Andric TE->setOperand(I, VectorOperands[I]);
29385f7ddb14SDimitry Andric }
29395f7ddb14SDimitry Andric buildTree_rec(VectorOperands[NumOps - 1], Depth + 1, {TE, 0});
29405f7ddb14SDimitry Andric return;
29415f7ddb14SDimitry Andric }
29420b57cec5SDimitry Andric case Instruction::Load: {
29430b57cec5SDimitry Andric // Check that a vectorized load would load the same memory as a scalar
29440b57cec5SDimitry Andric // load. For example, we don't want to vectorize loads that are smaller
29450b57cec5SDimitry Andric // than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
29460b57cec5SDimitry Andric // treats loading/storing it as an i8 struct. If we vectorize loads/stores
29470b57cec5SDimitry Andric // from such a struct, we read/write packed bits disagreeing with the
29480b57cec5SDimitry Andric // unvectorized version.
29490b57cec5SDimitry Andric Type *ScalarTy = VL0->getType();
29500b57cec5SDimitry Andric
29510b57cec5SDimitry Andric if (DL->getTypeSizeInBits(ScalarTy) !=
29520b57cec5SDimitry Andric DL->getTypeAllocSizeInBits(ScalarTy)) {
29530b57cec5SDimitry Andric BS.cancelScheduling(VL, VL0);
29548bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
29558bcb0991SDimitry Andric ReuseShuffleIndicies);
29560b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
29570b57cec5SDimitry Andric return;
29580b57cec5SDimitry Andric }
29590b57cec5SDimitry Andric
29600b57cec5SDimitry Andric // Make sure all loads in the bundle are simple - we can't vectorize
29610b57cec5SDimitry Andric // atomic or volatile loads.
29620b57cec5SDimitry Andric SmallVector<Value *, 4> PointerOps(VL.size());
29630b57cec5SDimitry Andric auto POIter = PointerOps.begin();
29640b57cec5SDimitry Andric for (Value *V : VL) {
29650b57cec5SDimitry Andric auto *L = cast<LoadInst>(V);
29660b57cec5SDimitry Andric if (!L->isSimple()) {
29670b57cec5SDimitry Andric BS.cancelScheduling(VL, VL0);
29688bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
29698bcb0991SDimitry Andric ReuseShuffleIndicies);
29700b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
29710b57cec5SDimitry Andric return;
29720b57cec5SDimitry Andric }
29730b57cec5SDimitry Andric *POIter = L->getPointerOperand();
29740b57cec5SDimitry Andric ++POIter;
29750b57cec5SDimitry Andric }
29760b57cec5SDimitry Andric
29770b57cec5SDimitry Andric OrdersType CurrentOrder;
29780b57cec5SDimitry Andric // Check the order of pointer operands.
29795f7ddb14SDimitry Andric if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
29800b57cec5SDimitry Andric Value *Ptr0;
29810b57cec5SDimitry Andric Value *PtrN;
29820b57cec5SDimitry Andric if (CurrentOrder.empty()) {
29830b57cec5SDimitry Andric Ptr0 = PointerOps.front();
29840b57cec5SDimitry Andric PtrN = PointerOps.back();
29850b57cec5SDimitry Andric } else {
29860b57cec5SDimitry Andric Ptr0 = PointerOps[CurrentOrder.front()];
29870b57cec5SDimitry Andric PtrN = PointerOps[CurrentOrder.back()];
29880b57cec5SDimitry Andric }
29895f7ddb14SDimitry Andric Optional<int> Diff = getPointersDiff(
29905f7ddb14SDimitry Andric ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
29910b57cec5SDimitry Andric // Check that the sorted loads are consecutive.
29925f7ddb14SDimitry Andric if (static_cast<unsigned>(*Diff) == VL.size() - 1) {
29930b57cec5SDimitry Andric if (CurrentOrder.empty()) {
29940b57cec5SDimitry Andric // Original loads are consecutive and does not require reordering.
29950b57cec5SDimitry Andric ++NumOpsWantToKeepOriginalOrder;
29968bcb0991SDimitry Andric TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S,
29978bcb0991SDimitry Andric UserTreeIdx, ReuseShuffleIndicies);
29988bcb0991SDimitry Andric TE->setOperandsInOrder();
29990b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
30000b57cec5SDimitry Andric } else {
30010b57cec5SDimitry Andric // Need to reorder.
30028bcb0991SDimitry Andric TreeEntry *TE =
30038bcb0991SDimitry Andric newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
3004af732203SDimitry Andric ReuseShuffleIndicies, CurrentOrder);
30058bcb0991SDimitry Andric TE->setOperandsInOrder();
30060b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
3007af732203SDimitry Andric findRootOrder(CurrentOrder);
3008af732203SDimitry Andric ++NumOpsWantToKeepOrder[CurrentOrder];
30090b57cec5SDimitry Andric }
30100b57cec5SDimitry Andric return;
30110b57cec5SDimitry Andric }
30125f7ddb14SDimitry Andric Align CommonAlignment = cast<LoadInst>(VL0)->getAlign();
30135f7ddb14SDimitry Andric for (Value *V : VL)
30145f7ddb14SDimitry Andric CommonAlignment =
30155f7ddb14SDimitry Andric commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
30165f7ddb14SDimitry Andric if (TTI->isLegalMaskedGather(FixedVectorType::get(ScalarTy, VL.size()),
30175f7ddb14SDimitry Andric CommonAlignment)) {
3018af732203SDimitry Andric // Vectorizing non-consecutive loads with `llvm.masked.gather`.
30195f7ddb14SDimitry Andric TreeEntry *TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle,
30205f7ddb14SDimitry Andric S, UserTreeIdx, ReuseShuffleIndicies);
3021af732203SDimitry Andric TE->setOperandsInOrder();
3022af732203SDimitry Andric buildTree_rec(PointerOps, Depth + 1, {TE, 0});
30235f7ddb14SDimitry Andric LLVM_DEBUG(dbgs()
30245f7ddb14SDimitry Andric << "SLP: added a vector of non-consecutive loads.\n");
3025af732203SDimitry Andric return;
30260b57cec5SDimitry Andric }
30275f7ddb14SDimitry Andric }
30280b57cec5SDimitry Andric
30290b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
30300b57cec5SDimitry Andric BS.cancelScheduling(VL, VL0);
30318bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
30328bcb0991SDimitry Andric ReuseShuffleIndicies);
30330b57cec5SDimitry Andric return;
30340b57cec5SDimitry Andric }
30350b57cec5SDimitry Andric case Instruction::ZExt:
30360b57cec5SDimitry Andric case Instruction::SExt:
30370b57cec5SDimitry Andric case Instruction::FPToUI:
30380b57cec5SDimitry Andric case Instruction::FPToSI:
30390b57cec5SDimitry Andric case Instruction::FPExt:
30400b57cec5SDimitry Andric case Instruction::PtrToInt:
30410b57cec5SDimitry Andric case Instruction::IntToPtr:
30420b57cec5SDimitry Andric case Instruction::SIToFP:
30430b57cec5SDimitry Andric case Instruction::UIToFP:
30440b57cec5SDimitry Andric case Instruction::Trunc:
30450b57cec5SDimitry Andric case Instruction::FPTrunc:
30460b57cec5SDimitry Andric case Instruction::BitCast: {
30470b57cec5SDimitry Andric Type *SrcTy = VL0->getOperand(0)->getType();
30488bcb0991SDimitry Andric for (Value *V : VL) {
30498bcb0991SDimitry Andric Type *Ty = cast<Instruction>(V)->getOperand(0)->getType();
30500b57cec5SDimitry Andric if (Ty != SrcTy || !isValidElementType(Ty)) {
30510b57cec5SDimitry Andric BS.cancelScheduling(VL, VL0);
30528bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
30538bcb0991SDimitry Andric ReuseShuffleIndicies);
30540b57cec5SDimitry Andric LLVM_DEBUG(dbgs()
30550b57cec5SDimitry Andric << "SLP: Gathering casts with different src types.\n");
30560b57cec5SDimitry Andric return;
30570b57cec5SDimitry Andric }
30580b57cec5SDimitry Andric }
30598bcb0991SDimitry Andric TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
30608bcb0991SDimitry Andric ReuseShuffleIndicies);
30610b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
30620b57cec5SDimitry Andric
30638bcb0991SDimitry Andric TE->setOperandsInOrder();
30640b57cec5SDimitry Andric for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
30650b57cec5SDimitry Andric ValueList Operands;
30660b57cec5SDimitry Andric // Prepare the operand vector.
30678bcb0991SDimitry Andric for (Value *V : VL)
30688bcb0991SDimitry Andric Operands.push_back(cast<Instruction>(V)->getOperand(i));
30690b57cec5SDimitry Andric
30700b57cec5SDimitry Andric buildTree_rec(Operands, Depth + 1, {TE, i});
30710b57cec5SDimitry Andric }
30720b57cec5SDimitry Andric return;
30730b57cec5SDimitry Andric }
30740b57cec5SDimitry Andric case Instruction::ICmp:
30750b57cec5SDimitry Andric case Instruction::FCmp: {
30760b57cec5SDimitry Andric // Check that all of the compares have the same predicate.
30770b57cec5SDimitry Andric CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
30780b57cec5SDimitry Andric CmpInst::Predicate SwapP0 = CmpInst::getSwappedPredicate(P0);
30790b57cec5SDimitry Andric Type *ComparedTy = VL0->getOperand(0)->getType();
30808bcb0991SDimitry Andric for (Value *V : VL) {
30818bcb0991SDimitry Andric CmpInst *Cmp = cast<CmpInst>(V);
30820b57cec5SDimitry Andric if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) ||
30830b57cec5SDimitry Andric Cmp->getOperand(0)->getType() != ComparedTy) {
30840b57cec5SDimitry Andric BS.cancelScheduling(VL, VL0);
30858bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
30868bcb0991SDimitry Andric ReuseShuffleIndicies);
30870b57cec5SDimitry Andric LLVM_DEBUG(dbgs()
30880b57cec5SDimitry Andric << "SLP: Gathering cmp with different predicate.\n");
30890b57cec5SDimitry Andric return;
30900b57cec5SDimitry Andric }
30910b57cec5SDimitry Andric }
30920b57cec5SDimitry Andric
30938bcb0991SDimitry Andric TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
30948bcb0991SDimitry Andric ReuseShuffleIndicies);
30950b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
30960b57cec5SDimitry Andric
30970b57cec5SDimitry Andric ValueList Left, Right;
30980b57cec5SDimitry Andric if (cast<CmpInst>(VL0)->isCommutative()) {
30990b57cec5SDimitry Andric // Commutative predicate - collect + sort operands of the instructions
31000b57cec5SDimitry Andric // so that each side is more likely to have the same opcode.
31010b57cec5SDimitry Andric assert(P0 == SwapP0 && "Commutative Predicate mismatch");
3102480093f4SDimitry Andric reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
31030b57cec5SDimitry Andric } else {
31040b57cec5SDimitry Andric // Collect operands - commute if it uses the swapped predicate.
31050b57cec5SDimitry Andric for (Value *V : VL) {
31060b57cec5SDimitry Andric auto *Cmp = cast<CmpInst>(V);
31070b57cec5SDimitry Andric Value *LHS = Cmp->getOperand(0);
31080b57cec5SDimitry Andric Value *RHS = Cmp->getOperand(1);
31090b57cec5SDimitry Andric if (Cmp->getPredicate() != P0)
31100b57cec5SDimitry Andric std::swap(LHS, RHS);
31110b57cec5SDimitry Andric Left.push_back(LHS);
31120b57cec5SDimitry Andric Right.push_back(RHS);
31130b57cec5SDimitry Andric }
31140b57cec5SDimitry Andric }
31158bcb0991SDimitry Andric TE->setOperand(0, Left);
31168bcb0991SDimitry Andric TE->setOperand(1, Right);
31170b57cec5SDimitry Andric buildTree_rec(Left, Depth + 1, {TE, 0});
31180b57cec5SDimitry Andric buildTree_rec(Right, Depth + 1, {TE, 1});
31190b57cec5SDimitry Andric return;
31200b57cec5SDimitry Andric }
31210b57cec5SDimitry Andric case Instruction::Select:
31220b57cec5SDimitry Andric case Instruction::FNeg:
31230b57cec5SDimitry Andric case Instruction::Add:
31240b57cec5SDimitry Andric case Instruction::FAdd:
31250b57cec5SDimitry Andric case Instruction::Sub:
31260b57cec5SDimitry Andric case Instruction::FSub:
31270b57cec5SDimitry Andric case Instruction::Mul:
31280b57cec5SDimitry Andric case Instruction::FMul:
31290b57cec5SDimitry Andric case Instruction::UDiv:
31300b57cec5SDimitry Andric case Instruction::SDiv:
31310b57cec5SDimitry Andric case Instruction::FDiv:
31320b57cec5SDimitry Andric case Instruction::URem:
31330b57cec5SDimitry Andric case Instruction::SRem:
31340b57cec5SDimitry Andric case Instruction::FRem:
31350b57cec5SDimitry Andric case Instruction::Shl:
31360b57cec5SDimitry Andric case Instruction::LShr:
31370b57cec5SDimitry Andric case Instruction::AShr:
31380b57cec5SDimitry Andric case Instruction::And:
31390b57cec5SDimitry Andric case Instruction::Or:
31400b57cec5SDimitry Andric case Instruction::Xor: {
31418bcb0991SDimitry Andric TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
31428bcb0991SDimitry Andric ReuseShuffleIndicies);
31430b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
31440b57cec5SDimitry Andric
31450b57cec5SDimitry Andric // Sort operands of the instructions so that each side is more likely to
31460b57cec5SDimitry Andric // have the same opcode.
31470b57cec5SDimitry Andric if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
31480b57cec5SDimitry Andric ValueList Left, Right;
3149480093f4SDimitry Andric reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
31508bcb0991SDimitry Andric TE->setOperand(0, Left);
31518bcb0991SDimitry Andric TE->setOperand(1, Right);
31520b57cec5SDimitry Andric buildTree_rec(Left, Depth + 1, {TE, 0});
31530b57cec5SDimitry Andric buildTree_rec(Right, Depth + 1, {TE, 1});
31540b57cec5SDimitry Andric return;
31550b57cec5SDimitry Andric }
31560b57cec5SDimitry Andric
31578bcb0991SDimitry Andric TE->setOperandsInOrder();
31580b57cec5SDimitry Andric for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
31590b57cec5SDimitry Andric ValueList Operands;
31600b57cec5SDimitry Andric // Prepare the operand vector.
3161af732203SDimitry Andric for (Value *V : VL)
3162af732203SDimitry Andric Operands.push_back(cast<Instruction>(V)->getOperand(i));
31630b57cec5SDimitry Andric
31640b57cec5SDimitry Andric buildTree_rec(Operands, Depth + 1, {TE, i});
31650b57cec5SDimitry Andric }
31660b57cec5SDimitry Andric return;
31670b57cec5SDimitry Andric }
31680b57cec5SDimitry Andric case Instruction::GetElementPtr: {
31690b57cec5SDimitry Andric // We don't combine GEPs with complicated (nested) indexing.
31708bcb0991SDimitry Andric for (Value *V : VL) {
31718bcb0991SDimitry Andric if (cast<Instruction>(V)->getNumOperands() != 2) {
31720b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
31730b57cec5SDimitry Andric BS.cancelScheduling(VL, VL0);
31748bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
31758bcb0991SDimitry Andric ReuseShuffleIndicies);
31760b57cec5SDimitry Andric return;
31770b57cec5SDimitry Andric }
31780b57cec5SDimitry Andric }
31790b57cec5SDimitry Andric
31800b57cec5SDimitry Andric // We can't combine several GEPs into one vector if they operate on
31810b57cec5SDimitry Andric // different types.
31820b57cec5SDimitry Andric Type *Ty0 = VL0->getOperand(0)->getType();
31838bcb0991SDimitry Andric for (Value *V : VL) {
31848bcb0991SDimitry Andric Type *CurTy = cast<Instruction>(V)->getOperand(0)->getType();
31850b57cec5SDimitry Andric if (Ty0 != CurTy) {
31860b57cec5SDimitry Andric LLVM_DEBUG(dbgs()
31870b57cec5SDimitry Andric << "SLP: not-vectorizable GEP (different types).\n");
31880b57cec5SDimitry Andric BS.cancelScheduling(VL, VL0);
31898bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
31908bcb0991SDimitry Andric ReuseShuffleIndicies);
31910b57cec5SDimitry Andric return;
31920b57cec5SDimitry Andric }
31930b57cec5SDimitry Andric }
31940b57cec5SDimitry Andric
31950b57cec5SDimitry Andric // We don't combine GEPs with non-constant indexes.
3196480093f4SDimitry Andric Type *Ty1 = VL0->getOperand(1)->getType();
31978bcb0991SDimitry Andric for (Value *V : VL) {
31988bcb0991SDimitry Andric auto Op = cast<Instruction>(V)->getOperand(1);
3199480093f4SDimitry Andric if (!isa<ConstantInt>(Op) ||
3200480093f4SDimitry Andric (Op->getType() != Ty1 &&
3201480093f4SDimitry Andric Op->getType()->getScalarSizeInBits() >
3202480093f4SDimitry Andric DL->getIndexSizeInBits(
3203480093f4SDimitry Andric V->getType()->getPointerAddressSpace()))) {
32040b57cec5SDimitry Andric LLVM_DEBUG(dbgs()
32050b57cec5SDimitry Andric << "SLP: not-vectorizable GEP (non-constant indexes).\n");
32060b57cec5SDimitry Andric BS.cancelScheduling(VL, VL0);
32078bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
32088bcb0991SDimitry Andric ReuseShuffleIndicies);
32090b57cec5SDimitry Andric return;
32100b57cec5SDimitry Andric }
32110b57cec5SDimitry Andric }
32120b57cec5SDimitry Andric
32138bcb0991SDimitry Andric TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
32148bcb0991SDimitry Andric ReuseShuffleIndicies);
32150b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
32168bcb0991SDimitry Andric TE->setOperandsInOrder();
32170b57cec5SDimitry Andric for (unsigned i = 0, e = 2; i < e; ++i) {
32180b57cec5SDimitry Andric ValueList Operands;
32190b57cec5SDimitry Andric // Prepare the operand vector.
32208bcb0991SDimitry Andric for (Value *V : VL)
32218bcb0991SDimitry Andric Operands.push_back(cast<Instruction>(V)->getOperand(i));
32220b57cec5SDimitry Andric
32230b57cec5SDimitry Andric buildTree_rec(Operands, Depth + 1, {TE, i});
32240b57cec5SDimitry Andric }
32250b57cec5SDimitry Andric return;
32260b57cec5SDimitry Andric }
32270b57cec5SDimitry Andric case Instruction::Store: {
32288bcb0991SDimitry Andric // Check if the stores are consecutive or if we need to swizzle them.
3229480093f4SDimitry Andric llvm::Type *ScalarTy = cast<StoreInst>(VL0)->getValueOperand()->getType();
3230af732203SDimitry Andric // Avoid types that are padded when being allocated as scalars, while
3231af732203SDimitry Andric // being packed together in a vector (such as i1).
3232af732203SDimitry Andric if (DL->getTypeSizeInBits(ScalarTy) !=
3233af732203SDimitry Andric DL->getTypeAllocSizeInBits(ScalarTy)) {
3234af732203SDimitry Andric BS.cancelScheduling(VL, VL0);
3235af732203SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
3236af732203SDimitry Andric ReuseShuffleIndicies);
3237af732203SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Gathering stores of non-packed type.\n");
3238af732203SDimitry Andric return;
3239af732203SDimitry Andric }
3240480093f4SDimitry Andric // Make sure all stores in the bundle are simple - we can't vectorize
3241480093f4SDimitry Andric // atomic or volatile stores.
3242480093f4SDimitry Andric SmallVector<Value *, 4> PointerOps(VL.size());
3243480093f4SDimitry Andric ValueList Operands(VL.size());
3244480093f4SDimitry Andric auto POIter = PointerOps.begin();
3245480093f4SDimitry Andric auto OIter = Operands.begin();
3246480093f4SDimitry Andric for (Value *V : VL) {
3247480093f4SDimitry Andric auto *SI = cast<StoreInst>(V);
3248480093f4SDimitry Andric if (!SI->isSimple()) {
3249480093f4SDimitry Andric BS.cancelScheduling(VL, VL0);
3250480093f4SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
3251480093f4SDimitry Andric ReuseShuffleIndicies);
3252480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple stores.\n");
3253480093f4SDimitry Andric return;
3254480093f4SDimitry Andric }
3255480093f4SDimitry Andric *POIter = SI->getPointerOperand();
3256480093f4SDimitry Andric *OIter = SI->getValueOperand();
3257480093f4SDimitry Andric ++POIter;
3258480093f4SDimitry Andric ++OIter;
3259480093f4SDimitry Andric }
3260480093f4SDimitry Andric
3261480093f4SDimitry Andric OrdersType CurrentOrder;
3262480093f4SDimitry Andric // Check the order of pointer operands.
32635f7ddb14SDimitry Andric if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
3264480093f4SDimitry Andric Value *Ptr0;
3265480093f4SDimitry Andric Value *PtrN;
3266480093f4SDimitry Andric if (CurrentOrder.empty()) {
3267480093f4SDimitry Andric Ptr0 = PointerOps.front();
3268480093f4SDimitry Andric PtrN = PointerOps.back();
3269480093f4SDimitry Andric } else {
3270480093f4SDimitry Andric Ptr0 = PointerOps[CurrentOrder.front()];
3271480093f4SDimitry Andric PtrN = PointerOps[CurrentOrder.back()];
3272480093f4SDimitry Andric }
32735f7ddb14SDimitry Andric Optional<int> Dist =
32745f7ddb14SDimitry Andric getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
3275480093f4SDimitry Andric // Check that the sorted pointer operands are consecutive.
32765f7ddb14SDimitry Andric if (static_cast<unsigned>(*Dist) == VL.size() - 1) {
3277480093f4SDimitry Andric if (CurrentOrder.empty()) {
3278480093f4SDimitry Andric // Original stores are consecutive and does not require reordering.
3279480093f4SDimitry Andric ++NumOpsWantToKeepOriginalOrder;
3280480093f4SDimitry Andric TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S,
3281480093f4SDimitry Andric UserTreeIdx, ReuseShuffleIndicies);
3282480093f4SDimitry Andric TE->setOperandsInOrder();
3283480093f4SDimitry Andric buildTree_rec(Operands, Depth + 1, {TE, 0});
3284480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
3285480093f4SDimitry Andric } else {
3286480093f4SDimitry Andric TreeEntry *TE =
3287480093f4SDimitry Andric newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
3288af732203SDimitry Andric ReuseShuffleIndicies, CurrentOrder);
3289480093f4SDimitry Andric TE->setOperandsInOrder();
3290480093f4SDimitry Andric buildTree_rec(Operands, Depth + 1, {TE, 0});
3291480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled stores.\n");
3292af732203SDimitry Andric findRootOrder(CurrentOrder);
3293af732203SDimitry Andric ++NumOpsWantToKeepOrder[CurrentOrder];
3294480093f4SDimitry Andric }
3295480093f4SDimitry Andric return;
3296480093f4SDimitry Andric }
3297480093f4SDimitry Andric }
3298480093f4SDimitry Andric
32990b57cec5SDimitry Andric BS.cancelScheduling(VL, VL0);
33008bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
33018bcb0991SDimitry Andric ReuseShuffleIndicies);
33020b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
33030b57cec5SDimitry Andric return;
33040b57cec5SDimitry Andric }
33050b57cec5SDimitry Andric case Instruction::Call: {
33065ffd83dbSDimitry Andric // Check if the calls are all to the same vectorizable intrinsic or
33075ffd83dbSDimitry Andric // library function.
33080b57cec5SDimitry Andric CallInst *CI = cast<CallInst>(VL0);
33090b57cec5SDimitry Andric Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
33105ffd83dbSDimitry Andric
33115ffd83dbSDimitry Andric VFShape Shape = VFShape::get(
3312af732203SDimitry Andric *CI, ElementCount::getFixed(static_cast<unsigned int>(VL.size())),
33135ffd83dbSDimitry Andric false /*HasGlobalPred*/);
33145ffd83dbSDimitry Andric Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
33155ffd83dbSDimitry Andric
33165ffd83dbSDimitry Andric if (!VecFunc && !isTriviallyVectorizable(ID)) {
33170b57cec5SDimitry Andric BS.cancelScheduling(VL, VL0);
33188bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
33198bcb0991SDimitry Andric ReuseShuffleIndicies);
33200b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
33210b57cec5SDimitry Andric return;
33220b57cec5SDimitry Andric }
33235ffd83dbSDimitry Andric Function *F = CI->getCalledFunction();
33240b57cec5SDimitry Andric unsigned NumArgs = CI->getNumArgOperands();
33250b57cec5SDimitry Andric SmallVector<Value*, 4> ScalarArgs(NumArgs, nullptr);
33260b57cec5SDimitry Andric for (unsigned j = 0; j != NumArgs; ++j)
33270b57cec5SDimitry Andric if (hasVectorInstrinsicScalarOpd(ID, j))
33280b57cec5SDimitry Andric ScalarArgs[j] = CI->getArgOperand(j);
33298bcb0991SDimitry Andric for (Value *V : VL) {
33308bcb0991SDimitry Andric CallInst *CI2 = dyn_cast<CallInst>(V);
33315ffd83dbSDimitry Andric if (!CI2 || CI2->getCalledFunction() != F ||
33320b57cec5SDimitry Andric getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
33335ffd83dbSDimitry Andric (VecFunc &&
33345ffd83dbSDimitry Andric VecFunc != VFDatabase(*CI2).getVectorizedFunction(Shape)) ||
33350b57cec5SDimitry Andric !CI->hasIdenticalOperandBundleSchema(*CI2)) {
33360b57cec5SDimitry Andric BS.cancelScheduling(VL, VL0);
33378bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
33388bcb0991SDimitry Andric ReuseShuffleIndicies);
33398bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *V
33400b57cec5SDimitry Andric << "\n");
33410b57cec5SDimitry Andric return;
33420b57cec5SDimitry Andric }
33430b57cec5SDimitry Andric // Some intrinsics have scalar arguments and should be same in order for
33440b57cec5SDimitry Andric // them to be vectorized.
33450b57cec5SDimitry Andric for (unsigned j = 0; j != NumArgs; ++j) {
33460b57cec5SDimitry Andric if (hasVectorInstrinsicScalarOpd(ID, j)) {
33470b57cec5SDimitry Andric Value *A1J = CI2->getArgOperand(j);
33480b57cec5SDimitry Andric if (ScalarArgs[j] != A1J) {
33490b57cec5SDimitry Andric BS.cancelScheduling(VL, VL0);
33508bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
33518bcb0991SDimitry Andric ReuseShuffleIndicies);
33520b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
33530b57cec5SDimitry Andric << " argument " << ScalarArgs[j] << "!=" << A1J
33540b57cec5SDimitry Andric << "\n");
33550b57cec5SDimitry Andric return;
33560b57cec5SDimitry Andric }
33570b57cec5SDimitry Andric }
33580b57cec5SDimitry Andric }
33590b57cec5SDimitry Andric // Verify that the bundle operands are identical between the two calls.
33600b57cec5SDimitry Andric if (CI->hasOperandBundles() &&
33610b57cec5SDimitry Andric !std::equal(CI->op_begin() + CI->getBundleOperandsStartIndex(),
33620b57cec5SDimitry Andric CI->op_begin() + CI->getBundleOperandsEndIndex(),
33630b57cec5SDimitry Andric CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
33640b57cec5SDimitry Andric BS.cancelScheduling(VL, VL0);
33658bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
33668bcb0991SDimitry Andric ReuseShuffleIndicies);
33670b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:"
33688bcb0991SDimitry Andric << *CI << "!=" << *V << '\n');
33690b57cec5SDimitry Andric return;
33700b57cec5SDimitry Andric }
33710b57cec5SDimitry Andric }
33720b57cec5SDimitry Andric
33738bcb0991SDimitry Andric TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
33748bcb0991SDimitry Andric ReuseShuffleIndicies);
33758bcb0991SDimitry Andric TE->setOperandsInOrder();
33760b57cec5SDimitry Andric for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
33770b57cec5SDimitry Andric ValueList Operands;
33780b57cec5SDimitry Andric // Prepare the operand vector.
33798bcb0991SDimitry Andric for (Value *V : VL) {
33808bcb0991SDimitry Andric auto *CI2 = cast<CallInst>(V);
33810b57cec5SDimitry Andric Operands.push_back(CI2->getArgOperand(i));
33820b57cec5SDimitry Andric }
33830b57cec5SDimitry Andric buildTree_rec(Operands, Depth + 1, {TE, i});
33840b57cec5SDimitry Andric }
33850b57cec5SDimitry Andric return;
33860b57cec5SDimitry Andric }
33870b57cec5SDimitry Andric case Instruction::ShuffleVector: {
33880b57cec5SDimitry Andric // If this is not an alternate sequence of opcode like add-sub
33890b57cec5SDimitry Andric // then do not vectorize this instruction.
33900b57cec5SDimitry Andric if (!S.isAltShuffle()) {
33910b57cec5SDimitry Andric BS.cancelScheduling(VL, VL0);
33928bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
33938bcb0991SDimitry Andric ReuseShuffleIndicies);
33940b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
33950b57cec5SDimitry Andric return;
33960b57cec5SDimitry Andric }
33978bcb0991SDimitry Andric TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
33988bcb0991SDimitry Andric ReuseShuffleIndicies);
33990b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
34000b57cec5SDimitry Andric
34010b57cec5SDimitry Andric // Reorder operands if reordering would enable vectorization.
34020b57cec5SDimitry Andric if (isa<BinaryOperator>(VL0)) {
34030b57cec5SDimitry Andric ValueList Left, Right;
3404480093f4SDimitry Andric reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
34058bcb0991SDimitry Andric TE->setOperand(0, Left);
34068bcb0991SDimitry Andric TE->setOperand(1, Right);
34070b57cec5SDimitry Andric buildTree_rec(Left, Depth + 1, {TE, 0});
34080b57cec5SDimitry Andric buildTree_rec(Right, Depth + 1, {TE, 1});
34090b57cec5SDimitry Andric return;
34100b57cec5SDimitry Andric }
34110b57cec5SDimitry Andric
34128bcb0991SDimitry Andric TE->setOperandsInOrder();
34130b57cec5SDimitry Andric for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
34140b57cec5SDimitry Andric ValueList Operands;
34150b57cec5SDimitry Andric // Prepare the operand vector.
34168bcb0991SDimitry Andric for (Value *V : VL)
34178bcb0991SDimitry Andric Operands.push_back(cast<Instruction>(V)->getOperand(i));
34180b57cec5SDimitry Andric
34190b57cec5SDimitry Andric buildTree_rec(Operands, Depth + 1, {TE, i});
34200b57cec5SDimitry Andric }
34210b57cec5SDimitry Andric return;
34220b57cec5SDimitry Andric }
34230b57cec5SDimitry Andric default:
34240b57cec5SDimitry Andric BS.cancelScheduling(VL, VL0);
34258bcb0991SDimitry Andric newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
34268bcb0991SDimitry Andric ReuseShuffleIndicies);
34270b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
34280b57cec5SDimitry Andric return;
34290b57cec5SDimitry Andric }
34300b57cec5SDimitry Andric }
34310b57cec5SDimitry Andric
canMapToVector(Type * T,const DataLayout & DL) const34320b57cec5SDimitry Andric unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const {
3433480093f4SDimitry Andric unsigned N = 1;
3434480093f4SDimitry Andric Type *EltTy = T;
3435480093f4SDimitry Andric
34365ffd83dbSDimitry Andric while (isa<StructType>(EltTy) || isa<ArrayType>(EltTy) ||
34375ffd83dbSDimitry Andric isa<VectorType>(EltTy)) {
3438480093f4SDimitry Andric if (auto *ST = dyn_cast<StructType>(EltTy)) {
3439480093f4SDimitry Andric // Check that struct is homogeneous.
3440480093f4SDimitry Andric for (const auto *Ty : ST->elements())
3441480093f4SDimitry Andric if (Ty != *ST->element_begin())
3442480093f4SDimitry Andric return 0;
3443480093f4SDimitry Andric N *= ST->getNumElements();
34440b57cec5SDimitry Andric EltTy = *ST->element_begin();
34455ffd83dbSDimitry Andric } else if (auto *AT = dyn_cast<ArrayType>(EltTy)) {
34465ffd83dbSDimitry Andric N *= AT->getNumElements();
34475ffd83dbSDimitry Andric EltTy = AT->getElementType();
34480b57cec5SDimitry Andric } else {
3449af732203SDimitry Andric auto *VT = cast<FixedVectorType>(EltTy);
34505ffd83dbSDimitry Andric N *= VT->getNumElements();
34515ffd83dbSDimitry Andric EltTy = VT->getElementType();
34520b57cec5SDimitry Andric }
3453480093f4SDimitry Andric }
3454480093f4SDimitry Andric
34550b57cec5SDimitry Andric if (!isValidElementType(EltTy))
34560b57cec5SDimitry Andric return 0;
34575ffd83dbSDimitry Andric uint64_t VTSize = DL.getTypeStoreSizeInBits(FixedVectorType::get(EltTy, N));
34580b57cec5SDimitry Andric if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize || VTSize != DL.getTypeStoreSizeInBits(T))
34590b57cec5SDimitry Andric return 0;
34600b57cec5SDimitry Andric return N;
34610b57cec5SDimitry Andric }
34620b57cec5SDimitry Andric
/// Check whether every value in \p VL is an extractelement/extractvalue
/// reading a distinct lane of one common source vector/aggregate \p Vec, so
/// the extracts could be replaced by reusing \p Vec (possibly shuffled).
/// \param OpValue the representative extract instruction for the bundle.
/// \param CurrentOrder [out] for each source lane, the position in \p VL of
///        the extract that reads it; cleared when reuse is impossible.
/// \returns true if the extracts already appear in lane order (no shuffle
///          needed); false either when a reorder would be required (then
///          \p CurrentOrder holds the permutation) or when reuse is
///          impossible (then \p CurrentOrder is empty).
bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
                              SmallVectorImpl<unsigned> &CurrentOrder) const {
  Instruction *E0 = cast<Instruction>(OpValue);
  assert(E0->getOpcode() == Instruction::ExtractElement ||
         E0->getOpcode() == Instruction::ExtractValue);
  assert(E0->getOpcode() == getSameOpcode(VL).getOpcode() && "Invalid opcode");
  // Check if all of the extracts come from the same vector and from the
  // correct offset.
  Value *Vec = E0->getOperand(0);

  CurrentOrder.clear();

  // We have to extract from a vector/aggregate with the same number of elements.
  unsigned NElts;
  if (E0->getOpcode() == Instruction::ExtractValue) {
    // Aggregate source: it is only reusable if it can be flattened to a
    // vector (canMapToVector) ...
    const DataLayout &DL = E0->getModule()->getDataLayout();
    NElts = canMapToVector(Vec->getType(), DL);
    if (!NElts)
      return false;
    // Check if load can be rewritten as load of vector. The source must be a
    // simple load whose only uses are exactly these VL.size() extracts.
    LoadInst *LI = dyn_cast<LoadInst>(Vec);
    if (!LI || !LI->isSimple() || !LI->hasNUses(VL.size()))
      return false;
  } else {
    NElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
  }

  // Bundle width must match the source element count exactly.
  if (NElts != VL.size())
    return false;

  // Check that all of the indices extract from the correct offset.
  bool ShouldKeepOrder = true;
  unsigned E = VL.size();
  // Assign to all items the initial value E + 1 so we can check if the extract
  // instruction index was used already.
  // Also, later we can check that all the indices are used and we have a
  // consecutive access in the extract instructions, by checking that no
  // element of CurrentOrder still has value E + 1.
  CurrentOrder.assign(E, E + 1);
  unsigned I = 0;
  for (; I < E; ++I) {
    auto *Inst = cast<Instruction>(VL[I]);
    // Every extract must read from the same source value.
    if (Inst->getOperand(0) != Vec)
      break;
    // Non-constant extract indices cannot be matched to a lane.
    Optional<unsigned> Idx = getExtractIndex(Inst);
    if (!Idx)
      break;
    const unsigned ExtIdx = *Idx;
    if (ExtIdx != I) {
      // Out-of-position extract: record the permutation, rejecting lanes that
      // are out of range or already claimed (sentinel E + 1 means unclaimed).
      if (ExtIdx >= E || CurrentOrder[ExtIdx] != E + 1)
        break;
      ShouldKeepOrder = false;
      CurrentOrder[ExtIdx] = I;
    } else {
      // In-position extract; reject a duplicate claim of lane I.
      if (CurrentOrder[I] != E + 1)
        break;
      CurrentOrder[I] = I;
    }
  }
  // An early break above means some extract disqualified reuse entirely.
  if (I < E) {
    CurrentOrder.clear();
    return false;
  }

  return ShouldKeepOrder;
}
35290b57cec5SDimitry Andric
areAllUsersVectorized(Instruction * I,ArrayRef<Value * > VectorizedVals) const35305f7ddb14SDimitry Andric bool BoUpSLP::areAllUsersVectorized(Instruction *I,
35315f7ddb14SDimitry Andric ArrayRef<Value *> VectorizedVals) const {
35325f7ddb14SDimitry Andric return (I->hasOneUse() && is_contained(VectorizedVals, I)) ||
35335f7ddb14SDimitry Andric llvm::all_of(I->users(), [this](User *U) {
35340b57cec5SDimitry Andric return ScalarToTreeEntry.count(U) > 0;
35350b57cec5SDimitry Andric });
35360b57cec5SDimitry Andric }
35370b57cec5SDimitry Andric
3538af732203SDimitry Andric static std::pair<InstructionCost, InstructionCost>
getVectorCallCosts(CallInst * CI,FixedVectorType * VecTy,TargetTransformInfo * TTI,TargetLibraryInfo * TLI)3539af732203SDimitry Andric getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
3540af732203SDimitry Andric TargetTransformInfo *TTI, TargetLibraryInfo *TLI) {
35415ffd83dbSDimitry Andric Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
35425ffd83dbSDimitry Andric
35435ffd83dbSDimitry Andric // Calculate the cost of the scalar and vector calls.
35445f7ddb14SDimitry Andric SmallVector<Type *, 4> VecTys;
35455f7ddb14SDimitry Andric for (Use &Arg : CI->args())
35465f7ddb14SDimitry Andric VecTys.push_back(
35475f7ddb14SDimitry Andric FixedVectorType::get(Arg->getType(), VecTy->getNumElements()));
35485f7ddb14SDimitry Andric FastMathFlags FMF;
35495f7ddb14SDimitry Andric if (auto *FPCI = dyn_cast<FPMathOperator>(CI))
35505f7ddb14SDimitry Andric FMF = FPCI->getFastMathFlags();
35515f7ddb14SDimitry Andric SmallVector<const Value *> Arguments(CI->arg_begin(), CI->arg_end());
35525f7ddb14SDimitry Andric IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, VecTys, FMF,
35535f7ddb14SDimitry Andric dyn_cast<IntrinsicInst>(CI));
3554af732203SDimitry Andric auto IntrinsicCost =
35555ffd83dbSDimitry Andric TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);
35565ffd83dbSDimitry Andric
3557af732203SDimitry Andric auto Shape = VFShape::get(*CI, ElementCount::getFixed(static_cast<unsigned>(
3558af732203SDimitry Andric VecTy->getNumElements())),
35595ffd83dbSDimitry Andric false /*HasGlobalPred*/);
35605ffd83dbSDimitry Andric Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
3561af732203SDimitry Andric auto LibCost = IntrinsicCost;
35625ffd83dbSDimitry Andric if (!CI->isNoBuiltin() && VecFunc) {
35635ffd83dbSDimitry Andric // Calculate the cost of the vector library call.
35645ffd83dbSDimitry Andric // If the corresponding vector call is cheaper, return its cost.
35655ffd83dbSDimitry Andric LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys,
35665ffd83dbSDimitry Andric TTI::TCK_RecipThroughput);
35675ffd83dbSDimitry Andric }
35685ffd83dbSDimitry Andric return {IntrinsicCost, LibCost};
35695ffd83dbSDimitry Andric }
35705ffd83dbSDimitry Andric
/// Compute the cost of creating a vector of type \p VecTy containing the
/// extracted values from \p VL.
///
/// \p ShuffleKind describes how the extracts combine into the result, \p Mask
/// is the corresponding shuffle mask, and \p TTI provides the target cost
/// model. Every element of \p VL is expected to be an extractelement /
/// extractvalue instruction with a known constant index (getExtractIndex is
/// dereferenced unconditionally below).
static InstructionCost
computeExtractCost(ArrayRef<Value *> VL, FixedVectorType *VecTy,
                   TargetTransformInfo::ShuffleKind ShuffleKind,
                   ArrayRef<int> Mask, TargetTransformInfo &TTI) {
  unsigned NumOfParts = TTI.getNumberOfParts(VecTy);

  // The per-register analysis below only applies to a single-source permute
  // that is split across more than one legal vector register; otherwise fall
  // back to a plain shuffle cost query.
  if (ShuffleKind != TargetTransformInfo::SK_PermuteSingleSrc || !NumOfParts ||
      VecTy->getNumElements() < NumOfParts)
    return TTI.getShuffleCost(ShuffleKind, VecTy, Mask);

  bool AllConsecutive = true;
  unsigned EltsPerVector = VecTy->getNumElements() / NumOfParts;
  // Deliberately starts at ~0u so the pre-increment at the top of the loop
  // makes Idx == 0 for the first element (unsigned wrap-around).
  unsigned Idx = -1;
  InstructionCost Cost = 0;

  // Process extracts in blocks of EltsPerVector to check if the source vector
  // operand can be re-used directly. If not, add the cost of creating a shuffle
  // to extract the values into a vector register.
  for (auto *V : VL) {
    ++Idx;

    // Reached the start of a new vector register: reset the per-register
    // consecutiveness tracking.
    if (Idx % EltsPerVector == 0) {
      AllConsecutive = true;
      continue;
    }

    // Check all extracts for a vector register on the target directly
    // extract values in order: each source index must follow its predecessor
    // and land in the matching lane of the destination register.
    unsigned CurrentIdx = *getExtractIndex(cast<Instruction>(V));
    unsigned PrevIdx = *getExtractIndex(cast<Instruction>(VL[Idx - 1]));
    AllConsecutive &= PrevIdx + 1 == CurrentIdx &&
                      CurrentIdx % EltsPerVector == Idx % EltsPerVector;

    if (AllConsecutive)
      continue;

    // Skip all indices, except for the last index per vector block, so the
    // shuffle cost for a broken register is charged exactly once.
    if ((Idx + 1) % EltsPerVector != 0 && Idx + 1 != VL.size())
      continue;

    // If we have a series of extracts which are not consecutive and hence
    // cannot re-use the source vector register directly, compute the shuffle
    // cost to extract a vector with EltsPerVector elements.
    Cost += TTI.getShuffleCost(
        TargetTransformInfo::SK_PermuteSingleSrc,
        FixedVectorType::get(VecTy->getElementType(), EltsPerVector));
  }
  return Cost;
}
36235f7ddb14SDimitry Andric
36245f7ddb14SDimitry Andric /// Shuffles \p Mask in accordance with the given \p SubMask.
addMask(SmallVectorImpl<int> & Mask,ArrayRef<int> SubMask)36255f7ddb14SDimitry Andric static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask) {
36265f7ddb14SDimitry Andric if (SubMask.empty())
36275f7ddb14SDimitry Andric return;
36285f7ddb14SDimitry Andric if (Mask.empty()) {
36295f7ddb14SDimitry Andric Mask.append(SubMask.begin(), SubMask.end());
36305f7ddb14SDimitry Andric return;
36315f7ddb14SDimitry Andric }
36325f7ddb14SDimitry Andric SmallVector<int, 4> NewMask(SubMask.size(), SubMask.size());
36335f7ddb14SDimitry Andric int TermValue = std::min(Mask.size(), SubMask.size());
36345f7ddb14SDimitry Andric for (int I = 0, E = SubMask.size(); I < E; ++I) {
36355f7ddb14SDimitry Andric if (SubMask[I] >= TermValue || SubMask[I] == UndefMaskElem ||
36365f7ddb14SDimitry Andric Mask[SubMask[I]] >= TermValue) {
36375f7ddb14SDimitry Andric NewMask[I] = UndefMaskElem;
36385f7ddb14SDimitry Andric continue;
36395f7ddb14SDimitry Andric }
36405f7ddb14SDimitry Andric NewMask[I] = Mask[SubMask[I]];
36415f7ddb14SDimitry Andric }
36425f7ddb14SDimitry Andric Mask.swap(NewMask);
36435f7ddb14SDimitry Andric }
36445f7ddb14SDimitry Andric
getEntryCost(const TreeEntry * E,ArrayRef<Value * > VectorizedVals)36455f7ddb14SDimitry Andric InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
36465f7ddb14SDimitry Andric ArrayRef<Value *> VectorizedVals) {
36470b57cec5SDimitry Andric ArrayRef<Value*> VL = E->Scalars;
36480b57cec5SDimitry Andric
36490b57cec5SDimitry Andric Type *ScalarTy = VL[0]->getType();
36500b57cec5SDimitry Andric if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
36510b57cec5SDimitry Andric ScalarTy = SI->getValueOperand()->getType();
36520b57cec5SDimitry Andric else if (CmpInst *CI = dyn_cast<CmpInst>(VL[0]))
36530b57cec5SDimitry Andric ScalarTy = CI->getOperand(0)->getType();
36545f7ddb14SDimitry Andric else if (auto *IE = dyn_cast<InsertElementInst>(VL[0]))
36555f7ddb14SDimitry Andric ScalarTy = IE->getOperand(1)->getType();
36565ffd83dbSDimitry Andric auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
36575ffd83dbSDimitry Andric TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
36580b57cec5SDimitry Andric
36590b57cec5SDimitry Andric // If we have computed a smaller type for the expression, update VecTy so
36600b57cec5SDimitry Andric // that the costs will be accurate.
36610b57cec5SDimitry Andric if (MinBWs.count(VL[0]))
36625ffd83dbSDimitry Andric VecTy = FixedVectorType::get(
36630b57cec5SDimitry Andric IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());
36645f7ddb14SDimitry Andric auto *FinalVecTy = VecTy;
36650b57cec5SDimitry Andric
36660b57cec5SDimitry Andric unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size();
36670b57cec5SDimitry Andric bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
36685f7ddb14SDimitry Andric if (NeedToShuffleReuses)
36695f7ddb14SDimitry Andric FinalVecTy =
36705f7ddb14SDimitry Andric FixedVectorType::get(VecTy->getElementType(), ReuseShuffleNumbers);
36715f7ddb14SDimitry Andric // FIXME: it tries to fix a problem with MSVC buildbots.
36725f7ddb14SDimitry Andric TargetTransformInfo &TTIRef = *TTI;
36735f7ddb14SDimitry Andric auto &&AdjustExtractsCost = [this, &TTIRef, CostKind, VL, VecTy,
36745f7ddb14SDimitry Andric VectorizedVals](InstructionCost &Cost,
36755f7ddb14SDimitry Andric bool IsGather) {
36765f7ddb14SDimitry Andric DenseMap<Value *, int> ExtractVectorsTys;
36770b57cec5SDimitry Andric for (auto *V : VL) {
36780b57cec5SDimitry Andric // If all users of instruction are going to be vectorized and this
36790b57cec5SDimitry Andric // instruction itself is not going to be vectorized, consider this
36800b57cec5SDimitry Andric // instruction as dead and remove its cost from the final cost of the
36810b57cec5SDimitry Andric // vectorized tree.
36825f7ddb14SDimitry Andric if (!areAllUsersVectorized(cast<Instruction>(V), VectorizedVals) ||
36835f7ddb14SDimitry Andric (IsGather && ScalarToTreeEntry.count(V)))
36845f7ddb14SDimitry Andric continue;
36855f7ddb14SDimitry Andric auto *EE = cast<ExtractElementInst>(V);
36865f7ddb14SDimitry Andric unsigned Idx = *getExtractIndex(EE);
36875f7ddb14SDimitry Andric if (TTIRef.getNumberOfParts(VecTy) !=
36885f7ddb14SDimitry Andric TTIRef.getNumberOfParts(EE->getVectorOperandType())) {
36895f7ddb14SDimitry Andric auto It =
36905f7ddb14SDimitry Andric ExtractVectorsTys.try_emplace(EE->getVectorOperand(), Idx).first;
36915f7ddb14SDimitry Andric It->getSecond() = std::min<int>(It->second, Idx);
36925f7ddb14SDimitry Andric }
36935f7ddb14SDimitry Andric // Take credit for instruction that will become dead.
36945f7ddb14SDimitry Andric if (EE->hasOneUse()) {
36955f7ddb14SDimitry Andric Instruction *Ext = EE->user_back();
36965f7ddb14SDimitry Andric if ((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
36975f7ddb14SDimitry Andric all_of(Ext->users(),
36985f7ddb14SDimitry Andric [](User *U) { return isa<GetElementPtrInst>(U); })) {
36995f7ddb14SDimitry Andric // Use getExtractWithExtendCost() to calculate the cost of
37005f7ddb14SDimitry Andric // extractelement/ext pair.
37015f7ddb14SDimitry Andric Cost -=
37025f7ddb14SDimitry Andric TTIRef.getExtractWithExtendCost(Ext->getOpcode(), Ext->getType(),
37035f7ddb14SDimitry Andric EE->getVectorOperandType(), Idx);
37045f7ddb14SDimitry Andric // Add back the cost of s|zext which is subtracted separately.
37055f7ddb14SDimitry Andric Cost += TTIRef.getCastInstrCost(
37065f7ddb14SDimitry Andric Ext->getOpcode(), Ext->getType(), EE->getType(),
37075f7ddb14SDimitry Andric TTI::getCastContextHint(Ext), CostKind, Ext);
37085f7ddb14SDimitry Andric continue;
37090b57cec5SDimitry Andric }
37100b57cec5SDimitry Andric }
37115f7ddb14SDimitry Andric Cost -= TTIRef.getVectorInstrCost(Instruction::ExtractElement,
37125f7ddb14SDimitry Andric EE->getVectorOperandType(), Idx);
37135f7ddb14SDimitry Andric }
37145f7ddb14SDimitry Andric // Add a cost for subvector extracts/inserts if required.
37155f7ddb14SDimitry Andric for (const auto &Data : ExtractVectorsTys) {
37165f7ddb14SDimitry Andric auto *EEVTy = cast<FixedVectorType>(Data.first->getType());
37175f7ddb14SDimitry Andric unsigned NumElts = VecTy->getNumElements();
37185f7ddb14SDimitry Andric if (TTIRef.getNumberOfParts(EEVTy) > TTIRef.getNumberOfParts(VecTy)) {
37195f7ddb14SDimitry Andric unsigned Idx = (Data.second / NumElts) * NumElts;
37205f7ddb14SDimitry Andric unsigned EENumElts = EEVTy->getNumElements();
37215f7ddb14SDimitry Andric if (Idx + NumElts <= EENumElts) {
37225f7ddb14SDimitry Andric Cost +=
37235f7ddb14SDimitry Andric TTIRef.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
37245f7ddb14SDimitry Andric EEVTy, None, Idx, VecTy);
37255f7ddb14SDimitry Andric } else {
37265f7ddb14SDimitry Andric // Need to round up the subvector type vectorization factor to avoid a
37275f7ddb14SDimitry Andric // crash in cost model functions. Make SubVT so that Idx + VF of SubVT
37285f7ddb14SDimitry Andric // <= EENumElts.
37295f7ddb14SDimitry Andric auto *SubVT =
37305f7ddb14SDimitry Andric FixedVectorType::get(VecTy->getElementType(), EENumElts - Idx);
37315f7ddb14SDimitry Andric Cost +=
37325f7ddb14SDimitry Andric TTIRef.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
37335f7ddb14SDimitry Andric EEVTy, None, Idx, SubVT);
37345f7ddb14SDimitry Andric }
37355f7ddb14SDimitry Andric } else {
37365f7ddb14SDimitry Andric Cost += TTIRef.getShuffleCost(TargetTransformInfo::SK_InsertSubvector,
37375f7ddb14SDimitry Andric VecTy, None, 0, EEVTy);
37380b57cec5SDimitry Andric }
37390b57cec5SDimitry Andric }
37405f7ddb14SDimitry Andric };
37415f7ddb14SDimitry Andric if (E->State == TreeEntry::NeedToGather) {
37425f7ddb14SDimitry Andric if (allConstant(VL))
37435f7ddb14SDimitry Andric return 0;
37445f7ddb14SDimitry Andric if (isa<InsertElementInst>(VL[0]))
37455f7ddb14SDimitry Andric return InstructionCost::getInvalid();
37465f7ddb14SDimitry Andric SmallVector<int> Mask;
37475f7ddb14SDimitry Andric SmallVector<const TreeEntry *> Entries;
37485f7ddb14SDimitry Andric Optional<TargetTransformInfo::ShuffleKind> Shuffle =
37495f7ddb14SDimitry Andric isGatherShuffledEntry(E, Mask, Entries);
37505f7ddb14SDimitry Andric if (Shuffle.hasValue()) {
37515f7ddb14SDimitry Andric InstructionCost GatherCost = 0;
37525f7ddb14SDimitry Andric if (ShuffleVectorInst::isIdentityMask(Mask)) {
37535f7ddb14SDimitry Andric // Perfect match in the graph, will reuse the previously vectorized
37545f7ddb14SDimitry Andric // node. Cost is 0.
37555f7ddb14SDimitry Andric LLVM_DEBUG(
37565f7ddb14SDimitry Andric dbgs()
37575f7ddb14SDimitry Andric << "SLP: perfect diamond match for gather bundle that starts with "
37585f7ddb14SDimitry Andric << *VL.front() << ".\n");
37595f7ddb14SDimitry Andric if (NeedToShuffleReuses)
37605f7ddb14SDimitry Andric GatherCost =
37615f7ddb14SDimitry Andric TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
37625f7ddb14SDimitry Andric FinalVecTy, E->ReuseShuffleIndices);
37635f7ddb14SDimitry Andric } else {
37645f7ddb14SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: shuffled " << Entries.size()
37655f7ddb14SDimitry Andric << " entries for bundle that starts with "
37665f7ddb14SDimitry Andric << *VL.front() << ".\n");
37675f7ddb14SDimitry Andric // Detected that instead of gather we can emit a shuffle of single/two
37685f7ddb14SDimitry Andric // previously vectorized nodes. Add the cost of the permutation rather
37695f7ddb14SDimitry Andric // than gather.
37705f7ddb14SDimitry Andric ::addMask(Mask, E->ReuseShuffleIndices);
37715f7ddb14SDimitry Andric GatherCost = TTI->getShuffleCost(*Shuffle, FinalVecTy, Mask);
37725f7ddb14SDimitry Andric }
37735f7ddb14SDimitry Andric return GatherCost;
37745f7ddb14SDimitry Andric }
37755f7ddb14SDimitry Andric if (isSplat(VL)) {
37765f7ddb14SDimitry Andric // Found the broadcasting of the single scalar, calculate the cost as the
37775f7ddb14SDimitry Andric // broadcast.
37785f7ddb14SDimitry Andric return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
37795f7ddb14SDimitry Andric }
37805f7ddb14SDimitry Andric if (E->getOpcode() == Instruction::ExtractElement && allSameType(VL) &&
37815f7ddb14SDimitry Andric allSameBlock(VL) &&
37825f7ddb14SDimitry Andric !isa<ScalableVectorType>(
37835f7ddb14SDimitry Andric cast<ExtractElementInst>(E->getMainOp())->getVectorOperandType())) {
37845f7ddb14SDimitry Andric // Check that gather of extractelements can be represented as just a
37855f7ddb14SDimitry Andric // shuffle of a single/two vectors the scalars are extracted from.
37865f7ddb14SDimitry Andric SmallVector<int> Mask;
37875f7ddb14SDimitry Andric Optional<TargetTransformInfo::ShuffleKind> ShuffleKind =
37885f7ddb14SDimitry Andric isShuffle(VL, Mask);
37895f7ddb14SDimitry Andric if (ShuffleKind.hasValue()) {
37905f7ddb14SDimitry Andric // Found the bunch of extractelement instructions that must be gathered
37915f7ddb14SDimitry Andric // into a vector and can be represented as a permutation elements in a
37925f7ddb14SDimitry Andric // single input vector or of 2 input vectors.
37935f7ddb14SDimitry Andric InstructionCost Cost =
37945f7ddb14SDimitry Andric computeExtractCost(VL, VecTy, *ShuffleKind, Mask, *TTI);
37955f7ddb14SDimitry Andric AdjustExtractsCost(Cost, /*IsGather=*/true);
37965f7ddb14SDimitry Andric if (NeedToShuffleReuses)
37975f7ddb14SDimitry Andric Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
37985f7ddb14SDimitry Andric FinalVecTy, E->ReuseShuffleIndices);
37995f7ddb14SDimitry Andric return Cost;
38005f7ddb14SDimitry Andric }
38015f7ddb14SDimitry Andric }
38025f7ddb14SDimitry Andric InstructionCost ReuseShuffleCost = 0;
38035f7ddb14SDimitry Andric if (NeedToShuffleReuses)
38045f7ddb14SDimitry Andric ReuseShuffleCost = TTI->getShuffleCost(
38055f7ddb14SDimitry Andric TTI::SK_PermuteSingleSrc, FinalVecTy, E->ReuseShuffleIndices);
38060b57cec5SDimitry Andric return ReuseShuffleCost + getGatherCost(VL);
38070b57cec5SDimitry Andric }
38085f7ddb14SDimitry Andric InstructionCost CommonCost = 0;
38095f7ddb14SDimitry Andric SmallVector<int> Mask;
38105f7ddb14SDimitry Andric if (!E->ReorderIndices.empty()) {
38115f7ddb14SDimitry Andric SmallVector<int> NewMask;
38125f7ddb14SDimitry Andric if (E->getOpcode() == Instruction::Store) {
38135f7ddb14SDimitry Andric // For stores the order is actually a mask.
38145f7ddb14SDimitry Andric NewMask.resize(E->ReorderIndices.size());
38155f7ddb14SDimitry Andric copy(E->ReorderIndices, NewMask.begin());
38165f7ddb14SDimitry Andric } else {
38175f7ddb14SDimitry Andric inversePermutation(E->ReorderIndices, NewMask);
38185f7ddb14SDimitry Andric }
38195f7ddb14SDimitry Andric ::addMask(Mask, NewMask);
38205f7ddb14SDimitry Andric }
38215f7ddb14SDimitry Andric if (NeedToShuffleReuses)
38225f7ddb14SDimitry Andric ::addMask(Mask, E->ReuseShuffleIndices);
38235f7ddb14SDimitry Andric if (!Mask.empty() && !ShuffleVectorInst::isIdentityMask(Mask))
38245f7ddb14SDimitry Andric CommonCost =
38255f7ddb14SDimitry Andric TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FinalVecTy, Mask);
3826af732203SDimitry Andric assert((E->State == TreeEntry::Vectorize ||
3827af732203SDimitry Andric E->State == TreeEntry::ScatterVectorize) &&
3828af732203SDimitry Andric "Unhandled state");
38298bcb0991SDimitry Andric assert(E->getOpcode() && allSameType(VL) && allSameBlock(VL) && "Invalid VL");
38308bcb0991SDimitry Andric Instruction *VL0 = E->getMainOp();
38318bcb0991SDimitry Andric unsigned ShuffleOrOp =
38328bcb0991SDimitry Andric E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
38330b57cec5SDimitry Andric switch (ShuffleOrOp) {
38340b57cec5SDimitry Andric case Instruction::PHI:
38350b57cec5SDimitry Andric return 0;
38360b57cec5SDimitry Andric
38370b57cec5SDimitry Andric case Instruction::ExtractValue:
38385ffd83dbSDimitry Andric case Instruction::ExtractElement: {
38395f7ddb14SDimitry Andric // The common cost of removal ExtractElement/ExtractValue instructions +
38405f7ddb14SDimitry Andric // the cost of shuffles, if required to resuffle the original vector.
38410b57cec5SDimitry Andric if (NeedToShuffleReuses) {
38420b57cec5SDimitry Andric unsigned Idx = 0;
38430b57cec5SDimitry Andric for (unsigned I : E->ReuseShuffleIndices) {
38440b57cec5SDimitry Andric if (ShuffleOrOp == Instruction::ExtractElement) {
38455f7ddb14SDimitry Andric auto *EE = cast<ExtractElementInst>(VL[I]);
38465f7ddb14SDimitry Andric CommonCost -= TTI->getVectorInstrCost(Instruction::ExtractElement,
38475f7ddb14SDimitry Andric EE->getVectorOperandType(),
38485f7ddb14SDimitry Andric *getExtractIndex(EE));
38490b57cec5SDimitry Andric } else {
38505f7ddb14SDimitry Andric CommonCost -= TTI->getVectorInstrCost(Instruction::ExtractElement,
38515f7ddb14SDimitry Andric VecTy, Idx);
38520b57cec5SDimitry Andric ++Idx;
38530b57cec5SDimitry Andric }
38540b57cec5SDimitry Andric }
38550b57cec5SDimitry Andric Idx = ReuseShuffleNumbers;
38560b57cec5SDimitry Andric for (Value *V : VL) {
38570b57cec5SDimitry Andric if (ShuffleOrOp == Instruction::ExtractElement) {
38585f7ddb14SDimitry Andric auto *EE = cast<ExtractElementInst>(V);
38595f7ddb14SDimitry Andric CommonCost += TTI->getVectorInstrCost(Instruction::ExtractElement,
38605f7ddb14SDimitry Andric EE->getVectorOperandType(),
38615f7ddb14SDimitry Andric *getExtractIndex(EE));
38620b57cec5SDimitry Andric } else {
38630b57cec5SDimitry Andric --Idx;
38645f7ddb14SDimitry Andric CommonCost += TTI->getVectorInstrCost(Instruction::ExtractElement,
38655f7ddb14SDimitry Andric VecTy, Idx);
38660b57cec5SDimitry Andric }
38670b57cec5SDimitry Andric }
38680b57cec5SDimitry Andric }
38695f7ddb14SDimitry Andric if (ShuffleOrOp == Instruction::ExtractValue) {
3870af732203SDimitry Andric for (unsigned I = 0, E = VL.size(); I < E; ++I) {
38715f7ddb14SDimitry Andric auto *EI = cast<Instruction>(VL[I]);
38720b57cec5SDimitry Andric // Take credit for instruction that will become dead.
3873af732203SDimitry Andric if (EI->hasOneUse()) {
3874af732203SDimitry Andric Instruction *Ext = EI->user_back();
38750b57cec5SDimitry Andric if ((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
38760b57cec5SDimitry Andric all_of(Ext->users(),
38770b57cec5SDimitry Andric [](User *U) { return isa<GetElementPtrInst>(U); })) {
38780b57cec5SDimitry Andric // Use getExtractWithExtendCost() to calculate the cost of
38790b57cec5SDimitry Andric // extractelement/ext pair.
38805f7ddb14SDimitry Andric CommonCost -= TTI->getExtractWithExtendCost(
3881af732203SDimitry Andric Ext->getOpcode(), Ext->getType(), VecTy, I);
38820b57cec5SDimitry Andric // Add back the cost of s|zext which is subtracted separately.
38835f7ddb14SDimitry Andric CommonCost += TTI->getCastInstrCost(
3884af732203SDimitry Andric Ext->getOpcode(), Ext->getType(), EI->getType(),
3885af732203SDimitry Andric TTI::getCastContextHint(Ext), CostKind, Ext);
38860b57cec5SDimitry Andric continue;
38870b57cec5SDimitry Andric }
38880b57cec5SDimitry Andric }
38895f7ddb14SDimitry Andric CommonCost -=
3890af732203SDimitry Andric TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, I);
38910b57cec5SDimitry Andric }
38925f7ddb14SDimitry Andric } else {
38935f7ddb14SDimitry Andric AdjustExtractsCost(CommonCost, /*IsGather=*/false);
38940b57cec5SDimitry Andric }
38955f7ddb14SDimitry Andric return CommonCost;
38965f7ddb14SDimitry Andric }
38975f7ddb14SDimitry Andric case Instruction::InsertElement: {
38985f7ddb14SDimitry Andric auto *SrcVecTy = cast<FixedVectorType>(VL0->getType());
38995f7ddb14SDimitry Andric
39005f7ddb14SDimitry Andric unsigned const NumElts = SrcVecTy->getNumElements();
39015f7ddb14SDimitry Andric unsigned const NumScalars = VL.size();
39025f7ddb14SDimitry Andric APInt DemandedElts = APInt::getNullValue(NumElts);
39035f7ddb14SDimitry Andric // TODO: Add support for Instruction::InsertValue.
39045f7ddb14SDimitry Andric unsigned Offset = UINT_MAX;
39055f7ddb14SDimitry Andric bool IsIdentity = true;
39065f7ddb14SDimitry Andric SmallVector<int> ShuffleMask(NumElts, UndefMaskElem);
39075f7ddb14SDimitry Andric for (unsigned I = 0; I < NumScalars; ++I) {
39085f7ddb14SDimitry Andric Optional<int> InsertIdx = getInsertIndex(VL[I], 0);
39095f7ddb14SDimitry Andric if (!InsertIdx || *InsertIdx == UndefMaskElem)
39105f7ddb14SDimitry Andric continue;
39115f7ddb14SDimitry Andric unsigned Idx = *InsertIdx;
39125f7ddb14SDimitry Andric DemandedElts.setBit(Idx);
39135f7ddb14SDimitry Andric if (Idx < Offset) {
39145f7ddb14SDimitry Andric Offset = Idx;
39155f7ddb14SDimitry Andric IsIdentity &= I == 0;
39165f7ddb14SDimitry Andric } else {
39175f7ddb14SDimitry Andric assert(Idx >= Offset && "Failed to find vector index offset");
39185f7ddb14SDimitry Andric IsIdentity &= Idx - Offset == I;
39195f7ddb14SDimitry Andric }
39205f7ddb14SDimitry Andric ShuffleMask[Idx] = I;
39215f7ddb14SDimitry Andric }
39225f7ddb14SDimitry Andric assert(Offset < NumElts && "Failed to find vector index offset");
39235f7ddb14SDimitry Andric
39245f7ddb14SDimitry Andric InstructionCost Cost = 0;
39255f7ddb14SDimitry Andric Cost -= TTI->getScalarizationOverhead(SrcVecTy, DemandedElts,
39265f7ddb14SDimitry Andric /*Insert*/ true, /*Extract*/ false);
39275f7ddb14SDimitry Andric
39285f7ddb14SDimitry Andric if (IsIdentity && NumElts != NumScalars && Offset % NumScalars != 0) {
39295f7ddb14SDimitry Andric // FIXME: Replace with SK_InsertSubvector once it is properly supported.
39305f7ddb14SDimitry Andric unsigned Sz = PowerOf2Ceil(Offset + NumScalars);
39315f7ddb14SDimitry Andric Cost += TTI->getShuffleCost(
39325f7ddb14SDimitry Andric TargetTransformInfo::SK_PermuteSingleSrc,
39335f7ddb14SDimitry Andric FixedVectorType::get(SrcVecTy->getElementType(), Sz));
39345f7ddb14SDimitry Andric } else if (!IsIdentity) {
39355f7ddb14SDimitry Andric Cost += TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, SrcVecTy,
39365f7ddb14SDimitry Andric ShuffleMask);
39375f7ddb14SDimitry Andric }
39385f7ddb14SDimitry Andric
39395f7ddb14SDimitry Andric return Cost;
39400b57cec5SDimitry Andric }
39410b57cec5SDimitry Andric case Instruction::ZExt:
39420b57cec5SDimitry Andric case Instruction::SExt:
39430b57cec5SDimitry Andric case Instruction::FPToUI:
39440b57cec5SDimitry Andric case Instruction::FPToSI:
39450b57cec5SDimitry Andric case Instruction::FPExt:
39460b57cec5SDimitry Andric case Instruction::PtrToInt:
39470b57cec5SDimitry Andric case Instruction::IntToPtr:
39480b57cec5SDimitry Andric case Instruction::SIToFP:
39490b57cec5SDimitry Andric case Instruction::UIToFP:
39500b57cec5SDimitry Andric case Instruction::Trunc:
39510b57cec5SDimitry Andric case Instruction::FPTrunc:
39520b57cec5SDimitry Andric case Instruction::BitCast: {
39530b57cec5SDimitry Andric Type *SrcTy = VL0->getOperand(0)->getType();
3954af732203SDimitry Andric InstructionCost ScalarEltCost =
3955af732203SDimitry Andric TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy,
3956af732203SDimitry Andric TTI::getCastContextHint(VL0), CostKind, VL0);
39570b57cec5SDimitry Andric if (NeedToShuffleReuses) {
39585f7ddb14SDimitry Andric CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
39590b57cec5SDimitry Andric }
39600b57cec5SDimitry Andric
39610b57cec5SDimitry Andric // Calculate the cost of this instruction.
3962af732203SDimitry Andric InstructionCost ScalarCost = VL.size() * ScalarEltCost;
39630b57cec5SDimitry Andric
39645ffd83dbSDimitry Andric auto *SrcVecTy = FixedVectorType::get(SrcTy, VL.size());
3965af732203SDimitry Andric InstructionCost VecCost = 0;
39660b57cec5SDimitry Andric // Check if the values are candidates to demote.
39670b57cec5SDimitry Andric if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
39685f7ddb14SDimitry Andric VecCost = CommonCost + TTI->getCastInstrCost(
39695f7ddb14SDimitry Andric E->getOpcode(), VecTy, SrcVecTy,
3970af732203SDimitry Andric TTI::getCastContextHint(VL0), CostKind, VL0);
39710b57cec5SDimitry Andric }
39725f7ddb14SDimitry Andric LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
39730b57cec5SDimitry Andric return VecCost - ScalarCost;
39740b57cec5SDimitry Andric }
39750b57cec5SDimitry Andric case Instruction::FCmp:
39760b57cec5SDimitry Andric case Instruction::ICmp:
39770b57cec5SDimitry Andric case Instruction::Select: {
39780b57cec5SDimitry Andric // Calculate the cost of this instruction.
3979af732203SDimitry Andric InstructionCost ScalarEltCost =
3980af732203SDimitry Andric TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
3981af732203SDimitry Andric CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0);
39820b57cec5SDimitry Andric if (NeedToShuffleReuses) {
39835f7ddb14SDimitry Andric CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
39840b57cec5SDimitry Andric }
39855ffd83dbSDimitry Andric auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
3986af732203SDimitry Andric InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
3987af732203SDimitry Andric
3988af732203SDimitry Andric // Check if all entries in VL are either compares or selects with compares
3989af732203SDimitry Andric // as condition that have the same predicates.
3990af732203SDimitry Andric CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE;
3991af732203SDimitry Andric bool First = true;
3992af732203SDimitry Andric for (auto *V : VL) {
3993af732203SDimitry Andric CmpInst::Predicate CurrentPred;
3994af732203SDimitry Andric auto MatchCmp = m_Cmp(CurrentPred, m_Value(), m_Value());
3995af732203SDimitry Andric if ((!match(V, m_Select(MatchCmp, m_Value(), m_Value())) &&
3996af732203SDimitry Andric !match(V, MatchCmp)) ||
3997af732203SDimitry Andric (!First && VecPred != CurrentPred)) {
3998af732203SDimitry Andric VecPred = CmpInst::BAD_ICMP_PREDICATE;
3999af732203SDimitry Andric break;
4000af732203SDimitry Andric }
4001af732203SDimitry Andric First = false;
4002af732203SDimitry Andric VecPred = CurrentPred;
4003af732203SDimitry Andric }
4004af732203SDimitry Andric
4005af732203SDimitry Andric InstructionCost VecCost = TTI->getCmpSelInstrCost(
4006af732203SDimitry Andric E->getOpcode(), VecTy, MaskTy, VecPred, CostKind, VL0);
4007af732203SDimitry Andric // Check if it is possible and profitable to use min/max for selects in
4008af732203SDimitry Andric // VL.
4009af732203SDimitry Andric //
4010af732203SDimitry Andric auto IntrinsicAndUse = canConvertToMinOrMaxIntrinsic(VL);
4011af732203SDimitry Andric if (IntrinsicAndUse.first != Intrinsic::not_intrinsic) {
4012af732203SDimitry Andric IntrinsicCostAttributes CostAttrs(IntrinsicAndUse.first, VecTy,
4013af732203SDimitry Andric {VecTy, VecTy});
4014af732203SDimitry Andric InstructionCost IntrinsicCost =
4015af732203SDimitry Andric TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
4016af732203SDimitry Andric // If the selects are the only uses of the compares, they will be dead
4017af732203SDimitry Andric // and we can adjust the cost by removing their cost.
4018af732203SDimitry Andric if (IntrinsicAndUse.second)
4019af732203SDimitry Andric IntrinsicCost -=
4020af732203SDimitry Andric TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy, MaskTy,
4021af732203SDimitry Andric CmpInst::BAD_ICMP_PREDICATE, CostKind);
4022af732203SDimitry Andric VecCost = std::min(VecCost, IntrinsicCost);
4023af732203SDimitry Andric }
40245f7ddb14SDimitry Andric LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
40255f7ddb14SDimitry Andric return CommonCost + VecCost - ScalarCost;
40260b57cec5SDimitry Andric }
40270b57cec5SDimitry Andric case Instruction::FNeg:
40280b57cec5SDimitry Andric case Instruction::Add:
40290b57cec5SDimitry Andric case Instruction::FAdd:
40300b57cec5SDimitry Andric case Instruction::Sub:
40310b57cec5SDimitry Andric case Instruction::FSub:
40320b57cec5SDimitry Andric case Instruction::Mul:
40330b57cec5SDimitry Andric case Instruction::FMul:
40340b57cec5SDimitry Andric case Instruction::UDiv:
40350b57cec5SDimitry Andric case Instruction::SDiv:
40360b57cec5SDimitry Andric case Instruction::FDiv:
40370b57cec5SDimitry Andric case Instruction::URem:
40380b57cec5SDimitry Andric case Instruction::SRem:
40390b57cec5SDimitry Andric case Instruction::FRem:
40400b57cec5SDimitry Andric case Instruction::Shl:
40410b57cec5SDimitry Andric case Instruction::LShr:
40420b57cec5SDimitry Andric case Instruction::AShr:
40430b57cec5SDimitry Andric case Instruction::And:
40440b57cec5SDimitry Andric case Instruction::Or:
40450b57cec5SDimitry Andric case Instruction::Xor: {
40460b57cec5SDimitry Andric // Certain instructions can be cheaper to vectorize if they have a
40470b57cec5SDimitry Andric // constant second vector operand.
40480b57cec5SDimitry Andric TargetTransformInfo::OperandValueKind Op1VK =
40490b57cec5SDimitry Andric TargetTransformInfo::OK_AnyValue;
40500b57cec5SDimitry Andric TargetTransformInfo::OperandValueKind Op2VK =
40510b57cec5SDimitry Andric TargetTransformInfo::OK_UniformConstantValue;
40520b57cec5SDimitry Andric TargetTransformInfo::OperandValueProperties Op1VP =
40530b57cec5SDimitry Andric TargetTransformInfo::OP_None;
40540b57cec5SDimitry Andric TargetTransformInfo::OperandValueProperties Op2VP =
40550b57cec5SDimitry Andric TargetTransformInfo::OP_PowerOf2;
40560b57cec5SDimitry Andric
40570b57cec5SDimitry Andric // If all operands are exactly the same ConstantInt then set the
40580b57cec5SDimitry Andric // operand kind to OK_UniformConstantValue.
40590b57cec5SDimitry Andric // If instead not all operands are constants, then set the operand kind
40600b57cec5SDimitry Andric // to OK_AnyValue. If all operands are constants but not the same,
40610b57cec5SDimitry Andric // then set the operand kind to OK_NonUniformConstantValue.
40620b57cec5SDimitry Andric ConstantInt *CInt0 = nullptr;
40630b57cec5SDimitry Andric for (unsigned i = 0, e = VL.size(); i < e; ++i) {
40640b57cec5SDimitry Andric const Instruction *I = cast<Instruction>(VL[i]);
40650b57cec5SDimitry Andric unsigned OpIdx = isa<BinaryOperator>(I) ? 1 : 0;
40660b57cec5SDimitry Andric ConstantInt *CInt = dyn_cast<ConstantInt>(I->getOperand(OpIdx));
40670b57cec5SDimitry Andric if (!CInt) {
40680b57cec5SDimitry Andric Op2VK = TargetTransformInfo::OK_AnyValue;
40690b57cec5SDimitry Andric Op2VP = TargetTransformInfo::OP_None;
40700b57cec5SDimitry Andric break;
40710b57cec5SDimitry Andric }
40720b57cec5SDimitry Andric if (Op2VP == TargetTransformInfo::OP_PowerOf2 &&
40730b57cec5SDimitry Andric !CInt->getValue().isPowerOf2())
40740b57cec5SDimitry Andric Op2VP = TargetTransformInfo::OP_None;
40750b57cec5SDimitry Andric if (i == 0) {
40760b57cec5SDimitry Andric CInt0 = CInt;
40770b57cec5SDimitry Andric continue;
40780b57cec5SDimitry Andric }
40790b57cec5SDimitry Andric if (CInt0 != CInt)
40800b57cec5SDimitry Andric Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
40810b57cec5SDimitry Andric }
40820b57cec5SDimitry Andric
40830b57cec5SDimitry Andric SmallVector<const Value *, 4> Operands(VL0->operand_values());
4084af732203SDimitry Andric InstructionCost ScalarEltCost =
4085af732203SDimitry Andric TTI->getArithmeticInstrCost(E->getOpcode(), ScalarTy, CostKind, Op1VK,
4086af732203SDimitry Andric Op2VK, Op1VP, Op2VP, Operands, VL0);
40870b57cec5SDimitry Andric if (NeedToShuffleReuses) {
40885f7ddb14SDimitry Andric CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
40890b57cec5SDimitry Andric }
4090af732203SDimitry Andric InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
4091af732203SDimitry Andric InstructionCost VecCost =
4092af732203SDimitry Andric TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind, Op1VK,
4093af732203SDimitry Andric Op2VK, Op1VP, Op2VP, Operands, VL0);
40945f7ddb14SDimitry Andric LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
40955f7ddb14SDimitry Andric return CommonCost + VecCost - ScalarCost;
40960b57cec5SDimitry Andric }
40970b57cec5SDimitry Andric case Instruction::GetElementPtr: {
40980b57cec5SDimitry Andric TargetTransformInfo::OperandValueKind Op1VK =
40990b57cec5SDimitry Andric TargetTransformInfo::OK_AnyValue;
41000b57cec5SDimitry Andric TargetTransformInfo::OperandValueKind Op2VK =
41010b57cec5SDimitry Andric TargetTransformInfo::OK_UniformConstantValue;
41020b57cec5SDimitry Andric
4103af732203SDimitry Andric InstructionCost ScalarEltCost = TTI->getArithmeticInstrCost(
4104af732203SDimitry Andric Instruction::Add, ScalarTy, CostKind, Op1VK, Op2VK);
41050b57cec5SDimitry Andric if (NeedToShuffleReuses) {
41065f7ddb14SDimitry Andric CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
41070b57cec5SDimitry Andric }
4108af732203SDimitry Andric InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
4109af732203SDimitry Andric InstructionCost VecCost = TTI->getArithmeticInstrCost(
4110af732203SDimitry Andric Instruction::Add, VecTy, CostKind, Op1VK, Op2VK);
41115f7ddb14SDimitry Andric LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
41125f7ddb14SDimitry Andric return CommonCost + VecCost - ScalarCost;
41130b57cec5SDimitry Andric }
41140b57cec5SDimitry Andric case Instruction::Load: {
41150b57cec5SDimitry Andric // Cost of wide load - cost of scalar loads.
41165f7ddb14SDimitry Andric Align Alignment = cast<LoadInst>(VL0)->getAlign();
4117af732203SDimitry Andric InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
41185f7ddb14SDimitry Andric Instruction::Load, ScalarTy, Alignment, 0, CostKind, VL0);
41190b57cec5SDimitry Andric if (NeedToShuffleReuses) {
41205f7ddb14SDimitry Andric CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
41210b57cec5SDimitry Andric }
4122af732203SDimitry Andric InstructionCost ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
4123af732203SDimitry Andric InstructionCost VecLdCost;
4124af732203SDimitry Andric if (E->State == TreeEntry::Vectorize) {
41255f7ddb14SDimitry Andric VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, Alignment, 0,
41265ffd83dbSDimitry Andric CostKind, VL0);
4127af732203SDimitry Andric } else {
4128af732203SDimitry Andric assert(E->State == TreeEntry::ScatterVectorize && "Unknown EntryState");
41295f7ddb14SDimitry Andric Align CommonAlignment = Alignment;
41305f7ddb14SDimitry Andric for (Value *V : VL)
41315f7ddb14SDimitry Andric CommonAlignment =
41325f7ddb14SDimitry Andric commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
4133af732203SDimitry Andric VecLdCost = TTI->getGatherScatterOpCost(
4134af732203SDimitry Andric Instruction::Load, VecTy, cast<LoadInst>(VL0)->getPointerOperand(),
41355f7ddb14SDimitry Andric /*VariableMask=*/false, CommonAlignment, CostKind, VL0);
4136af732203SDimitry Andric }
41375f7ddb14SDimitry Andric LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecLdCost, ScalarLdCost));
41385f7ddb14SDimitry Andric return CommonCost + VecLdCost - ScalarLdCost;
41390b57cec5SDimitry Andric }
41400b57cec5SDimitry Andric case Instruction::Store: {
41410b57cec5SDimitry Andric // We know that we can merge the stores. Calculate the cost.
4142480093f4SDimitry Andric bool IsReorder = !E->ReorderIndices.empty();
4143480093f4SDimitry Andric auto *SI =
4144480093f4SDimitry Andric cast<StoreInst>(IsReorder ? VL[E->ReorderIndices.front()] : VL0);
41455ffd83dbSDimitry Andric Align Alignment = SI->getAlign();
4146af732203SDimitry Andric InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
4147af732203SDimitry Andric Instruction::Store, ScalarTy, Alignment, 0, CostKind, VL0);
4148af732203SDimitry Andric InstructionCost ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
4149af732203SDimitry Andric InstructionCost VecStCost = TTI->getMemoryOpCost(
4150af732203SDimitry Andric Instruction::Store, VecTy, Alignment, 0, CostKind, VL0);
41515f7ddb14SDimitry Andric LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecStCost, ScalarStCost));
41525f7ddb14SDimitry Andric return CommonCost + VecStCost - ScalarStCost;
41530b57cec5SDimitry Andric }
41540b57cec5SDimitry Andric case Instruction::Call: {
41550b57cec5SDimitry Andric CallInst *CI = cast<CallInst>(VL0);
41560b57cec5SDimitry Andric Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
41570b57cec5SDimitry Andric
41580b57cec5SDimitry Andric // Calculate the cost of the scalar and vector calls.
41595f7ddb14SDimitry Andric IntrinsicCostAttributes CostAttrs(ID, *CI, 1);
4160af732203SDimitry Andric InstructionCost ScalarEltCost =
4161af732203SDimitry Andric TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
41620b57cec5SDimitry Andric if (NeedToShuffleReuses) {
41635f7ddb14SDimitry Andric CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
41640b57cec5SDimitry Andric }
4165af732203SDimitry Andric InstructionCost ScalarCallCost = VecTy->getNumElements() * ScalarEltCost;
41660b57cec5SDimitry Andric
41675ffd83dbSDimitry Andric auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
4168af732203SDimitry Andric InstructionCost VecCallCost =
4169af732203SDimitry Andric std::min(VecCallCosts.first, VecCallCosts.second);
41700b57cec5SDimitry Andric
41710b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Call cost " << VecCallCost - ScalarCallCost
41720b57cec5SDimitry Andric << " (" << VecCallCost << "-" << ScalarCallCost << ")"
41730b57cec5SDimitry Andric << " for " << *CI << "\n");
41740b57cec5SDimitry Andric
41755f7ddb14SDimitry Andric return CommonCost + VecCallCost - ScalarCallCost;
41760b57cec5SDimitry Andric }
41770b57cec5SDimitry Andric case Instruction::ShuffleVector: {
41788bcb0991SDimitry Andric assert(E->isAltShuffle() &&
41798bcb0991SDimitry Andric ((Instruction::isBinaryOp(E->getOpcode()) &&
41808bcb0991SDimitry Andric Instruction::isBinaryOp(E->getAltOpcode())) ||
41818bcb0991SDimitry Andric (Instruction::isCast(E->getOpcode()) &&
41828bcb0991SDimitry Andric Instruction::isCast(E->getAltOpcode()))) &&
41830b57cec5SDimitry Andric "Invalid Shuffle Vector Operand");
4184af732203SDimitry Andric InstructionCost ScalarCost = 0;
41850b57cec5SDimitry Andric if (NeedToShuffleReuses) {
41860b57cec5SDimitry Andric for (unsigned Idx : E->ReuseShuffleIndices) {
41870b57cec5SDimitry Andric Instruction *I = cast<Instruction>(VL[Idx]);
41885f7ddb14SDimitry Andric CommonCost -= TTI->getInstructionCost(I, CostKind);
41890b57cec5SDimitry Andric }
41900b57cec5SDimitry Andric for (Value *V : VL) {
41910b57cec5SDimitry Andric Instruction *I = cast<Instruction>(V);
41925f7ddb14SDimitry Andric CommonCost += TTI->getInstructionCost(I, CostKind);
41930b57cec5SDimitry Andric }
41940b57cec5SDimitry Andric }
41958bcb0991SDimitry Andric for (Value *V : VL) {
41968bcb0991SDimitry Andric Instruction *I = cast<Instruction>(V);
41978bcb0991SDimitry Andric assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
41985ffd83dbSDimitry Andric ScalarCost += TTI->getInstructionCost(I, CostKind);
41990b57cec5SDimitry Andric }
42000b57cec5SDimitry Andric // VecCost is equal to sum of the cost of creating 2 vectors
42010b57cec5SDimitry Andric // and the cost of creating shuffle.
4202af732203SDimitry Andric InstructionCost VecCost = 0;
42038bcb0991SDimitry Andric if (Instruction::isBinaryOp(E->getOpcode())) {
42045ffd83dbSDimitry Andric VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
42055ffd83dbSDimitry Andric VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
42065ffd83dbSDimitry Andric CostKind);
42070b57cec5SDimitry Andric } else {
42088bcb0991SDimitry Andric Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
42098bcb0991SDimitry Andric Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
42105ffd83dbSDimitry Andric auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size());
42115ffd83dbSDimitry Andric auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size());
42125ffd83dbSDimitry Andric VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
4213af732203SDimitry Andric TTI::CastContextHint::None, CostKind);
42145ffd83dbSDimitry Andric VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
4215af732203SDimitry Andric TTI::CastContextHint::None, CostKind);
42160b57cec5SDimitry Andric }
42175f7ddb14SDimitry Andric
42185f7ddb14SDimitry Andric SmallVector<int> Mask(E->Scalars.size());
42195f7ddb14SDimitry Andric for (unsigned I = 0, End = E->Scalars.size(); I < End; ++I) {
42205f7ddb14SDimitry Andric auto *OpInst = cast<Instruction>(E->Scalars[I]);
42215f7ddb14SDimitry Andric assert(E->isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode");
42225f7ddb14SDimitry Andric Mask[I] = I + (OpInst->getOpcode() == E->getAltOpcode() ? End : 0);
42235f7ddb14SDimitry Andric }
42245f7ddb14SDimitry Andric VecCost +=
42255f7ddb14SDimitry Andric TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, Mask, 0);
42265f7ddb14SDimitry Andric LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
42275f7ddb14SDimitry Andric return CommonCost + VecCost - ScalarCost;
42280b57cec5SDimitry Andric }
42290b57cec5SDimitry Andric default:
42300b57cec5SDimitry Andric llvm_unreachable("Unknown instruction");
42310b57cec5SDimitry Andric }
42320b57cec5SDimitry Andric }
42330b57cec5SDimitry Andric
isFullyVectorizableTinyTree() const42340b57cec5SDimitry Andric bool BoUpSLP::isFullyVectorizableTinyTree() const {
42350b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Check whether the tree with height "
42360b57cec5SDimitry Andric << VectorizableTree.size() << " is fully vectorizable .\n");
42370b57cec5SDimitry Andric
42380b57cec5SDimitry Andric // We only handle trees of heights 1 and 2.
4239480093f4SDimitry Andric if (VectorizableTree.size() == 1 &&
4240480093f4SDimitry Andric VectorizableTree[0]->State == TreeEntry::Vectorize)
42410b57cec5SDimitry Andric return true;
42420b57cec5SDimitry Andric
42430b57cec5SDimitry Andric if (VectorizableTree.size() != 2)
42440b57cec5SDimitry Andric return false;
42450b57cec5SDimitry Andric
42465f7ddb14SDimitry Andric // Handle splat and all-constants stores. Also try to vectorize tiny trees
42475f7ddb14SDimitry Andric // with the second gather nodes if they have less scalar operands rather than
42485f7ddb14SDimitry Andric // the initial tree element (may be profitable to shuffle the second gather)
42495f7ddb14SDimitry Andric // or they are extractelements, which form shuffle.
42505f7ddb14SDimitry Andric SmallVector<int> Mask;
4251480093f4SDimitry Andric if (VectorizableTree[0]->State == TreeEntry::Vectorize &&
42520b57cec5SDimitry Andric (allConstant(VectorizableTree[1]->Scalars) ||
42535f7ddb14SDimitry Andric isSplat(VectorizableTree[1]->Scalars) ||
42545f7ddb14SDimitry Andric (VectorizableTree[1]->State == TreeEntry::NeedToGather &&
42555f7ddb14SDimitry Andric VectorizableTree[1]->Scalars.size() <
42565f7ddb14SDimitry Andric VectorizableTree[0]->Scalars.size()) ||
42575f7ddb14SDimitry Andric (VectorizableTree[1]->State == TreeEntry::NeedToGather &&
42585f7ddb14SDimitry Andric VectorizableTree[1]->getOpcode() == Instruction::ExtractElement &&
42595f7ddb14SDimitry Andric isShuffle(VectorizableTree[1]->Scalars, Mask))))
42600b57cec5SDimitry Andric return true;
42610b57cec5SDimitry Andric
42620b57cec5SDimitry Andric // Gathering cost would be too much for tiny trees.
4263480093f4SDimitry Andric if (VectorizableTree[0]->State == TreeEntry::NeedToGather ||
4264480093f4SDimitry Andric VectorizableTree[1]->State == TreeEntry::NeedToGather)
42650b57cec5SDimitry Andric return false;
42660b57cec5SDimitry Andric
42670b57cec5SDimitry Andric return true;
42680b57cec5SDimitry Andric }
42690b57cec5SDimitry Andric
isLoadCombineCandidateImpl(Value * Root,unsigned NumElts,TargetTransformInfo * TTI,bool MustMatchOrInst)42705ffd83dbSDimitry Andric static bool isLoadCombineCandidateImpl(Value *Root, unsigned NumElts,
42715f7ddb14SDimitry Andric TargetTransformInfo *TTI,
42725f7ddb14SDimitry Andric bool MustMatchOrInst) {
42735ffd83dbSDimitry Andric // Look past the root to find a source value. Arbitrarily follow the
42748bcb0991SDimitry Andric // path through operand 0 of any 'or'. Also, peek through optional
4275af732203SDimitry Andric // shift-left-by-multiple-of-8-bits.
42765ffd83dbSDimitry Andric Value *ZextLoad = Root;
4277af732203SDimitry Andric const APInt *ShAmtC;
42785f7ddb14SDimitry Andric bool FoundOr = false;
42795ffd83dbSDimitry Andric while (!isa<ConstantExpr>(ZextLoad) &&
42805ffd83dbSDimitry Andric (match(ZextLoad, m_Or(m_Value(), m_Value())) ||
4281af732203SDimitry Andric (match(ZextLoad, m_Shl(m_Value(), m_APInt(ShAmtC))) &&
42825f7ddb14SDimitry Andric ShAmtC->urem(8) == 0))) {
42835f7ddb14SDimitry Andric auto *BinOp = cast<BinaryOperator>(ZextLoad);
42845f7ddb14SDimitry Andric ZextLoad = BinOp->getOperand(0);
42855f7ddb14SDimitry Andric if (BinOp->getOpcode() == Instruction::Or)
42865f7ddb14SDimitry Andric FoundOr = true;
42875f7ddb14SDimitry Andric }
42885ffd83dbSDimitry Andric // Check if the input is an extended load of the required or/shift expression.
42898bcb0991SDimitry Andric Value *LoadPtr;
42905f7ddb14SDimitry Andric if ((MustMatchOrInst && !FoundOr) || ZextLoad == Root ||
42915f7ddb14SDimitry Andric !match(ZextLoad, m_ZExt(m_Load(m_Value(LoadPtr)))))
42928bcb0991SDimitry Andric return false;
42938bcb0991SDimitry Andric
42948bcb0991SDimitry Andric // Require that the total load bit width is a legal integer type.
42958bcb0991SDimitry Andric // For example, <8 x i8> --> i64 is a legal integer on a 64-bit target.
42968bcb0991SDimitry Andric // But <16 x i8> --> i128 is not, so the backend probably can't reduce it.
42978bcb0991SDimitry Andric Type *SrcTy = LoadPtr->getType()->getPointerElementType();
42988bcb0991SDimitry Andric unsigned LoadBitWidth = SrcTy->getIntegerBitWidth() * NumElts;
42995ffd83dbSDimitry Andric if (!TTI->isTypeLegal(IntegerType::get(Root->getContext(), LoadBitWidth)))
43008bcb0991SDimitry Andric return false;
43018bcb0991SDimitry Andric
43028bcb0991SDimitry Andric // Everything matched - assume that we can fold the whole sequence using
43038bcb0991SDimitry Andric // load combining.
43045ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Assume load combining for tree starting at "
43055ffd83dbSDimitry Andric << *(cast<Instruction>(Root)) << "\n");
43068bcb0991SDimitry Andric
43078bcb0991SDimitry Andric return true;
43088bcb0991SDimitry Andric }
43098bcb0991SDimitry Andric
isLoadCombineReductionCandidate(RecurKind RdxKind) const4310af732203SDimitry Andric bool BoUpSLP::isLoadCombineReductionCandidate(RecurKind RdxKind) const {
4311af732203SDimitry Andric if (RdxKind != RecurKind::Or)
43125ffd83dbSDimitry Andric return false;
43135ffd83dbSDimitry Andric
43145ffd83dbSDimitry Andric unsigned NumElts = VectorizableTree[0]->Scalars.size();
43155ffd83dbSDimitry Andric Value *FirstReduced = VectorizableTree[0]->Scalars[0];
43165f7ddb14SDimitry Andric return isLoadCombineCandidateImpl(FirstReduced, NumElts, TTI,
43175f7ddb14SDimitry Andric /* MatchOr */ false);
43185ffd83dbSDimitry Andric }
43195ffd83dbSDimitry Andric
isLoadCombineCandidate() const43205ffd83dbSDimitry Andric bool BoUpSLP::isLoadCombineCandidate() const {
43215ffd83dbSDimitry Andric // Peek through a final sequence of stores and check if all operations are
43225ffd83dbSDimitry Andric // likely to be load-combined.
43235ffd83dbSDimitry Andric unsigned NumElts = VectorizableTree[0]->Scalars.size();
43245ffd83dbSDimitry Andric for (Value *Scalar : VectorizableTree[0]->Scalars) {
43255ffd83dbSDimitry Andric Value *X;
43265ffd83dbSDimitry Andric if (!match(Scalar, m_Store(m_Value(X), m_Value())) ||
43275f7ddb14SDimitry Andric !isLoadCombineCandidateImpl(X, NumElts, TTI, /* MatchOr */ true))
43285ffd83dbSDimitry Andric return false;
43295ffd83dbSDimitry Andric }
43305ffd83dbSDimitry Andric return true;
43315ffd83dbSDimitry Andric }
43325ffd83dbSDimitry Andric
isTreeTinyAndNotFullyVectorizable() const43330b57cec5SDimitry Andric bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() const {
43345f7ddb14SDimitry Andric // No need to vectorize inserts of gathered values.
43355f7ddb14SDimitry Andric if (VectorizableTree.size() == 2 &&
43365f7ddb14SDimitry Andric isa<InsertElementInst>(VectorizableTree[0]->Scalars[0]) &&
43375f7ddb14SDimitry Andric VectorizableTree[1]->State == TreeEntry::NeedToGather)
43385f7ddb14SDimitry Andric return true;
43395f7ddb14SDimitry Andric
43400b57cec5SDimitry Andric // We can vectorize the tree if its size is greater than or equal to the
43410b57cec5SDimitry Andric // minimum size specified by the MinTreeSize command line option.
43420b57cec5SDimitry Andric if (VectorizableTree.size() >= MinTreeSize)
43430b57cec5SDimitry Andric return false;
43440b57cec5SDimitry Andric
43450b57cec5SDimitry Andric // If we have a tiny tree (a tree whose size is less than MinTreeSize), we
43460b57cec5SDimitry Andric // can vectorize it if we can prove it fully vectorizable.
43470b57cec5SDimitry Andric if (isFullyVectorizableTinyTree())
43480b57cec5SDimitry Andric return false;
43490b57cec5SDimitry Andric
43500b57cec5SDimitry Andric assert(VectorizableTree.empty()
43510b57cec5SDimitry Andric ? ExternalUses.empty()
43520b57cec5SDimitry Andric : true && "We shouldn't have any external users");
43530b57cec5SDimitry Andric
43540b57cec5SDimitry Andric // Otherwise, we can't vectorize the tree. It is both tiny and not fully
43550b57cec5SDimitry Andric // vectorizable.
43560b57cec5SDimitry Andric return true;
43570b57cec5SDimitry Andric }
43580b57cec5SDimitry Andric
InstructionCost BoUpSLP::getSpillCost() const {
  // Walk from the bottom of the tree to the top, tracking which values are
  // live. When we see a call instruction that is not part of our tree,
  // query TTI to see if there is a cost to keeping values live over it
  // (for example, if spills and fills are required).
  unsigned BundleWidth = VectorizableTree.front()->Scalars.size();
  InstructionCost Cost = 0;

  // Set of tree-entry operands currently considered live across the region
  // between consecutive visited scalars.
  SmallPtrSet<Instruction*, 4> LiveValues;
  Instruction *PrevInst = nullptr;

  // The entries in VectorizableTree are not necessarily ordered by their
  // position in basic blocks. Collect them and order them by dominance so later
  // instructions are guaranteed to be visited first. For instructions in
  // different basic blocks, we only scan to the beginning of the block, so
  // their order does not matter, as long as all instructions in a basic block
  // are grouped together. Using dominance ensures a deterministic order.
  SmallVector<Instruction *, 16> OrderedScalars;
  for (const auto &TEPtr : VectorizableTree) {
    // Only the first scalar of each entry is used as the entry's anchor;
    // non-instruction scalars (e.g. gathered constants) are skipped.
    Instruction *Inst = dyn_cast<Instruction>(TEPtr->Scalars[0]);
    if (!Inst)
      continue;
    OrderedScalars.push_back(Inst);
  }
  // Sort by dominator-tree DFS-in number of the parent block; within the same
  // block, sort so that later instructions come first (B->comesBefore(A)).
  llvm::sort(OrderedScalars, [&](Instruction *A, Instruction *B) {
    auto *NodeA = DT->getNode(A->getParent());
    auto *NodeB = DT->getNode(B->getParent());
    assert(NodeA && "Should only process reachable instructions");
    assert(NodeB && "Should only process reachable instructions");
    assert((NodeA == NodeB) == (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
           "Different nodes should have different DFS numbers");
    if (NodeA != NodeB)
      return NodeA->getDFSNumIn() < NodeB->getDFSNumIn();
    return B->comesBefore(A);
  });

  for (Instruction *Inst : OrderedScalars) {
    if (!PrevInst) {
      // First visited scalar: nothing is live yet; just remember it.
      PrevInst = Inst;
      continue;
    }

    // Update LiveValues.
    LiveValues.erase(PrevInst);
    for (auto &J : PrevInst->operands()) {
      // An operand is live if it is itself part of the vectorizable tree.
      if (isa<Instruction>(&*J) && getTreeEntry(&*J))
        LiveValues.insert(cast<Instruction>(&*J));
    }

    LLVM_DEBUG({
      dbgs() << "SLP: #LV: " << LiveValues.size();
      for (auto *X : LiveValues)
        dbgs() << " " << X->getName();
      dbgs() << ", Looking at ";
      Inst->dump();
    });

    // Now find the sequence of instructions between PrevInst and Inst.
    // Walk backwards from just before Inst toward PrevInst using reverse
    // iterators, counting intervening calls.
    unsigned NumCalls = 0;
    BasicBlock::reverse_iterator InstIt = ++Inst->getIterator().getReverse(),
                                 PrevInstIt =
                                     PrevInst->getIterator().getReverse();
    while (InstIt != PrevInstIt) {
      if (PrevInstIt == PrevInst->getParent()->rend()) {
        // Hit the start of PrevInst's block without meeting Inst: restart the
        // scan from the bottom of Inst's block (the cross-block case — only
        // Inst's own block is scanned, per the ordering comment above).
        PrevInstIt = Inst->getParent()->rbegin();
        continue;
      }

      // Debug information does not impact spill cost.
      if ((isa<CallInst>(&*PrevInstIt) &&
           !isa<DbgInfoIntrinsic>(&*PrevInstIt)) &&
          &*PrevInstIt != PrevInst)
        NumCalls++;

      ++PrevInstIt;
    }

    if (NumCalls) {
      // Each live scalar will become a vector of BundleWidth elements; ask
      // TTI for the cost of keeping those vectors live across a call.
      SmallVector<Type*, 4> V;
      for (auto *II : LiveValues) {
        auto *ScalarTy = II->getType();
        if (auto *VectorTy = dyn_cast<FixedVectorType>(ScalarTy))
          ScalarTy = VectorTy->getElementType();
        V.push_back(FixedVectorType::get(ScalarTy, BundleWidth));
      }
      Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(V);
    }

    PrevInst = Inst;
  }

  return Cost;
}
44520b57cec5SDimitry Andric
getTreeCost(ArrayRef<Value * > VectorizedVals)44535f7ddb14SDimitry Andric InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
4454af732203SDimitry Andric InstructionCost Cost = 0;
44550b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
44560b57cec5SDimitry Andric << VectorizableTree.size() << ".\n");
44570b57cec5SDimitry Andric
44580b57cec5SDimitry Andric unsigned BundleWidth = VectorizableTree[0]->Scalars.size();
44590b57cec5SDimitry Andric
44600b57cec5SDimitry Andric for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) {
44610b57cec5SDimitry Andric TreeEntry &TE = *VectorizableTree[I].get();
44620b57cec5SDimitry Andric
44635f7ddb14SDimitry Andric InstructionCost C = getEntryCost(&TE, VectorizedVals);
4464af732203SDimitry Andric Cost += C;
44650b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
44660b57cec5SDimitry Andric << " for bundle that starts with " << *TE.Scalars[0]
4467af732203SDimitry Andric << ".\n"
4468af732203SDimitry Andric << "SLP: Current total cost = " << Cost << "\n");
44690b57cec5SDimitry Andric }
44700b57cec5SDimitry Andric
44710b57cec5SDimitry Andric SmallPtrSet<Value *, 16> ExtractCostCalculated;
4472af732203SDimitry Andric InstructionCost ExtractCost = 0;
44735f7ddb14SDimitry Andric SmallVector<unsigned> VF;
44745f7ddb14SDimitry Andric SmallVector<SmallVector<int>> ShuffleMask;
44755f7ddb14SDimitry Andric SmallVector<Value *> FirstUsers;
44765f7ddb14SDimitry Andric SmallVector<APInt> DemandedElts;
44770b57cec5SDimitry Andric for (ExternalUser &EU : ExternalUses) {
44780b57cec5SDimitry Andric // We only add extract cost once for the same scalar.
44790b57cec5SDimitry Andric if (!ExtractCostCalculated.insert(EU.Scalar).second)
44800b57cec5SDimitry Andric continue;
44810b57cec5SDimitry Andric
44820b57cec5SDimitry Andric // Uses by ephemeral values are free (because the ephemeral value will be
44830b57cec5SDimitry Andric // removed prior to code generation, and so the extraction will be
44840b57cec5SDimitry Andric // removed as well).
44850b57cec5SDimitry Andric if (EphValues.count(EU.User))
44860b57cec5SDimitry Andric continue;
44870b57cec5SDimitry Andric
44885f7ddb14SDimitry Andric // No extract cost for vector "scalar"
44895f7ddb14SDimitry Andric if (isa<FixedVectorType>(EU.Scalar->getType()))
44905f7ddb14SDimitry Andric continue;
44915f7ddb14SDimitry Andric
44925f7ddb14SDimitry Andric // Already counted the cost for external uses when tried to adjust the cost
44935f7ddb14SDimitry Andric // for extractelements, no need to add it again.
44945f7ddb14SDimitry Andric if (isa<ExtractElementInst>(EU.Scalar))
44955f7ddb14SDimitry Andric continue;
44965f7ddb14SDimitry Andric
44975f7ddb14SDimitry Andric // If found user is an insertelement, do not calculate extract cost but try
44985f7ddb14SDimitry Andric // to detect it as a final shuffled/identity match.
44995f7ddb14SDimitry Andric if (EU.User && isa<InsertElementInst>(EU.User)) {
45005f7ddb14SDimitry Andric if (auto *FTy = dyn_cast<FixedVectorType>(EU.User->getType())) {
45015f7ddb14SDimitry Andric Optional<int> InsertIdx = getInsertIndex(EU.User, 0);
45025f7ddb14SDimitry Andric if (!InsertIdx || *InsertIdx == UndefMaskElem)
45035f7ddb14SDimitry Andric continue;
45045f7ddb14SDimitry Andric Value *VU = EU.User;
45055f7ddb14SDimitry Andric auto *It = find_if(FirstUsers, [VU](Value *V) {
45065f7ddb14SDimitry Andric // Checks if 2 insertelements are from the same buildvector.
45075f7ddb14SDimitry Andric if (VU->getType() != V->getType())
45085f7ddb14SDimitry Andric return false;
45095f7ddb14SDimitry Andric auto *IE1 = cast<InsertElementInst>(VU);
45105f7ddb14SDimitry Andric auto *IE2 = cast<InsertElementInst>(V);
45115f7ddb14SDimitry Andric // Go though of insertelement instructions trying to find either VU as
45125f7ddb14SDimitry Andric // the original vector for IE2 or V as the original vector for IE1.
45135f7ddb14SDimitry Andric do {
45145f7ddb14SDimitry Andric if (IE1 == VU || IE2 == V)
45155f7ddb14SDimitry Andric return true;
45165f7ddb14SDimitry Andric if (IE1)
45175f7ddb14SDimitry Andric IE1 = dyn_cast<InsertElementInst>(IE1->getOperand(0));
45185f7ddb14SDimitry Andric if (IE2)
45195f7ddb14SDimitry Andric IE2 = dyn_cast<InsertElementInst>(IE2->getOperand(0));
45205f7ddb14SDimitry Andric } while (IE1 || IE2);
45215f7ddb14SDimitry Andric return false;
45225f7ddb14SDimitry Andric });
45235f7ddb14SDimitry Andric int VecId = -1;
45245f7ddb14SDimitry Andric if (It == FirstUsers.end()) {
45255f7ddb14SDimitry Andric VF.push_back(FTy->getNumElements());
45265f7ddb14SDimitry Andric ShuffleMask.emplace_back(VF.back(), UndefMaskElem);
45275f7ddb14SDimitry Andric FirstUsers.push_back(EU.User);
45285f7ddb14SDimitry Andric DemandedElts.push_back(APInt::getNullValue(VF.back()));
45295f7ddb14SDimitry Andric VecId = FirstUsers.size() - 1;
45305f7ddb14SDimitry Andric } else {
45315f7ddb14SDimitry Andric VecId = std::distance(FirstUsers.begin(), It);
45325f7ddb14SDimitry Andric }
45335f7ddb14SDimitry Andric int Idx = *InsertIdx;
45345f7ddb14SDimitry Andric ShuffleMask[VecId][Idx] = EU.Lane;
45355f7ddb14SDimitry Andric DemandedElts[VecId].setBit(Idx);
45365f7ddb14SDimitry Andric }
45375f7ddb14SDimitry Andric }
45385f7ddb14SDimitry Andric
45390b57cec5SDimitry Andric // If we plan to rewrite the tree in a smaller type, we will need to sign
45400b57cec5SDimitry Andric // extend the extracted value back to the original type. Here, we account
45410b57cec5SDimitry Andric // for the extract and the added cost of the sign extend if needed.
45425ffd83dbSDimitry Andric auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth);
45430b57cec5SDimitry Andric auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
45440b57cec5SDimitry Andric if (MinBWs.count(ScalarRoot)) {
45450b57cec5SDimitry Andric auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
45460b57cec5SDimitry Andric auto Extend =
45470b57cec5SDimitry Andric MinBWs[ScalarRoot].second ? Instruction::SExt : Instruction::ZExt;
45485ffd83dbSDimitry Andric VecTy = FixedVectorType::get(MinTy, BundleWidth);
45490b57cec5SDimitry Andric ExtractCost += TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
45500b57cec5SDimitry Andric VecTy, EU.Lane);
45510b57cec5SDimitry Andric } else {
45520b57cec5SDimitry Andric ExtractCost +=
45530b57cec5SDimitry Andric TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, EU.Lane);
45540b57cec5SDimitry Andric }
45550b57cec5SDimitry Andric }
45560b57cec5SDimitry Andric
4557af732203SDimitry Andric InstructionCost SpillCost = getSpillCost();
45580b57cec5SDimitry Andric Cost += SpillCost + ExtractCost;
45595f7ddb14SDimitry Andric for (int I = 0, E = FirstUsers.size(); I < E; ++I) {
45605f7ddb14SDimitry Andric // For the very first element - simple shuffle of the source vector.
45615f7ddb14SDimitry Andric int Limit = ShuffleMask[I].size() * 2;
45625f7ddb14SDimitry Andric if (I == 0 &&
45635f7ddb14SDimitry Andric all_of(ShuffleMask[I], [Limit](int Idx) { return Idx < Limit; }) &&
45645f7ddb14SDimitry Andric !ShuffleVectorInst::isIdentityMask(ShuffleMask[I])) {
45655f7ddb14SDimitry Andric InstructionCost C = TTI->getShuffleCost(
45665f7ddb14SDimitry Andric TTI::SK_PermuteSingleSrc,
45675f7ddb14SDimitry Andric cast<FixedVectorType>(FirstUsers[I]->getType()), ShuffleMask[I]);
45685f7ddb14SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
45695f7ddb14SDimitry Andric << " for final shuffle of insertelement external users "
45705f7ddb14SDimitry Andric << *VectorizableTree.front()->Scalars.front() << ".\n"
45715f7ddb14SDimitry Andric << "SLP: Current total cost = " << Cost << "\n");
45725f7ddb14SDimitry Andric Cost += C;
45735f7ddb14SDimitry Andric continue;
45745f7ddb14SDimitry Andric }
45755f7ddb14SDimitry Andric // Other elements - permutation of 2 vectors (the initial one and the next
45765f7ddb14SDimitry Andric // Ith incoming vector).
45775f7ddb14SDimitry Andric unsigned VF = ShuffleMask[I].size();
45785f7ddb14SDimitry Andric for (unsigned Idx = 0; Idx < VF; ++Idx) {
45795f7ddb14SDimitry Andric int &Mask = ShuffleMask[I][Idx];
45805f7ddb14SDimitry Andric Mask = Mask == UndefMaskElem ? Idx : VF + Mask;
45815f7ddb14SDimitry Andric }
45825f7ddb14SDimitry Andric InstructionCost C = TTI->getShuffleCost(
45835f7ddb14SDimitry Andric TTI::SK_PermuteTwoSrc, cast<FixedVectorType>(FirstUsers[I]->getType()),
45845f7ddb14SDimitry Andric ShuffleMask[I]);
45855f7ddb14SDimitry Andric LLVM_DEBUG(
45865f7ddb14SDimitry Andric dbgs()
45875f7ddb14SDimitry Andric << "SLP: Adding cost " << C
45885f7ddb14SDimitry Andric << " for final shuffle of vector node and external insertelement users "
45895f7ddb14SDimitry Andric << *VectorizableTree.front()->Scalars.front() << ".\n"
45905f7ddb14SDimitry Andric << "SLP: Current total cost = " << Cost << "\n");
45915f7ddb14SDimitry Andric Cost += C;
45925f7ddb14SDimitry Andric InstructionCost InsertCost = TTI->getScalarizationOverhead(
45935f7ddb14SDimitry Andric cast<FixedVectorType>(FirstUsers[I]->getType()), DemandedElts[I],
45945f7ddb14SDimitry Andric /*Insert*/ true,
45955f7ddb14SDimitry Andric /*Extract*/ false);
45965f7ddb14SDimitry Andric Cost -= InsertCost;
45975f7ddb14SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
45985f7ddb14SDimitry Andric << " for insertelements gather.\n"
45995f7ddb14SDimitry Andric << "SLP: Current total cost = " << Cost << "\n");
46005f7ddb14SDimitry Andric }
46010b57cec5SDimitry Andric
4602af732203SDimitry Andric #ifndef NDEBUG
4603af732203SDimitry Andric SmallString<256> Str;
46040b57cec5SDimitry Andric {
4605af732203SDimitry Andric raw_svector_ostream OS(Str);
46060b57cec5SDimitry Andric OS << "SLP: Spill Cost = " << SpillCost << ".\n"
46070b57cec5SDimitry Andric << "SLP: Extract Cost = " << ExtractCost << ".\n"
46080b57cec5SDimitry Andric << "SLP: Total Cost = " << Cost << ".\n";
46090b57cec5SDimitry Andric }
46100b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << Str);
46110b57cec5SDimitry Andric if (ViewSLPTree)
46120b57cec5SDimitry Andric ViewGraph(this, "SLP" + F->getName(), false, Str);
4613af732203SDimitry Andric #endif
46140b57cec5SDimitry Andric
46150b57cec5SDimitry Andric return Cost;
46160b57cec5SDimitry Andric }
46170b57cec5SDimitry Andric
/// Checks whether the gather node \p TE can be emitted as a shuffle of at most
/// two earlier tree entries instead of a scalar-by-scalar gather.
///
/// On success, fills \p Mask with a shuffle mask over the chosen source
/// entries, appends those entries (one or two) to \p Entries and returns the
/// corresponding shuffle kind; returns None when no such source set exists.
Optional<TargetTransformInfo::ShuffleKind>
BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl<int> &Mask,
                               SmallVectorImpl<const TreeEntry *> &Entries) {
  // TODO: currently checking only for Scalars in the tree entry, need to count
  // reused elements too for better cost estimation.
  Mask.assign(TE->Scalars.size(), UndefMaskElem);
  Entries.clear();
  // Build a map from each scalar value to the gather tree entries containing
  // it. Only entries created before TE (the loop breaks at TE itself) may act
  // as shuffle sources.
  DenseMap<Value *, SmallPtrSet<const TreeEntry *, 4>> ValueToTEs;
  for (const std::unique_ptr<TreeEntry> &EntryPtr : VectorizableTree) {
    if (EntryPtr.get() == TE)
      break;
    if (EntryPtr->State != TreeEntry::NeedToGather)
      continue;
    for (Value *V : EntryPtr->Scalars)
      ValueToTEs.try_emplace(V).first->getSecond().insert(EntryPtr.get());
  }
  // Find all tree entries used by the gathered values. If no common entries
  // found - not a shuffle.
  // Here we build a set of tree nodes for each gathered value and trying to
  // find the intersection between these sets. If we have at least one common
  // tree node for each gathered value - we have just a permutation of the
  // single vector. If we have 2 different sets, we're in situation where we
  // have a permutation of 2 input vectors.
  SmallVector<SmallPtrSet<const TreeEntry *, 4>> UsedTEs;
  DenseMap<Value *, int> UsedValuesEntry;
  for (Value *V : TE->Scalars) {
    if (isa<UndefValue>(V))
      continue;
    // Build a list of tree entries where V is used.
    SmallPtrSet<const TreeEntry *, 4> VToTEs;
    auto It = ValueToTEs.find(V);
    if (It != ValueToTEs.end())
      VToTEs = It->second;
    // A vectorized entry containing V is also a valid source.
    if (const TreeEntry *VTE = getTreeEntry(V))
      VToTEs.insert(VTE);
    // V is not available from any earlier entry - a shuffle is impossible.
    if (VToTEs.empty())
      return None;
    if (UsedTEs.empty()) {
      // The first iteration, just insert the list of nodes to vector.
      // NOTE: the first scalar is intentionally not recorded in
      // UsedValuesEntry; DenseMap::lookup() returns 0 for it below, which is
      // exactly the index of this first set.
      UsedTEs.push_back(VToTEs);
    } else {
      // Need to check if there are any previously used tree nodes which use V.
      // If there are no such nodes, consider that we have another one input
      // vector.
      SmallPtrSet<const TreeEntry *, 4> SavedVToTEs(VToTEs);
      unsigned Idx = 0;
      for (SmallPtrSet<const TreeEntry *, 4> &Set : UsedTEs) {
        // Do we have a non-empty intersection of previously listed tree entries
        // and tree entries using current V?
        set_intersect(VToTEs, Set);
        if (!VToTEs.empty()) {
          // Yes, write the new subset and continue analysis for the next
          // scalar.
          Set.swap(VToTEs);
          break;
        }
        // Restore the candidate set before probing the next source set.
        VToTEs = SavedVToTEs;
        ++Idx;
      }
      // No non-empty intersection found - need to add a second set of possible
      // source vectors.
      if (Idx == UsedTEs.size()) {
        // If the number of input vectors is greater than 2 - not a permutation,
        // fallback to the regular gather.
        if (UsedTEs.size() == 2)
          return None;
        UsedTEs.push_back(SavedVToTEs);
        Idx = UsedTEs.size() - 1;
      }
      UsedValuesEntry.try_emplace(V, Idx);
    }
  }

  unsigned VF = 0;
  if (UsedTEs.size() == 1) {
    // Try to find the perfect match in another gather node at first.
    auto It = find_if(UsedTEs.front(), [TE](const TreeEntry *EntryPtr) {
      return EntryPtr->isSame(TE->Scalars);
    });
    if (It != UsedTEs.front().end()) {
      // Exact same scalars - reuse that entry with an identity mask.
      Entries.push_back(*It);
      std::iota(Mask.begin(), Mask.end(), 0);
      return TargetTransformInfo::SK_PermuteSingleSrc;
    }
    // No perfect match, just shuffle, so choose the first tree node.
    Entries.push_back(*UsedTEs.front().begin());
  } else {
    // Try to find nodes with the same vector factor.
    assert(UsedTEs.size() == 2 && "Expected at max 2 permuted entries.");
    // FIXME: Shall be replaced by GetVF function once non-power-2 patch is
    // landed.
    auto &&GetVF = [](const TreeEntry *TE) {
      if (!TE->ReuseShuffleIndices.empty())
        return TE->ReuseShuffleIndices.size();
      return TE->Scalars.size();
    };
    // Pick one entry from each set that share the same vector factor; a
    // two-source shuffle needs equally sized inputs.
    DenseMap<int, const TreeEntry *> VFToTE;
    for (const TreeEntry *TE : UsedTEs.front())
      VFToTE.try_emplace(GetVF(TE), TE);
    for (const TreeEntry *TE : UsedTEs.back()) {
      auto It = VFToTE.find(GetVF(TE));
      if (It != VFToTE.end()) {
        VF = It->first;
        Entries.push_back(It->second);
        Entries.push_back(TE);
        break;
      }
    }
    // No 2 source vectors with the same vector factor - give up and do regular
    // gather.
    if (Entries.empty())
      return None;
  }

  // Build a shuffle mask for better cost estimation and vector emission.
  for (int I = 0, E = TE->Scalars.size(); I < E; ++I) {
    Value *V = TE->Scalars[I];
    if (isa<UndefValue>(V))
      continue;
    unsigned Idx = UsedValuesEntry.lookup(V);
    const TreeEntry *VTE = Entries[Idx];
    int FoundLane = VTE->findLaneForValue(V);
    Mask[I] = Idx * VF + FoundLane;
    // Extra check required by isSingleSourceMaskImpl function (called by
    // ShuffleVectorInst::isSingleSourceMask).
    if (Mask[I] >= 2 * E)
      return None;
  }
  switch (Entries.size()) {
  case 1:
    return TargetTransformInfo::SK_PermuteSingleSrc;
  case 2:
    return TargetTransformInfo::SK_PermuteTwoSrc;
  default:
    break;
  }
  return None;
}
47575f7ddb14SDimitry Andric
4758af732203SDimitry Andric InstructionCost
getGatherCost(FixedVectorType * Ty,const DenseSet<unsigned> & ShuffledIndices) const4759af732203SDimitry Andric BoUpSLP::getGatherCost(FixedVectorType *Ty,
47600b57cec5SDimitry Andric const DenseSet<unsigned> &ShuffledIndices) const {
47615ffd83dbSDimitry Andric unsigned NumElts = Ty->getNumElements();
47625ffd83dbSDimitry Andric APInt DemandedElts = APInt::getNullValue(NumElts);
4763af732203SDimitry Andric for (unsigned I = 0; I < NumElts; ++I)
4764af732203SDimitry Andric if (!ShuffledIndices.count(I))
4765af732203SDimitry Andric DemandedElts.setBit(I);
4766af732203SDimitry Andric InstructionCost Cost =
4767af732203SDimitry Andric TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true,
47685ffd83dbSDimitry Andric /*Extract*/ false);
47690b57cec5SDimitry Andric if (!ShuffledIndices.empty())
47700b57cec5SDimitry Andric Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
47710b57cec5SDimitry Andric return Cost;
47720b57cec5SDimitry Andric }
47730b57cec5SDimitry Andric
getGatherCost(ArrayRef<Value * > VL) const4774af732203SDimitry Andric InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
47750b57cec5SDimitry Andric // Find the type of the operands in VL.
47760b57cec5SDimitry Andric Type *ScalarTy = VL[0]->getType();
47770b57cec5SDimitry Andric if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
47780b57cec5SDimitry Andric ScalarTy = SI->getValueOperand()->getType();
47795ffd83dbSDimitry Andric auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
47800b57cec5SDimitry Andric // Find the cost of inserting/extracting values from the vector.
47810b57cec5SDimitry Andric // Check if the same elements are inserted several times and count them as
47820b57cec5SDimitry Andric // shuffle candidates.
47830b57cec5SDimitry Andric DenseSet<unsigned> ShuffledElements;
47840b57cec5SDimitry Andric DenseSet<Value *> UniqueElements;
47850b57cec5SDimitry Andric // Iterate in reverse order to consider insert elements with the high cost.
47860b57cec5SDimitry Andric for (unsigned I = VL.size(); I > 0; --I) {
47870b57cec5SDimitry Andric unsigned Idx = I - 1;
47885f7ddb14SDimitry Andric if (isConstant(VL[Idx]))
47895f7ddb14SDimitry Andric continue;
47900b57cec5SDimitry Andric if (!UniqueElements.insert(VL[Idx]).second)
47910b57cec5SDimitry Andric ShuffledElements.insert(Idx);
47920b57cec5SDimitry Andric }
47930b57cec5SDimitry Andric return getGatherCost(VecTy, ShuffledElements);
47940b57cec5SDimitry Andric }
47950b57cec5SDimitry Andric
47960b57cec5SDimitry Andric // Perform operand reordering on the instructions in VL and return the reordered
47970b57cec5SDimitry Andric // operands in Left and Right.
reorderInputsAccordingToOpcode(ArrayRef<Value * > VL,SmallVectorImpl<Value * > & Left,SmallVectorImpl<Value * > & Right,const DataLayout & DL,ScalarEvolution & SE,const BoUpSLP & R)4798480093f4SDimitry Andric void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
4799480093f4SDimitry Andric SmallVectorImpl<Value *> &Left,
4800480093f4SDimitry Andric SmallVectorImpl<Value *> &Right,
4801480093f4SDimitry Andric const DataLayout &DL,
4802480093f4SDimitry Andric ScalarEvolution &SE,
4803480093f4SDimitry Andric const BoUpSLP &R) {
48040b57cec5SDimitry Andric if (VL.empty())
48050b57cec5SDimitry Andric return;
4806480093f4SDimitry Andric VLOperands Ops(VL, DL, SE, R);
48070b57cec5SDimitry Andric // Reorder the operands in place.
48080b57cec5SDimitry Andric Ops.reorder();
48090b57cec5SDimitry Andric Left = Ops.getVL(0);
48100b57cec5SDimitry Andric Right = Ops.getVL(1);
48110b57cec5SDimitry Andric }
48120b57cec5SDimitry Andric
/// Moves the IRBuilder insertion point to just after the last instruction (in
/// program order) of the bundle described by tree entry \p E, and takes the
/// debug location from the bundle's main op.
void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
  // Get the basic block this bundle is in. All instructions in the bundle
  // should be in this block.
  auto *Front = E->getMainOp();
  auto *BB = Front->getParent();
  // Sanity check: every scalar matching the bundle's (alt-)opcode must live
  // in BB.
  assert(llvm::all_of(E->Scalars, [=](Value *V) -> bool {
    auto *I = cast<Instruction>(V);
    return !E->isOpcodeOrAlt(I) || I->getParent() == BB;
  }));

  // The last instruction in the bundle in program order.
  Instruction *LastInst = nullptr;

  // Find the last instruction. The common case should be that BB has been
  // scheduled, and the last instruction is VL.back(). So we start with
  // VL.back() and iterate over schedule data until we reach the end of the
  // bundle. The end of the bundle is marked by null ScheduleData.
  if (BlocksSchedules.count(BB)) {
    auto *Bundle =
        BlocksSchedules[BB]->getScheduleData(E->isOneOf(E->Scalars.back()));
    if (Bundle && Bundle->isPartOfBundle())
      for (; Bundle; Bundle = Bundle->NextInBundle)
        if (Bundle->OpValue == Bundle->Inst)
          LastInst = Bundle->Inst;
  }

  // LastInst can still be null at this point if there's either not an entry
  // for BB in BlocksSchedules or there's no ScheduleData available for
  // VL.back(). This can be the case if buildTree_rec aborts for various
  // reasons (e.g., the maximum recursion depth is reached, the maximum region
  // size is reached, etc.). ScheduleData is initialized in the scheduling
  // "dry-run".
  //
  // If this happens, we can still find the last instruction by brute force. We
  // iterate forwards from Front (inclusive) until we either see all
  // instructions in the bundle or reach the end of the block. If Front is the
  // last instruction in program order, LastInst will be set to Front, and we
  // will visit all the remaining instructions in the block.
  //
  // One of the reasons we exit early from buildTree_rec is to place an upper
  // bound on compile-time. Thus, taking an additional compile-time hit here is
  // not ideal. However, this should be exceedingly rare since it requires that
  // we both exit early from buildTree_rec and that the bundle be out-of-order
  // (causing us to iterate all the way to the end of the block).
  if (!LastInst) {
    SmallPtrSet<Value *, 16> Bundle(E->Scalars.begin(), E->Scalars.end());
    for (auto &I : make_range(BasicBlock::iterator(Front), BB->end())) {
      if (Bundle.erase(&I) && E->isOpcodeOrAlt(&I))
        LastInst = &I;
      // Stop as soon as all bundle members have been seen.
      if (Bundle.empty())
        break;
    }
  }
  assert(LastInst && "Failed to find last instruction in bundle");

  // Set the insertion point after the last instruction in the bundle. Set the
  // debug location to Front.
  Builder.SetInsertPoint(BB, ++LastInst->getIterator());
  Builder.SetCurrentDebugLocation(Front->getDebugLoc());
}
48730b57cec5SDimitry Andric
/// Builds a vector value from the scalars in \p VL by emitting a chain of
/// insertelement instructions starting from a poison vector.
///
/// Insertion order matters: constants first, then the remaining values, and
/// finally instructions that reach the insertion block, belong to the
/// vectorization tree, or live in the enclosing loop - postponing those gives
/// later loop optimizations a better chance to hoist the rest of the chain
/// out of the loop body.
Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
  // List of instructions/lanes from current block and/or the blocks which are
  // part of the current loop. These instructions will be inserted at the end to
  // make it possible to optimize loops and hoist invariant instructions out of
  // the loops body with better chances for success.
  SmallVector<std::pair<Value *, unsigned>, 4> PostponedInsts;
  SmallSet<int, 4> PostponedIndices;
  Loop *L = LI->getLoopFor(Builder.GetInsertBlock());
  // Walks up the single-predecessor chain from InsertBB and returns true if
  // it reaches InstBB (Visited guards against predecessor cycles).
  auto &&CheckPredecessor = [](BasicBlock *InstBB, BasicBlock *InsertBB) {
    SmallPtrSet<BasicBlock *, 4> Visited;
    while (InsertBB && InsertBB != InstBB && Visited.insert(InsertBB).second)
      InsertBB = InsertBB->getSinglePredecessor();
    return InsertBB && InsertBB == InstBB;
  };
  // Collect the lanes whose insertion must be postponed (see above).
  for (int I = 0, E = VL.size(); I < E; ++I) {
    if (auto *Inst = dyn_cast<Instruction>(VL[I]))
      if ((CheckPredecessor(Inst->getParent(), Builder.GetInsertBlock()) ||
           getTreeEntry(Inst) || (L && (L->contains(Inst)))) &&
          PostponedIndices.insert(I).second)
        PostponedInsts.emplace_back(Inst, I);
  }

  // Emits the insertelement for lane Pos and does the bookkeeping: records
  // the new instruction for gather/CSE tracking and, if V belongs to the
  // vectorized tree, registers it as an external use.
  auto &&CreateInsertElement = [this](Value *Vec, Value *V, unsigned Pos) {
    Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(Pos));
    // The insert may have been constant-folded; only real instructions need
    // bookkeeping.
    auto *InsElt = dyn_cast<InsertElementInst>(Vec);
    if (!InsElt)
      return Vec;
    GatherSeq.insert(InsElt);
    CSEBlocks.insert(InsElt->getParent());
    // Add to our 'need-to-extract' list.
    if (TreeEntry *Entry = getTreeEntry(V)) {
      // Find which lane we need to extract.
      unsigned FoundLane = Entry->findLaneForValue(V);
      ExternalUses.emplace_back(V, InsElt, FoundLane);
    }
    return Vec;
  };
  // For stores, vectorize on the stored value's type.
  Value *Val0 =
      isa<StoreInst>(VL[0]) ? cast<StoreInst>(VL[0])->getValueOperand() : VL[0];
  FixedVectorType *VecTy = FixedVectorType::get(Val0->getType(), VL.size());
  Value *Vec = PoisonValue::get(VecTy);
  SmallVector<int> NonConsts;
  // Insert constant values at first.
  for (int I = 0, E = VL.size(); I < E; ++I) {
    if (PostponedIndices.contains(I))
      continue;
    if (!isConstant(VL[I])) {
      NonConsts.push_back(I);
      continue;
    }
    Vec = CreateInsertElement(Vec, VL[I], I);
  }
  // Insert non-constant values.
  for (int I : NonConsts)
    Vec = CreateInsertElement(Vec, VL[I], I);
  // Append instructions, which are/may be part of the loop, in the end to make
  // it possible to hoist non-loop-based instructions.
  for (const std::pair<Value *, unsigned> &Pair : PostponedInsts)
    Vec = CreateInsertElement(Vec, Pair.first, Pair.second);

  return Vec;
}
49360b57cec5SDimitry Andric
namespace {
/// Merges shuffle masks and emits final shuffle instruction, if required.
class ShuffleInstructionBuilder {
  IRBuilderBase &Builder;
  /// Number of lanes the finalized value is shuffled to (see finalize()).
  const unsigned VF = 0;
  /// Set by finalize(); the destructor asserts no accumulated mask is dropped.
  bool IsFinalized = false;
  /// Accumulated mask, combined from all addMask()/addInversedMask() calls
  /// through the file-local ::addMask helper.
  SmallVector<int, 4> Mask;

public:
  ShuffleInstructionBuilder(IRBuilderBase &Builder, unsigned VF)
      : Builder(Builder), VF(VF) {}

  /// Adds a mask, inverting it before applying.
  void addInversedMask(ArrayRef<unsigned> SubMask) {
    if (SubMask.empty())
      return;
    SmallVector<int, 4> NewMask;
    inversePermutation(SubMask, NewMask);
    addMask(NewMask);
  }

  /// Functions adds masks, merging them into single one.
  void addMask(ArrayRef<unsigned> SubMask) {
    // Widen unsigned indices to int and delegate to the int overload.
    SmallVector<int, 4> NewMask(SubMask.begin(), SubMask.end());
    addMask(NewMask);
  }

  /// Merges \p SubMask into the accumulated mask (via file-local ::addMask).
  void addMask(ArrayRef<int> SubMask) { ::addMask(Mask, SubMask); }

  /// Emits the final shuffle of \p V, or returns \p V unchanged when the
  /// merged mask is an identity of the requested width VF.
  Value *finalize(Value *V) {
    IsFinalized = true;
    unsigned ValueVF = cast<FixedVectorType>(V->getType())->getNumElements();
    // No mask accumulated and width already matches - nothing to emit.
    if (VF == ValueVF && Mask.empty())
      return V;
    // Merge in an identity mask of length VF so the final mask has exactly
    // VF elements.
    SmallVector<int, 4> NormalizedMask(VF, UndefMaskElem);
    std::iota(NormalizedMask.begin(), NormalizedMask.end(), 0);
    addMask(NormalizedMask);

    if (VF == ValueVF && ShuffleVectorInst::isIdentityMask(Mask))
      return V;
    return Builder.CreateShuffleVector(V, Mask, "shuffle");
  }

  ~ShuffleInstructionBuilder() {
    assert((IsFinalized || Mask.empty()) &&
           "Shuffle construction must be finalized.");
  }
};
} // namespace
49865f7ddb14SDimitry Andric
vectorizeTree(ArrayRef<Value * > VL)49870b57cec5SDimitry Andric Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
49885f7ddb14SDimitry Andric unsigned VF = VL.size();
49890b57cec5SDimitry Andric InstructionsState S = getSameOpcode(VL);
49900b57cec5SDimitry Andric if (S.getOpcode()) {
49915f7ddb14SDimitry Andric if (TreeEntry *E = getTreeEntry(S.OpValue))
49920b57cec5SDimitry Andric if (E->isSame(VL)) {
49930b57cec5SDimitry Andric Value *V = vectorizeTree(E);
49945f7ddb14SDimitry Andric if (VF != cast<FixedVectorType>(V->getType())->getNumElements()) {
49955f7ddb14SDimitry Andric if (!E->ReuseShuffleIndices.empty()) {
49960b57cec5SDimitry Andric // Reshuffle to get only unique values.
49975f7ddb14SDimitry Andric // If some of the scalars are duplicated in the vectorization tree
49985f7ddb14SDimitry Andric // entry, we do not vectorize them but instead generate a mask for
49995f7ddb14SDimitry Andric // the reuses. But if there are several users of the same entry,
50005f7ddb14SDimitry Andric // they may have different vectorization factors. This is especially
50015f7ddb14SDimitry Andric // important for PHI nodes. In this case, we need to adapt the
50025f7ddb14SDimitry Andric // resulting instruction for the user vectorization factor and have
50035f7ddb14SDimitry Andric // to reshuffle it again to take only unique elements of the vector.
50045f7ddb14SDimitry Andric // Without this code the function incorrectly returns reduced vector
50055f7ddb14SDimitry Andric // instruction with the same elements, not with the unique ones.
50065f7ddb14SDimitry Andric
50075f7ddb14SDimitry Andric // block:
50085f7ddb14SDimitry Andric // %phi = phi <2 x > { .., %entry} {%shuffle, %block}
50095f7ddb14SDimitry Andric // %2 = shuffle <2 x > %phi, %poison, <4 x > <0, 0, 1, 1>
50105f7ddb14SDimitry Andric // ... (use %2)
50115f7ddb14SDimitry Andric // %shuffle = shuffle <2 x> %2, poison, <2 x> {0, 2}
50125f7ddb14SDimitry Andric // br %block
50135f7ddb14SDimitry Andric SmallVector<int> UniqueIdxs;
50145ffd83dbSDimitry Andric SmallSet<int, 4> UsedIdxs;
50155f7ddb14SDimitry Andric int Pos = 0;
50165f7ddb14SDimitry Andric int Sz = VL.size();
50175f7ddb14SDimitry Andric for (int Idx : E->ReuseShuffleIndices) {
50185f7ddb14SDimitry Andric if (Idx != Sz && UsedIdxs.insert(Idx).second)
50195f7ddb14SDimitry Andric UniqueIdxs.emplace_back(Pos);
50205f7ddb14SDimitry Andric ++Pos;
50215f7ddb14SDimitry Andric }
50225f7ddb14SDimitry Andric assert(VF >= UsedIdxs.size() && "Expected vectorization factor "
50235f7ddb14SDimitry Andric "less than original vector size.");
50245f7ddb14SDimitry Andric UniqueIdxs.append(VF - UsedIdxs.size(), UndefMaskElem);
50255f7ddb14SDimitry Andric V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle");
50265f7ddb14SDimitry Andric } else {
50275f7ddb14SDimitry Andric assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
50285f7ddb14SDimitry Andric "Expected vectorization factor less "
50295f7ddb14SDimitry Andric "than original vector size.");
50305f7ddb14SDimitry Andric SmallVector<int> UniformMask(VF, 0);
50315f7ddb14SDimitry Andric std::iota(UniformMask.begin(), UniformMask.end(), 0);
50325f7ddb14SDimitry Andric V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle");
50330b57cec5SDimitry Andric }
50340b57cec5SDimitry Andric }
50350b57cec5SDimitry Andric return V;
50360b57cec5SDimitry Andric }
50370b57cec5SDimitry Andric }
50380b57cec5SDimitry Andric
50390b57cec5SDimitry Andric // Check that every instruction appears once in this bundle.
50405f7ddb14SDimitry Andric SmallVector<int> ReuseShuffleIndicies;
50415f7ddb14SDimitry Andric SmallVector<Value *> UniqueValues;
50420b57cec5SDimitry Andric if (VL.size() > 2) {
50430b57cec5SDimitry Andric DenseMap<Value *, unsigned> UniquePositions;
50445f7ddb14SDimitry Andric unsigned NumValues =
50455f7ddb14SDimitry Andric std::distance(VL.begin(), find_if(reverse(VL), [](Value *V) {
50465f7ddb14SDimitry Andric return !isa<UndefValue>(V);
50475f7ddb14SDimitry Andric }).base());
50485f7ddb14SDimitry Andric VF = std::max<unsigned>(VF, PowerOf2Ceil(NumValues));
50495f7ddb14SDimitry Andric int UniqueVals = 0;
50505f7ddb14SDimitry Andric bool HasUndefs = false;
50515f7ddb14SDimitry Andric for (Value *V : VL.drop_back(VL.size() - VF)) {
50525f7ddb14SDimitry Andric if (isa<UndefValue>(V)) {
50535f7ddb14SDimitry Andric ReuseShuffleIndicies.emplace_back(UndefMaskElem);
50545f7ddb14SDimitry Andric HasUndefs = true;
50555f7ddb14SDimitry Andric continue;
50565f7ddb14SDimitry Andric }
50575f7ddb14SDimitry Andric if (isConstant(V)) {
50585f7ddb14SDimitry Andric ReuseShuffleIndicies.emplace_back(UniqueValues.size());
50595f7ddb14SDimitry Andric UniqueValues.emplace_back(V);
50605f7ddb14SDimitry Andric continue;
50615f7ddb14SDimitry Andric }
50620b57cec5SDimitry Andric auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
50630b57cec5SDimitry Andric ReuseShuffleIndicies.emplace_back(Res.first->second);
50645f7ddb14SDimitry Andric if (Res.second) {
50650b57cec5SDimitry Andric UniqueValues.emplace_back(V);
50665f7ddb14SDimitry Andric ++UniqueVals;
50670b57cec5SDimitry Andric }
50685f7ddb14SDimitry Andric }
50695f7ddb14SDimitry Andric if (HasUndefs && UniqueVals == 1 && UniqueValues.size() == 1) {
50705f7ddb14SDimitry Andric // Emit pure splat vector.
50715f7ddb14SDimitry Andric // FIXME: why it is not identified as an identity.
50725f7ddb14SDimitry Andric unsigned NumUndefs = count(ReuseShuffleIndicies, UndefMaskElem);
50735f7ddb14SDimitry Andric if (NumUndefs == ReuseShuffleIndicies.size() - 1)
50745f7ddb14SDimitry Andric ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(),
50755f7ddb14SDimitry Andric UndefMaskElem);
50760b57cec5SDimitry Andric else
50775f7ddb14SDimitry Andric ReuseShuffleIndicies.assign(VF, 0);
50785f7ddb14SDimitry Andric } else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) {
50795f7ddb14SDimitry Andric ReuseShuffleIndicies.clear();
50805f7ddb14SDimitry Andric UniqueValues.clear();
50815f7ddb14SDimitry Andric UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues));
50825f7ddb14SDimitry Andric }
50835f7ddb14SDimitry Andric UniqueValues.append(VF - UniqueValues.size(),
50845f7ddb14SDimitry Andric PoisonValue::get(VL[0]->getType()));
50850b57cec5SDimitry Andric VL = UniqueValues;
50860b57cec5SDimitry Andric }
50870b57cec5SDimitry Andric
50885f7ddb14SDimitry Andric ShuffleInstructionBuilder ShuffleBuilder(Builder, VF);
5089af732203SDimitry Andric Value *Vec = gather(VL);
50900b57cec5SDimitry Andric if (!ReuseShuffleIndicies.empty()) {
50915f7ddb14SDimitry Andric ShuffleBuilder.addMask(ReuseShuffleIndicies);
50925f7ddb14SDimitry Andric Vec = ShuffleBuilder.finalize(Vec);
5093af732203SDimitry Andric if (auto *I = dyn_cast<Instruction>(Vec)) {
50940b57cec5SDimitry Andric GatherSeq.insert(I);
50950b57cec5SDimitry Andric CSEBlocks.insert(I->getParent());
50960b57cec5SDimitry Andric }
50970b57cec5SDimitry Andric }
5098af732203SDimitry Andric return Vec;
50990b57cec5SDimitry Andric }
51000b57cec5SDimitry Andric
vectorizeTree(TreeEntry * E)51010b57cec5SDimitry Andric Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
51020b57cec5SDimitry Andric IRBuilder<>::InsertPointGuard Guard(Builder);
51030b57cec5SDimitry Andric
51040b57cec5SDimitry Andric if (E->VectorizedValue) {
51050b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n");
51060b57cec5SDimitry Andric return E->VectorizedValue;
51070b57cec5SDimitry Andric }
51080b57cec5SDimitry Andric
51090b57cec5SDimitry Andric bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
51105f7ddb14SDimitry Andric unsigned VF = E->Scalars.size();
51115f7ddb14SDimitry Andric if (NeedToShuffleReuses)
51125f7ddb14SDimitry Andric VF = E->ReuseShuffleIndices.size();
51135f7ddb14SDimitry Andric ShuffleInstructionBuilder ShuffleBuilder(Builder, VF);
5114480093f4SDimitry Andric if (E->State == TreeEntry::NeedToGather) {
51158bcb0991SDimitry Andric setInsertPointAfterBundle(E);
51165f7ddb14SDimitry Andric Value *Vec;
51175f7ddb14SDimitry Andric SmallVector<int> Mask;
51185f7ddb14SDimitry Andric SmallVector<const TreeEntry *> Entries;
51195f7ddb14SDimitry Andric Optional<TargetTransformInfo::ShuffleKind> Shuffle =
51205f7ddb14SDimitry Andric isGatherShuffledEntry(E, Mask, Entries);
51215f7ddb14SDimitry Andric if (Shuffle.hasValue()) {
51225f7ddb14SDimitry Andric assert((Entries.size() == 1 || Entries.size() == 2) &&
51235f7ddb14SDimitry Andric "Expected shuffle of 1 or 2 entries.");
51245f7ddb14SDimitry Andric Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue,
51255f7ddb14SDimitry Andric Entries.back()->VectorizedValue, Mask);
51265f7ddb14SDimitry Andric } else {
51275f7ddb14SDimitry Andric Vec = gather(E->Scalars);
51285f7ddb14SDimitry Andric }
51290b57cec5SDimitry Andric if (NeedToShuffleReuses) {
51305f7ddb14SDimitry Andric ShuffleBuilder.addMask(E->ReuseShuffleIndices);
51315f7ddb14SDimitry Andric Vec = ShuffleBuilder.finalize(Vec);
5132af732203SDimitry Andric if (auto *I = dyn_cast<Instruction>(Vec)) {
51330b57cec5SDimitry Andric GatherSeq.insert(I);
51340b57cec5SDimitry Andric CSEBlocks.insert(I->getParent());
51350b57cec5SDimitry Andric }
51360b57cec5SDimitry Andric }
5137af732203SDimitry Andric E->VectorizedValue = Vec;
5138af732203SDimitry Andric return Vec;
51390b57cec5SDimitry Andric }
51400b57cec5SDimitry Andric
5141af732203SDimitry Andric assert((E->State == TreeEntry::Vectorize ||
5142af732203SDimitry Andric E->State == TreeEntry::ScatterVectorize) &&
5143af732203SDimitry Andric "Unhandled state");
51448bcb0991SDimitry Andric unsigned ShuffleOrOp =
51458bcb0991SDimitry Andric E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
5146af732203SDimitry Andric Instruction *VL0 = E->getMainOp();
5147af732203SDimitry Andric Type *ScalarTy = VL0->getType();
5148af732203SDimitry Andric if (auto *Store = dyn_cast<StoreInst>(VL0))
5149af732203SDimitry Andric ScalarTy = Store->getValueOperand()->getType();
51505f7ddb14SDimitry Andric else if (auto *IE = dyn_cast<InsertElementInst>(VL0))
51515f7ddb14SDimitry Andric ScalarTy = IE->getOperand(1)->getType();
5152af732203SDimitry Andric auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
51530b57cec5SDimitry Andric switch (ShuffleOrOp) {
51540b57cec5SDimitry Andric case Instruction::PHI: {
51558bcb0991SDimitry Andric auto *PH = cast<PHINode>(VL0);
51560b57cec5SDimitry Andric Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI());
51570b57cec5SDimitry Andric Builder.SetCurrentDebugLocation(PH->getDebugLoc());
51580b57cec5SDimitry Andric PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
51590b57cec5SDimitry Andric Value *V = NewPhi;
5160af732203SDimitry Andric if (NeedToShuffleReuses)
5161af732203SDimitry Andric V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
5162af732203SDimitry Andric
51630b57cec5SDimitry Andric E->VectorizedValue = V;
51640b57cec5SDimitry Andric
51650b57cec5SDimitry Andric // PHINodes may have multiple entries from the same block. We want to
51660b57cec5SDimitry Andric // visit every block once.
51670b57cec5SDimitry Andric SmallPtrSet<BasicBlock*, 4> VisitedBBs;
51680b57cec5SDimitry Andric
51690b57cec5SDimitry Andric for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
51700b57cec5SDimitry Andric ValueList Operands;
51710b57cec5SDimitry Andric BasicBlock *IBB = PH->getIncomingBlock(i);
51720b57cec5SDimitry Andric
51730b57cec5SDimitry Andric if (!VisitedBBs.insert(IBB).second) {
51740b57cec5SDimitry Andric NewPhi->addIncoming(NewPhi->getIncomingValueForBlock(IBB), IBB);
51750b57cec5SDimitry Andric continue;
51760b57cec5SDimitry Andric }
51770b57cec5SDimitry Andric
51780b57cec5SDimitry Andric Builder.SetInsertPoint(IBB->getTerminator());
51790b57cec5SDimitry Andric Builder.SetCurrentDebugLocation(PH->getDebugLoc());
51800b57cec5SDimitry Andric Value *Vec = vectorizeTree(E->getOperand(i));
51810b57cec5SDimitry Andric NewPhi->addIncoming(Vec, IBB);
51820b57cec5SDimitry Andric }
51830b57cec5SDimitry Andric
51840b57cec5SDimitry Andric assert(NewPhi->getNumIncomingValues() == PH->getNumIncomingValues() &&
51850b57cec5SDimitry Andric "Invalid number of incoming values");
51860b57cec5SDimitry Andric return V;
51870b57cec5SDimitry Andric }
51880b57cec5SDimitry Andric
51890b57cec5SDimitry Andric case Instruction::ExtractElement: {
51900b57cec5SDimitry Andric Value *V = E->getSingleOperand(0);
51910b57cec5SDimitry Andric Builder.SetInsertPoint(VL0);
51925f7ddb14SDimitry Andric ShuffleBuilder.addInversedMask(E->ReorderIndices);
51935f7ddb14SDimitry Andric ShuffleBuilder.addMask(E->ReuseShuffleIndices);
51945f7ddb14SDimitry Andric V = ShuffleBuilder.finalize(V);
51950b57cec5SDimitry Andric E->VectorizedValue = V;
51960b57cec5SDimitry Andric return V;
51970b57cec5SDimitry Andric }
51980b57cec5SDimitry Andric case Instruction::ExtractValue: {
5199af732203SDimitry Andric auto *LI = cast<LoadInst>(E->getSingleOperand(0));
52000b57cec5SDimitry Andric Builder.SetInsertPoint(LI);
5201af732203SDimitry Andric auto *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
52020b57cec5SDimitry Andric Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
52035ffd83dbSDimitry Andric LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlign());
52040b57cec5SDimitry Andric Value *NewV = propagateMetadata(V, E->Scalars);
52055f7ddb14SDimitry Andric ShuffleBuilder.addInversedMask(E->ReorderIndices);
52065f7ddb14SDimitry Andric ShuffleBuilder.addMask(E->ReuseShuffleIndices);
52075f7ddb14SDimitry Andric NewV = ShuffleBuilder.finalize(NewV);
52080b57cec5SDimitry Andric E->VectorizedValue = NewV;
52090b57cec5SDimitry Andric return NewV;
52100b57cec5SDimitry Andric }
52115f7ddb14SDimitry Andric case Instruction::InsertElement: {
52125f7ddb14SDimitry Andric Builder.SetInsertPoint(VL0);
52135f7ddb14SDimitry Andric Value *V = vectorizeTree(E->getOperand(1));
52145f7ddb14SDimitry Andric
52155f7ddb14SDimitry Andric const unsigned NumElts =
52165f7ddb14SDimitry Andric cast<FixedVectorType>(VL0->getType())->getNumElements();
52175f7ddb14SDimitry Andric const unsigned NumScalars = E->Scalars.size();
52185f7ddb14SDimitry Andric
52195f7ddb14SDimitry Andric // Create InsertVector shuffle if necessary
52205f7ddb14SDimitry Andric Instruction *FirstInsert = nullptr;
52215f7ddb14SDimitry Andric bool IsIdentity = true;
52225f7ddb14SDimitry Andric unsigned Offset = UINT_MAX;
52235f7ddb14SDimitry Andric for (unsigned I = 0; I < NumScalars; ++I) {
52245f7ddb14SDimitry Andric Value *Scalar = E->Scalars[I];
52255f7ddb14SDimitry Andric if (!FirstInsert &&
52265f7ddb14SDimitry Andric !is_contained(E->Scalars, cast<Instruction>(Scalar)->getOperand(0)))
52275f7ddb14SDimitry Andric FirstInsert = cast<Instruction>(Scalar);
52285f7ddb14SDimitry Andric Optional<int> InsertIdx = getInsertIndex(Scalar, 0);
52295f7ddb14SDimitry Andric if (!InsertIdx || *InsertIdx == UndefMaskElem)
52305f7ddb14SDimitry Andric continue;
52315f7ddb14SDimitry Andric unsigned Idx = *InsertIdx;
52325f7ddb14SDimitry Andric if (Idx < Offset) {
52335f7ddb14SDimitry Andric Offset = Idx;
52345f7ddb14SDimitry Andric IsIdentity &= I == 0;
52355f7ddb14SDimitry Andric } else {
52365f7ddb14SDimitry Andric assert(Idx >= Offset && "Failed to find vector index offset");
52375f7ddb14SDimitry Andric IsIdentity &= Idx - Offset == I;
52385f7ddb14SDimitry Andric }
52395f7ddb14SDimitry Andric }
52405f7ddb14SDimitry Andric assert(Offset < NumElts && "Failed to find vector index offset");
52415f7ddb14SDimitry Andric
52425f7ddb14SDimitry Andric // Create shuffle to resize vector
52435f7ddb14SDimitry Andric SmallVector<int> Mask(NumElts, UndefMaskElem);
52445f7ddb14SDimitry Andric if (!IsIdentity) {
52455f7ddb14SDimitry Andric for (unsigned I = 0; I < NumScalars; ++I) {
52465f7ddb14SDimitry Andric Value *Scalar = E->Scalars[I];
52475f7ddb14SDimitry Andric Optional<int> InsertIdx = getInsertIndex(Scalar, 0);
52485f7ddb14SDimitry Andric if (!InsertIdx || *InsertIdx == UndefMaskElem)
52495f7ddb14SDimitry Andric continue;
52505f7ddb14SDimitry Andric Mask[*InsertIdx - Offset] = I;
52515f7ddb14SDimitry Andric }
52525f7ddb14SDimitry Andric } else {
52535f7ddb14SDimitry Andric std::iota(Mask.begin(), std::next(Mask.begin(), NumScalars), 0);
52545f7ddb14SDimitry Andric }
52555f7ddb14SDimitry Andric if (!IsIdentity || NumElts != NumScalars)
52565f7ddb14SDimitry Andric V = Builder.CreateShuffleVector(V, Mask);
52575f7ddb14SDimitry Andric
52585f7ddb14SDimitry Andric if (NumElts != NumScalars) {
52595f7ddb14SDimitry Andric SmallVector<int> InsertMask(NumElts);
52605f7ddb14SDimitry Andric std::iota(InsertMask.begin(), InsertMask.end(), 0);
52615f7ddb14SDimitry Andric for (unsigned I = 0; I < NumElts; I++) {
52625f7ddb14SDimitry Andric if (Mask[I] != UndefMaskElem)
52635f7ddb14SDimitry Andric InsertMask[Offset + I] = NumElts + I;
52645f7ddb14SDimitry Andric }
52655f7ddb14SDimitry Andric
52665f7ddb14SDimitry Andric V = Builder.CreateShuffleVector(
52675f7ddb14SDimitry Andric FirstInsert->getOperand(0), V, InsertMask,
52685f7ddb14SDimitry Andric cast<Instruction>(E->Scalars.back())->getName());
52695f7ddb14SDimitry Andric }
52705f7ddb14SDimitry Andric
52715f7ddb14SDimitry Andric ++NumVectorInstructions;
52725f7ddb14SDimitry Andric E->VectorizedValue = V;
52735f7ddb14SDimitry Andric return V;
52745f7ddb14SDimitry Andric }
52750b57cec5SDimitry Andric case Instruction::ZExt:
52760b57cec5SDimitry Andric case Instruction::SExt:
52770b57cec5SDimitry Andric case Instruction::FPToUI:
52780b57cec5SDimitry Andric case Instruction::FPToSI:
52790b57cec5SDimitry Andric case Instruction::FPExt:
52800b57cec5SDimitry Andric case Instruction::PtrToInt:
52810b57cec5SDimitry Andric case Instruction::IntToPtr:
52820b57cec5SDimitry Andric case Instruction::SIToFP:
52830b57cec5SDimitry Andric case Instruction::UIToFP:
52840b57cec5SDimitry Andric case Instruction::Trunc:
52850b57cec5SDimitry Andric case Instruction::FPTrunc:
52860b57cec5SDimitry Andric case Instruction::BitCast: {
52878bcb0991SDimitry Andric setInsertPointAfterBundle(E);
52880b57cec5SDimitry Andric
52890b57cec5SDimitry Andric Value *InVec = vectorizeTree(E->getOperand(0));
52900b57cec5SDimitry Andric
52910b57cec5SDimitry Andric if (E->VectorizedValue) {
52920b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
52930b57cec5SDimitry Andric return E->VectorizedValue;
52940b57cec5SDimitry Andric }
52950b57cec5SDimitry Andric
52968bcb0991SDimitry Andric auto *CI = cast<CastInst>(VL0);
52970b57cec5SDimitry Andric Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
52985f7ddb14SDimitry Andric ShuffleBuilder.addMask(E->ReuseShuffleIndices);
52995f7ddb14SDimitry Andric V = ShuffleBuilder.finalize(V);
5300af732203SDimitry Andric
53010b57cec5SDimitry Andric E->VectorizedValue = V;
53020b57cec5SDimitry Andric ++NumVectorInstructions;
53030b57cec5SDimitry Andric return V;
53040b57cec5SDimitry Andric }
53050b57cec5SDimitry Andric case Instruction::FCmp:
53060b57cec5SDimitry Andric case Instruction::ICmp: {
53078bcb0991SDimitry Andric setInsertPointAfterBundle(E);
53080b57cec5SDimitry Andric
53090b57cec5SDimitry Andric Value *L = vectorizeTree(E->getOperand(0));
53100b57cec5SDimitry Andric Value *R = vectorizeTree(E->getOperand(1));
53110b57cec5SDimitry Andric
53120b57cec5SDimitry Andric if (E->VectorizedValue) {
53130b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
53140b57cec5SDimitry Andric return E->VectorizedValue;
53150b57cec5SDimitry Andric }
53160b57cec5SDimitry Andric
53170b57cec5SDimitry Andric CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
53185ffd83dbSDimitry Andric Value *V = Builder.CreateCmp(P0, L, R);
53190b57cec5SDimitry Andric propagateIRFlags(V, E->Scalars, VL0);
53205f7ddb14SDimitry Andric ShuffleBuilder.addMask(E->ReuseShuffleIndices);
53215f7ddb14SDimitry Andric V = ShuffleBuilder.finalize(V);
5322af732203SDimitry Andric
53230b57cec5SDimitry Andric E->VectorizedValue = V;
53240b57cec5SDimitry Andric ++NumVectorInstructions;
53250b57cec5SDimitry Andric return V;
53260b57cec5SDimitry Andric }
53270b57cec5SDimitry Andric case Instruction::Select: {
53288bcb0991SDimitry Andric setInsertPointAfterBundle(E);
53290b57cec5SDimitry Andric
53300b57cec5SDimitry Andric Value *Cond = vectorizeTree(E->getOperand(0));
53310b57cec5SDimitry Andric Value *True = vectorizeTree(E->getOperand(1));
53320b57cec5SDimitry Andric Value *False = vectorizeTree(E->getOperand(2));
53330b57cec5SDimitry Andric
53340b57cec5SDimitry Andric if (E->VectorizedValue) {
53350b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
53360b57cec5SDimitry Andric return E->VectorizedValue;
53370b57cec5SDimitry Andric }
53380b57cec5SDimitry Andric
53390b57cec5SDimitry Andric Value *V = Builder.CreateSelect(Cond, True, False);
53405f7ddb14SDimitry Andric ShuffleBuilder.addMask(E->ReuseShuffleIndices);
53415f7ddb14SDimitry Andric V = ShuffleBuilder.finalize(V);
5342af732203SDimitry Andric
53430b57cec5SDimitry Andric E->VectorizedValue = V;
53440b57cec5SDimitry Andric ++NumVectorInstructions;
53450b57cec5SDimitry Andric return V;
53460b57cec5SDimitry Andric }
53470b57cec5SDimitry Andric case Instruction::FNeg: {
53488bcb0991SDimitry Andric setInsertPointAfterBundle(E);
53490b57cec5SDimitry Andric
53500b57cec5SDimitry Andric Value *Op = vectorizeTree(E->getOperand(0));
53510b57cec5SDimitry Andric
53520b57cec5SDimitry Andric if (E->VectorizedValue) {
53530b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
53540b57cec5SDimitry Andric return E->VectorizedValue;
53550b57cec5SDimitry Andric }
53560b57cec5SDimitry Andric
53570b57cec5SDimitry Andric Value *V = Builder.CreateUnOp(
53588bcb0991SDimitry Andric static_cast<Instruction::UnaryOps>(E->getOpcode()), Op);
53590b57cec5SDimitry Andric propagateIRFlags(V, E->Scalars, VL0);
53600b57cec5SDimitry Andric if (auto *I = dyn_cast<Instruction>(V))
53610b57cec5SDimitry Andric V = propagateMetadata(I, E->Scalars);
53620b57cec5SDimitry Andric
53635f7ddb14SDimitry Andric ShuffleBuilder.addMask(E->ReuseShuffleIndices);
53645f7ddb14SDimitry Andric V = ShuffleBuilder.finalize(V);
5365af732203SDimitry Andric
53660b57cec5SDimitry Andric E->VectorizedValue = V;
53670b57cec5SDimitry Andric ++NumVectorInstructions;
53680b57cec5SDimitry Andric
53690b57cec5SDimitry Andric return V;
53700b57cec5SDimitry Andric }
53710b57cec5SDimitry Andric case Instruction::Add:
53720b57cec5SDimitry Andric case Instruction::FAdd:
53730b57cec5SDimitry Andric case Instruction::Sub:
53740b57cec5SDimitry Andric case Instruction::FSub:
53750b57cec5SDimitry Andric case Instruction::Mul:
53760b57cec5SDimitry Andric case Instruction::FMul:
53770b57cec5SDimitry Andric case Instruction::UDiv:
53780b57cec5SDimitry Andric case Instruction::SDiv:
53790b57cec5SDimitry Andric case Instruction::FDiv:
53800b57cec5SDimitry Andric case Instruction::URem:
53810b57cec5SDimitry Andric case Instruction::SRem:
53820b57cec5SDimitry Andric case Instruction::FRem:
53830b57cec5SDimitry Andric case Instruction::Shl:
53840b57cec5SDimitry Andric case Instruction::LShr:
53850b57cec5SDimitry Andric case Instruction::AShr:
53860b57cec5SDimitry Andric case Instruction::And:
53870b57cec5SDimitry Andric case Instruction::Or:
53880b57cec5SDimitry Andric case Instruction::Xor: {
53898bcb0991SDimitry Andric setInsertPointAfterBundle(E);
53900b57cec5SDimitry Andric
53910b57cec5SDimitry Andric Value *LHS = vectorizeTree(E->getOperand(0));
53920b57cec5SDimitry Andric Value *RHS = vectorizeTree(E->getOperand(1));
53930b57cec5SDimitry Andric
53940b57cec5SDimitry Andric if (E->VectorizedValue) {
53950b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
53960b57cec5SDimitry Andric return E->VectorizedValue;
53970b57cec5SDimitry Andric }
53980b57cec5SDimitry Andric
53990b57cec5SDimitry Andric Value *V = Builder.CreateBinOp(
54008bcb0991SDimitry Andric static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
54018bcb0991SDimitry Andric RHS);
54020b57cec5SDimitry Andric propagateIRFlags(V, E->Scalars, VL0);
54030b57cec5SDimitry Andric if (auto *I = dyn_cast<Instruction>(V))
54040b57cec5SDimitry Andric V = propagateMetadata(I, E->Scalars);
54050b57cec5SDimitry Andric
54065f7ddb14SDimitry Andric ShuffleBuilder.addMask(E->ReuseShuffleIndices);
54075f7ddb14SDimitry Andric V = ShuffleBuilder.finalize(V);
5408af732203SDimitry Andric
54090b57cec5SDimitry Andric E->VectorizedValue = V;
54100b57cec5SDimitry Andric ++NumVectorInstructions;
54110b57cec5SDimitry Andric
54120b57cec5SDimitry Andric return V;
54130b57cec5SDimitry Andric }
54140b57cec5SDimitry Andric case Instruction::Load: {
54150b57cec5SDimitry Andric // Loads are inserted at the head of the tree because we don't want to
54160b57cec5SDimitry Andric // sink them all the way down past store instructions.
54178bcb0991SDimitry Andric bool IsReorder = E->updateStateIfReorder();
54188bcb0991SDimitry Andric if (IsReorder)
54198bcb0991SDimitry Andric VL0 = E->getMainOp();
54208bcb0991SDimitry Andric setInsertPointAfterBundle(E);
54210b57cec5SDimitry Andric
54220b57cec5SDimitry Andric LoadInst *LI = cast<LoadInst>(VL0);
5423af732203SDimitry Andric Instruction *NewLI;
54240b57cec5SDimitry Andric unsigned AS = LI->getPointerAddressSpace();
54250b57cec5SDimitry Andric Value *PO = LI->getPointerOperand();
5426af732203SDimitry Andric if (E->State == TreeEntry::Vectorize) {
54270b57cec5SDimitry Andric
5428af732203SDimitry Andric Value *VecPtr = Builder.CreateBitCast(PO, VecTy->getPointerTo(AS));
5429af732203SDimitry Andric
5430af732203SDimitry Andric // The pointer operand uses an in-tree scalar so we add the new BitCast
5431af732203SDimitry Andric // to ExternalUses list to make sure that an extract will be generated
5432af732203SDimitry Andric // in the future.
5433*1aaf10a9SDimitry Andric if (TreeEntry *Entry = getTreeEntry(PO)) {
5434*1aaf10a9SDimitry Andric // Find which lane we need to extract.
5435*1aaf10a9SDimitry Andric unsigned FoundLane = Entry->findLaneForValue(PO);
5436*1aaf10a9SDimitry Andric ExternalUses.emplace_back(PO, cast<User>(VecPtr), FoundLane);
5437*1aaf10a9SDimitry Andric }
5438af732203SDimitry Andric
5439af732203SDimitry Andric NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
5440af732203SDimitry Andric } else {
5441af732203SDimitry Andric assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");
5442af732203SDimitry Andric Value *VecPtr = vectorizeTree(E->getOperand(0));
5443af732203SDimitry Andric // Use the minimum alignment of the gathered loads.
5444af732203SDimitry Andric Align CommonAlignment = LI->getAlign();
5445af732203SDimitry Andric for (Value *V : E->Scalars)
5446af732203SDimitry Andric CommonAlignment =
5447af732203SDimitry Andric commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
54485f7ddb14SDimitry Andric NewLI = Builder.CreateMaskedGather(VecTy, VecPtr, CommonAlignment);
5449af732203SDimitry Andric }
5450af732203SDimitry Andric Value *V = propagateMetadata(NewLI, E->Scalars);
5451af732203SDimitry Andric
54525f7ddb14SDimitry Andric ShuffleBuilder.addInversedMask(E->ReorderIndices);
54535f7ddb14SDimitry Andric ShuffleBuilder.addMask(E->ReuseShuffleIndices);
54545f7ddb14SDimitry Andric V = ShuffleBuilder.finalize(V);
54550b57cec5SDimitry Andric E->VectorizedValue = V;
54560b57cec5SDimitry Andric ++NumVectorInstructions;
54570b57cec5SDimitry Andric return V;
54580b57cec5SDimitry Andric }
54590b57cec5SDimitry Andric case Instruction::Store: {
5460480093f4SDimitry Andric bool IsReorder = !E->ReorderIndices.empty();
5461480093f4SDimitry Andric auto *SI = cast<StoreInst>(
5462480093f4SDimitry Andric IsReorder ? E->Scalars[E->ReorderIndices.front()] : VL0);
54630b57cec5SDimitry Andric unsigned AS = SI->getPointerAddressSpace();
54640b57cec5SDimitry Andric
54658bcb0991SDimitry Andric setInsertPointAfterBundle(E);
54660b57cec5SDimitry Andric
54670b57cec5SDimitry Andric Value *VecValue = vectorizeTree(E->getOperand(0));
54685f7ddb14SDimitry Andric ShuffleBuilder.addMask(E->ReorderIndices);
54695f7ddb14SDimitry Andric VecValue = ShuffleBuilder.finalize(VecValue);
54705f7ddb14SDimitry Andric
54710b57cec5SDimitry Andric Value *ScalarPtr = SI->getPointerOperand();
5472480093f4SDimitry Andric Value *VecPtr = Builder.CreateBitCast(
5473480093f4SDimitry Andric ScalarPtr, VecValue->getType()->getPointerTo(AS));
54745ffd83dbSDimitry Andric StoreInst *ST = Builder.CreateAlignedStore(VecValue, VecPtr,
54755ffd83dbSDimitry Andric SI->getAlign());
54760b57cec5SDimitry Andric
54770b57cec5SDimitry Andric // The pointer operand uses an in-tree scalar, so add the new BitCast to
54780b57cec5SDimitry Andric // ExternalUses to make sure that an extract will be generated in the
54790b57cec5SDimitry Andric // future.
5480*1aaf10a9SDimitry Andric if (TreeEntry *Entry = getTreeEntry(ScalarPtr)) {
5481*1aaf10a9SDimitry Andric // Find which lane we need to extract.
5482*1aaf10a9SDimitry Andric unsigned FoundLane = Entry->findLaneForValue(ScalarPtr);
5483*1aaf10a9SDimitry Andric ExternalUses.push_back(
5484*1aaf10a9SDimitry Andric ExternalUser(ScalarPtr, cast<User>(VecPtr), FoundLane));
5485*1aaf10a9SDimitry Andric }
54860b57cec5SDimitry Andric
54870b57cec5SDimitry Andric Value *V = propagateMetadata(ST, E->Scalars);
5488af732203SDimitry Andric
54890b57cec5SDimitry Andric E->VectorizedValue = V;
54900b57cec5SDimitry Andric ++NumVectorInstructions;
54910b57cec5SDimitry Andric return V;
54920b57cec5SDimitry Andric }
54930b57cec5SDimitry Andric case Instruction::GetElementPtr: {
54948bcb0991SDimitry Andric setInsertPointAfterBundle(E);
54950b57cec5SDimitry Andric
54960b57cec5SDimitry Andric Value *Op0 = vectorizeTree(E->getOperand(0));
54970b57cec5SDimitry Andric
54980b57cec5SDimitry Andric std::vector<Value *> OpVecs;
54990b57cec5SDimitry Andric for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
55000b57cec5SDimitry Andric ++j) {
5501480093f4SDimitry Andric ValueList &VL = E->getOperand(j);
5502480093f4SDimitry Andric // Need to cast all elements to the same type before vectorization to
5503480093f4SDimitry Andric // avoid crash.
5504480093f4SDimitry Andric Type *VL0Ty = VL0->getOperand(j)->getType();
5505480093f4SDimitry Andric Type *Ty = llvm::all_of(
5506480093f4SDimitry Andric VL, [VL0Ty](Value *V) { return VL0Ty == V->getType(); })
5507480093f4SDimitry Andric ? VL0Ty
5508480093f4SDimitry Andric : DL->getIndexType(cast<GetElementPtrInst>(VL0)
5509480093f4SDimitry Andric ->getPointerOperandType()
5510480093f4SDimitry Andric ->getScalarType());
5511480093f4SDimitry Andric for (Value *&V : VL) {
5512480093f4SDimitry Andric auto *CI = cast<ConstantInt>(V);
5513480093f4SDimitry Andric V = ConstantExpr::getIntegerCast(CI, Ty,
5514480093f4SDimitry Andric CI->getValue().isSignBitSet());
5515480093f4SDimitry Andric }
5516480093f4SDimitry Andric Value *OpVec = vectorizeTree(VL);
55170b57cec5SDimitry Andric OpVecs.push_back(OpVec);
55180b57cec5SDimitry Andric }
55190b57cec5SDimitry Andric
55200b57cec5SDimitry Andric Value *V = Builder.CreateGEP(
55210b57cec5SDimitry Andric cast<GetElementPtrInst>(VL0)->getSourceElementType(), Op0, OpVecs);
55220b57cec5SDimitry Andric if (Instruction *I = dyn_cast<Instruction>(V))
55230b57cec5SDimitry Andric V = propagateMetadata(I, E->Scalars);
55240b57cec5SDimitry Andric
55255f7ddb14SDimitry Andric ShuffleBuilder.addMask(E->ReuseShuffleIndices);
55265f7ddb14SDimitry Andric V = ShuffleBuilder.finalize(V);
5527af732203SDimitry Andric
55280b57cec5SDimitry Andric E->VectorizedValue = V;
55290b57cec5SDimitry Andric ++NumVectorInstructions;
55300b57cec5SDimitry Andric
55310b57cec5SDimitry Andric return V;
55320b57cec5SDimitry Andric }
55330b57cec5SDimitry Andric case Instruction::Call: {
55340b57cec5SDimitry Andric CallInst *CI = cast<CallInst>(VL0);
55358bcb0991SDimitry Andric setInsertPointAfterBundle(E);
55368bcb0991SDimitry Andric
55370b57cec5SDimitry Andric Intrinsic::ID IID = Intrinsic::not_intrinsic;
55388bcb0991SDimitry Andric if (Function *FI = CI->getCalledFunction())
55390b57cec5SDimitry Andric IID = FI->getIntrinsicID();
55408bcb0991SDimitry Andric
55415ffd83dbSDimitry Andric Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
55425ffd83dbSDimitry Andric
55435ffd83dbSDimitry Andric auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
55445ffd83dbSDimitry Andric bool UseIntrinsic = ID != Intrinsic::not_intrinsic &&
55455ffd83dbSDimitry Andric VecCallCosts.first <= VecCallCosts.second;
55465ffd83dbSDimitry Andric
55478bcb0991SDimitry Andric Value *ScalarArg = nullptr;
55480b57cec5SDimitry Andric std::vector<Value *> OpVecs;
55495f7ddb14SDimitry Andric SmallVector<Type *, 2> TysForDecl =
55505f7ddb14SDimitry Andric {FixedVectorType::get(CI->getType(), E->Scalars.size())};
55510b57cec5SDimitry Andric for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
55520b57cec5SDimitry Andric ValueList OpVL;
55530b57cec5SDimitry Andric // Some intrinsics have scalar arguments. This argument should not be
55540b57cec5SDimitry Andric // vectorized.
55555ffd83dbSDimitry Andric if (UseIntrinsic && hasVectorInstrinsicScalarOpd(IID, j)) {
55560b57cec5SDimitry Andric CallInst *CEI = cast<CallInst>(VL0);
55570b57cec5SDimitry Andric ScalarArg = CEI->getArgOperand(j);
55580b57cec5SDimitry Andric OpVecs.push_back(CEI->getArgOperand(j));
55595f7ddb14SDimitry Andric if (hasVectorInstrinsicOverloadedScalarOpd(IID, j))
55605f7ddb14SDimitry Andric TysForDecl.push_back(ScalarArg->getType());
55610b57cec5SDimitry Andric continue;
55620b57cec5SDimitry Andric }
55630b57cec5SDimitry Andric
55640b57cec5SDimitry Andric Value *OpVec = vectorizeTree(E->getOperand(j));
55650b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
55660b57cec5SDimitry Andric OpVecs.push_back(OpVec);
55670b57cec5SDimitry Andric }
55680b57cec5SDimitry Andric
55695ffd83dbSDimitry Andric Function *CF;
55705ffd83dbSDimitry Andric if (!UseIntrinsic) {
5571af732203SDimitry Andric VFShape Shape =
5572af732203SDimitry Andric VFShape::get(*CI, ElementCount::getFixed(static_cast<unsigned>(
5573af732203SDimitry Andric VecTy->getNumElements())),
55745ffd83dbSDimitry Andric false /*HasGlobalPred*/);
55755ffd83dbSDimitry Andric CF = VFDatabase(*CI).getVectorizedFunction(Shape);
55765ffd83dbSDimitry Andric } else {
55775f7ddb14SDimitry Andric CF = Intrinsic::getDeclaration(F->getParent(), ID, TysForDecl);
55785ffd83dbSDimitry Andric }
55795ffd83dbSDimitry Andric
55800b57cec5SDimitry Andric SmallVector<OperandBundleDef, 1> OpBundles;
55810b57cec5SDimitry Andric CI->getOperandBundlesAsDefs(OpBundles);
55820b57cec5SDimitry Andric Value *V = Builder.CreateCall(CF, OpVecs, OpBundles);
55830b57cec5SDimitry Andric
55840b57cec5SDimitry Andric // The scalar argument uses an in-tree scalar so we add the new vectorized
55850b57cec5SDimitry Andric // call to ExternalUses list to make sure that an extract will be
55860b57cec5SDimitry Andric // generated in the future.
5587*1aaf10a9SDimitry Andric if (ScalarArg) {
5588*1aaf10a9SDimitry Andric if (TreeEntry *Entry = getTreeEntry(ScalarArg)) {
5589*1aaf10a9SDimitry Andric // Find which lane we need to extract.
5590*1aaf10a9SDimitry Andric unsigned FoundLane = Entry->findLaneForValue(ScalarArg);
5591*1aaf10a9SDimitry Andric ExternalUses.push_back(
5592*1aaf10a9SDimitry Andric ExternalUser(ScalarArg, cast<User>(V), FoundLane));
5593*1aaf10a9SDimitry Andric }
5594*1aaf10a9SDimitry Andric }
55950b57cec5SDimitry Andric
55960b57cec5SDimitry Andric propagateIRFlags(V, E->Scalars, VL0);
55975f7ddb14SDimitry Andric ShuffleBuilder.addMask(E->ReuseShuffleIndices);
55985f7ddb14SDimitry Andric V = ShuffleBuilder.finalize(V);
5599af732203SDimitry Andric
56000b57cec5SDimitry Andric E->VectorizedValue = V;
56010b57cec5SDimitry Andric ++NumVectorInstructions;
56020b57cec5SDimitry Andric return V;
56030b57cec5SDimitry Andric }
56040b57cec5SDimitry Andric case Instruction::ShuffleVector: {
56058bcb0991SDimitry Andric assert(E->isAltShuffle() &&
56068bcb0991SDimitry Andric ((Instruction::isBinaryOp(E->getOpcode()) &&
56078bcb0991SDimitry Andric Instruction::isBinaryOp(E->getAltOpcode())) ||
56088bcb0991SDimitry Andric (Instruction::isCast(E->getOpcode()) &&
56098bcb0991SDimitry Andric Instruction::isCast(E->getAltOpcode()))) &&
56100b57cec5SDimitry Andric "Invalid Shuffle Vector Operand");
56110b57cec5SDimitry Andric
56128bcb0991SDimitry Andric Value *LHS = nullptr, *RHS = nullptr;
56138bcb0991SDimitry Andric if (Instruction::isBinaryOp(E->getOpcode())) {
56148bcb0991SDimitry Andric setInsertPointAfterBundle(E);
56150b57cec5SDimitry Andric LHS = vectorizeTree(E->getOperand(0));
56160b57cec5SDimitry Andric RHS = vectorizeTree(E->getOperand(1));
56170b57cec5SDimitry Andric } else {
56188bcb0991SDimitry Andric setInsertPointAfterBundle(E);
56190b57cec5SDimitry Andric LHS = vectorizeTree(E->getOperand(0));
56200b57cec5SDimitry Andric }
56210b57cec5SDimitry Andric
56220b57cec5SDimitry Andric if (E->VectorizedValue) {
56230b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
56240b57cec5SDimitry Andric return E->VectorizedValue;
56250b57cec5SDimitry Andric }
56260b57cec5SDimitry Andric
56270b57cec5SDimitry Andric Value *V0, *V1;
56288bcb0991SDimitry Andric if (Instruction::isBinaryOp(E->getOpcode())) {
56290b57cec5SDimitry Andric V0 = Builder.CreateBinOp(
56308bcb0991SDimitry Andric static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS);
56310b57cec5SDimitry Andric V1 = Builder.CreateBinOp(
56328bcb0991SDimitry Andric static_cast<Instruction::BinaryOps>(E->getAltOpcode()), LHS, RHS);
56330b57cec5SDimitry Andric } else {
56340b57cec5SDimitry Andric V0 = Builder.CreateCast(
56358bcb0991SDimitry Andric static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy);
56360b57cec5SDimitry Andric V1 = Builder.CreateCast(
56378bcb0991SDimitry Andric static_cast<Instruction::CastOps>(E->getAltOpcode()), LHS, VecTy);
56380b57cec5SDimitry Andric }
56390b57cec5SDimitry Andric
56400b57cec5SDimitry Andric // Create shuffle to take alternate operations from the vector.
56410b57cec5SDimitry Andric // Also, gather up main and alt scalar ops to propagate IR flags to
56420b57cec5SDimitry Andric // each vector operation.
56430b57cec5SDimitry Andric ValueList OpScalars, AltScalars;
56445f7ddb14SDimitry Andric unsigned Sz = E->Scalars.size();
56455f7ddb14SDimitry Andric SmallVector<int> Mask(Sz);
56465f7ddb14SDimitry Andric for (unsigned I = 0; I < Sz; ++I) {
56475f7ddb14SDimitry Andric auto *OpInst = cast<Instruction>(E->Scalars[I]);
56488bcb0991SDimitry Andric assert(E->isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode");
56498bcb0991SDimitry Andric if (OpInst->getOpcode() == E->getAltOpcode()) {
56505f7ddb14SDimitry Andric Mask[I] = Sz + I;
56515f7ddb14SDimitry Andric AltScalars.push_back(E->Scalars[I]);
56520b57cec5SDimitry Andric } else {
56535f7ddb14SDimitry Andric Mask[I] = I;
56545f7ddb14SDimitry Andric OpScalars.push_back(E->Scalars[I]);
56550b57cec5SDimitry Andric }
56560b57cec5SDimitry Andric }
56570b57cec5SDimitry Andric
56580b57cec5SDimitry Andric propagateIRFlags(V0, OpScalars);
56590b57cec5SDimitry Andric propagateIRFlags(V1, AltScalars);
56600b57cec5SDimitry Andric
56615ffd83dbSDimitry Andric Value *V = Builder.CreateShuffleVector(V0, V1, Mask);
56620b57cec5SDimitry Andric if (Instruction *I = dyn_cast<Instruction>(V))
56630b57cec5SDimitry Andric V = propagateMetadata(I, E->Scalars);
56645f7ddb14SDimitry Andric ShuffleBuilder.addMask(E->ReuseShuffleIndices);
56655f7ddb14SDimitry Andric V = ShuffleBuilder.finalize(V);
5666af732203SDimitry Andric
56670b57cec5SDimitry Andric E->VectorizedValue = V;
56680b57cec5SDimitry Andric ++NumVectorInstructions;
56690b57cec5SDimitry Andric
56700b57cec5SDimitry Andric return V;
56710b57cec5SDimitry Andric }
56720b57cec5SDimitry Andric default:
56730b57cec5SDimitry Andric llvm_unreachable("unknown inst");
56740b57cec5SDimitry Andric }
56750b57cec5SDimitry Andric return nullptr;
56760b57cec5SDimitry Andric }
56770b57cec5SDimitry Andric
vectorizeTree()56780b57cec5SDimitry Andric Value *BoUpSLP::vectorizeTree() {
56790b57cec5SDimitry Andric ExtraValueToDebugLocsMap ExternallyUsedValues;
56800b57cec5SDimitry Andric return vectorizeTree(ExternallyUsedValues);
56810b57cec5SDimitry Andric }
56820b57cec5SDimitry Andric
/// Emit vector code for the whole vectorizable tree and rewrite the function
/// so every external user of an in-tree scalar reads from the new vectors.
///
/// \param ExternallyUsedValues scalars used outside the tree without a
///        concrete user instruction ("extra args"); entries are rewritten in
///        place to point at the newly created extracts.
/// \returns the vectorized value of the tree root.
Value *
BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
  // All blocks must be scheduled before any instructions are inserted.
  for (auto &BSIter : BlocksSchedules) {
    scheduleBlock(BSIter.second.get());
  }

  // Materialize the tree bottom-up starting from the root entry.
  Builder.SetInsertPoint(&F->getEntryBlock().front());
  auto *VectorRoot = vectorizeTree(VectorizableTree[0].get());

  // If the vectorized tree can be rewritten in a smaller type, we truncate the
  // vectorized root. InstCombine will then rewrite the entire expression. We
  // sign extend the extracted values below.
  auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
  if (MinBWs.count(ScalarRoot)) {
    if (auto *I = dyn_cast<Instruction>(VectorRoot)) {
      // If current instr is a phi and not the last phi, insert it after the
      // last phi node.
      if (isa<PHINode>(I))
        Builder.SetInsertPoint(&*I->getParent()->getFirstInsertionPt());
      else
        Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
    }
    auto BundleWidth = VectorizableTree[0]->Scalars.size();
    auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
    auto *VecTy = FixedVectorType::get(MinTy, BundleWidth);
    auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy);
    VectorizableTree[0]->VectorizedValue = Trunc;
  }

  LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size()
                    << " values .\n");

  // Extract all of the elements with the external uses.
  for (const auto &ExternalUse : ExternalUses) {
    Value *Scalar = ExternalUse.Scalar;
    llvm::User *User = ExternalUse.User;

    // Skip users that we already RAUW. This happens when one instruction
    // has multiple uses of the same value.
    if (User && !is_contained(Scalar->users(), User))
      continue;
    TreeEntry *E = getTreeEntry(Scalar);
    assert(E && "Invalid scalar");
    assert(E->State != TreeEntry::NeedToGather &&
           "Extracting from a gather list");

    Value *Vec = E->VectorizedValue;
    assert(Vec && "Can't find vectorizable value");

    Value *Lane = Builder.getInt32(ExternalUse.Lane);
    // Build an extractelement at the current insert point (set per-user
    // below), widening the result back to the scalar's type when the tree
    // was narrowed via MinBWs.
    auto ExtractAndExtendIfNeeded = [&](Value *Vec) {
      if (Scalar->getType() != Vec->getType()) {
        Value *Ex;
        // "Reuse" the existing extract to improve final codegen.
        if (auto *ES = dyn_cast<ExtractElementInst>(Scalar)) {
          Ex = Builder.CreateExtractElement(ES->getOperand(0),
                                            ES->getOperand(1));
        } else {
          Ex = Builder.CreateExtractElement(Vec, Lane);
        }
        // If necessary, sign-extend or zero-extend ScalarRoot
        // to the larger type.
        if (!MinBWs.count(ScalarRoot))
          return Ex;
        if (MinBWs[ScalarRoot].second)
          return Builder.CreateSExt(Ex, Scalar->getType());
        return Builder.CreateZExt(Ex, Scalar->getType());
      }
      // Same type: the "scalar" is itself a vector built in place; hand the
      // whole vectorized value to the user.
      assert(isa<FixedVectorType>(Scalar->getType()) &&
             isa<InsertElementInst>(Scalar) &&
             "In-tree scalar of vector type is not insertelement?");
      return Vec;
    };
    // If User == nullptr, the Scalar is used as extra arg. Generate
    // ExtractElement instruction and update the record for this scalar in
    // ExternallyUsedValues.
    if (!User) {
      assert(ExternallyUsedValues.count(Scalar) &&
             "Scalar with nullptr as an external user must be registered in "
             "ExternallyUsedValues map");
      if (auto *VecI = dyn_cast<Instruction>(Vec)) {
        Builder.SetInsertPoint(VecI->getParent(),
                               std::next(VecI->getIterator()));
      } else {
        Builder.SetInsertPoint(&F->getEntryBlock().front());
      }
      Value *NewInst = ExtractAndExtendIfNeeded(Vec);
      CSEBlocks.insert(cast<Instruction>(Scalar)->getParent());
      // Transfer the debug-loc record from the old scalar to the new extract
      // before dropping the old map entry.
      auto &NewInstLocs = ExternallyUsedValues[NewInst];
      auto It = ExternallyUsedValues.find(Scalar);
      assert(It != ExternallyUsedValues.end() &&
             "Externally used scalar is not found in ExternallyUsedValues");
      NewInstLocs.append(It->second);
      ExternallyUsedValues.erase(Scalar);
      // Required to update internally referenced instructions.
      Scalar->replaceAllUsesWith(NewInst);
      continue;
    }

    // Generate extracts for out-of-tree users.
    // Find the insertion point for the extractelement lane.
    if (auto *VecI = dyn_cast<Instruction>(Vec)) {
      if (PHINode *PH = dyn_cast<PHINode>(User)) {
        // A PHI user needs one extract per matching incoming edge, placed in
        // the corresponding predecessor (PHIs can't be preceded by code in
        // their own block).
        for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) {
          if (PH->getIncomingValue(i) == Scalar) {
            Instruction *IncomingTerminator =
                PH->getIncomingBlock(i)->getTerminator();
            // Can't insert before a catchswitch terminator; fall back to
            // right after the vector definition.
            if (isa<CatchSwitchInst>(IncomingTerminator)) {
              Builder.SetInsertPoint(VecI->getParent(),
                                     std::next(VecI->getIterator()));
            } else {
              Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
            }
            Value *NewInst = ExtractAndExtendIfNeeded(Vec);
            CSEBlocks.insert(PH->getIncomingBlock(i));
            PH->setOperand(i, NewInst);
          }
        }
      } else {
        Builder.SetInsertPoint(cast<Instruction>(User));
        Value *NewInst = ExtractAndExtendIfNeeded(Vec);
        CSEBlocks.insert(cast<Instruction>(User)->getParent());
        User->replaceUsesOfWith(Scalar, NewInst);
      }
    } else {
      // Vec is not an instruction (e.g. a constant); the extract can live at
      // the top of the entry block.
      Builder.SetInsertPoint(&F->getEntryBlock().front());
      Value *NewInst = ExtractAndExtendIfNeeded(Vec);
      CSEBlocks.insert(&F->getEntryBlock());
      User->replaceUsesOfWith(Scalar, NewInst);
    }

    LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n");
  }

  // For each vectorized value: the original scalars are now dead (all
  // external uses were rewritten above), so erase them.
  for (auto &TEPtr : VectorizableTree) {
    TreeEntry *Entry = TEPtr.get();

    // No need to handle users of gathered values.
    if (Entry->State == TreeEntry::NeedToGather)
      continue;

    assert(Entry->VectorizedValue && "Can't find vectorizable value");

    // For each lane:
    for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
      Value *Scalar = Entry->Scalars[Lane];

#ifndef NDEBUG
      Type *Ty = Scalar->getType();
      if (!Ty->isVoidTy()) {
        for (User *U : Scalar->users()) {
          LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");

          // It is legal to delete users in the ignorelist.
          assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) &&
                 "Deleting out-of-tree value");
        }
      }
#endif
      LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
      eraseInstruction(cast<Instruction>(Scalar));
    }
  }

  Builder.ClearInsertionPoint();
  InstrElementSize.clear();

  return VectorizableTree[0]->VectorizedValue;
}
58540b57cec5SDimitry Andric
/// Clean up the insert/extract sequences emitted for gathers:
///  1. hoist loop-invariant gather instructions into the enclosing loop's
///     preheader (simple LICM), and
///  2. CSE identical insertelement/extractelement instructions across the
///     blocks that received gather code, visiting blocks in dominance order
///     so a dominating copy can replace later duplicates.
void BoUpSLP::optimizeGatherSequence() {
  LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size()
                    << " gather sequences instructions.\n");
  // LICM InsertElementInst sequences.
  for (Instruction *I : GatherSeq) {
    if (isDeleted(I))
      continue;

    // Check if this block is inside a loop.
    Loop *L = LI->getLoopFor(I->getParent());
    if (!L)
      continue;

    // Check if it has a preheader.
    BasicBlock *PreHeader = L->getLoopPreheader();
    if (!PreHeader)
      continue;

    // If the vector or the element that we insert into it are
    // instructions that are defined in this basic block then we can't
    // hoist this instruction.
    auto *Op0 = dyn_cast<Instruction>(I->getOperand(0));
    auto *Op1 = dyn_cast<Instruction>(I->getOperand(1));
    if (Op0 && L->contains(Op0))
      continue;
    if (Op1 && L->contains(Op1))
      continue;

    // We can hoist this instruction. Move it to the pre-header.
    I->moveBefore(PreHeader->getTerminator());
  }

  // Make a list of all reachable blocks in our CSE queue.
  SmallVector<const DomTreeNode *, 8> CSEWorkList;
  CSEWorkList.reserve(CSEBlocks.size());
  for (BasicBlock *BB : CSEBlocks)
    if (DomTreeNode *N = DT->getNode(BB)) {
      assert(DT->isReachableFromEntry(N));
      CSEWorkList.push_back(N);
    }

  // Sort blocks by domination. This ensures we visit a block after all blocks
  // dominating it are visited.
  llvm::sort(CSEWorkList, [](const DomTreeNode *A, const DomTreeNode *B) {
    assert((A == B) == (A->getDFSNumIn() == B->getDFSNumIn()) &&
           "Different nodes should have different DFS numbers");
    return A->getDFSNumIn() < B->getDFSNumIn();
  });

  // Perform O(N^2) search over the gather sequences and merge identical
  // instructions. TODO: We can further optimize this scan if we split the
  // instructions into different buckets based on the insert lane.
  SmallVector<Instruction *, 16> Visited;
  for (auto I = CSEWorkList.begin(), E = CSEWorkList.end(); I != E; ++I) {
    assert(*I &&
           (I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) &&
           "Worklist not sorted properly!");
    BasicBlock *BB = (*I)->getBlock();
    // For all instructions in blocks containing gather sequences:
    // NOTE: the iterator is advanced before the body runs so erasing `In`
    // below does not invalidate the traversal.
    for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
      Instruction *In = &*it++;
      if (isDeleted(In))
        continue;
      if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
        continue;

      // Check if we can replace this instruction with any of the
      // visited instructions.
      for (Instruction *v : Visited) {
        if (In->isIdenticalTo(v) &&
            DT->dominates(v->getParent(), In->getParent())) {
          In->replaceAllUsesWith(v);
          eraseInstruction(In);
          In = nullptr;
          break;
        }
      }
      if (In) {
        assert(!is_contained(Visited, In));
        Visited.push_back(In);
      }
    }
  }
  CSEBlocks.clear();
  GatherSeq.clear();
}
59410b57cec5SDimitry Andric
// Groups the instructions to a bundle (which is then a single scheduling entity)
// and schedules instructions until the bundle gets ready.
//
// Returns:
//  - nullptr    : the values do not need scheduling at all (PHIs /
//                 insertelements are built in place);
//  - None       : bundling failed (region limit exceeded, or the bundle
//                 could not become ready, e.g. a cyclic dependency);
//  - a bundle   : the ScheduleData heading the newly formed bundle.
Optional<BoUpSLP::ScheduleData *>
BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
                                            const InstructionsState &S) {
  if (isa<PHINode>(S.OpValue) || isa<InsertElementInst>(S.OpValue))
    return nullptr;

  // Initialize the instruction bundle.
  Instruction *OldScheduleEnd = ScheduleEnd;
  ScheduleData *PrevInBundle = nullptr;
  ScheduleData *Bundle = nullptr;
  bool ReSchedule = false;
  LLVM_DEBUG(dbgs() << "SLP: bundle: " << *S.OpValue << "\n");

  // Helper that re-derives dependencies after the region may have grown and
  // then drains the ready list until the bundle (if any) becomes ready.
  auto &&TryScheduleBundle = [this, OldScheduleEnd, SLP](bool ReSchedule,
                                                         ScheduleData *Bundle) {
    // The scheduling region got new instructions at the lower end (or it is a
    // new region for the first bundle). This makes it necessary to
    // recalculate all dependencies.
    // It is seldom that this needs to be done a second time after adding the
    // initial bundle to the region.
    if (ScheduleEnd != OldScheduleEnd) {
      for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode())
        doForAllOpcodes(I, [](ScheduleData *SD) { SD->clearDependencies(); });
      ReSchedule = true;
    }
    if (ReSchedule) {
      resetSchedule();
      initialFillReadyList(ReadyInsts);
    }
    if (Bundle) {
      LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle
                        << " in block " << BB->getName() << "\n");
      calculateDependencies(Bundle, /*InsertInReadyList=*/true, SLP);
    }

    // Now try to schedule the new bundle or (if no bundle) just calculate
    // dependencies. As soon as the bundle is "ready" it means that there are no
    // cyclic dependencies and we can schedule it. Note that's important that we
    // don't "schedule" the bundle yet (see cancelScheduling).
    while (((!Bundle && ReSchedule) || (Bundle && !Bundle->isReady())) &&
           !ReadyInsts.empty()) {
      ScheduleData *Picked = ReadyInsts.pop_back_val();
      if (Picked->isSchedulingEntity() && Picked->isReady())
        schedule(Picked, ReadyInsts);
    }
  };

  // Make sure that the scheduling region contains all
  // instructions of the bundle.
  for (Value *V : VL) {
    if (!extendSchedulingRegion(V, S)) {
      // If the scheduling region got new instructions at the lower end (or it
      // is a new region for the first bundle). This makes it necessary to
      // recalculate all dependencies.
      // Otherwise the compiler may crash trying to incorrectly calculate
      // dependencies and emit instruction in the wrong order at the actual
      // scheduling.
      TryScheduleBundle(/*ReSchedule=*/false, nullptr);
      return None;
    }
  }

  // Link the members of VL into a single bundle chain headed by `Bundle`.
  for (Value *V : VL) {
    ScheduleData *BundleMember = getScheduleData(V);
    assert(BundleMember &&
           "no ScheduleData for bundle member (maybe not in same basic block)");
    if (BundleMember->IsScheduled) {
      // A bundle member was scheduled as single instruction before and now
      // needs to be scheduled as part of the bundle. We just get rid of the
      // existing schedule.
      LLVM_DEBUG(dbgs() << "SLP: reset schedule because " << *BundleMember
                        << " was already scheduled\n");
      ReSchedule = true;
    }
    assert(BundleMember->isSchedulingEntity() &&
           "bundle member already part of other bundle");
    if (PrevInBundle) {
      PrevInBundle->NextInBundle = BundleMember;
    } else {
      Bundle = BundleMember;
    }
    // The head of the bundle accumulates the unscheduled-dependency count of
    // all members; individual members carry zero.
    BundleMember->UnscheduledDepsInBundle = 0;
    Bundle->UnscheduledDepsInBundle += BundleMember->UnscheduledDeps;

    // Group the instructions to a bundle.
    BundleMember->FirstInBundle = Bundle;
    PrevInBundle = BundleMember;
  }
  assert(Bundle && "Failed to find schedule bundle");
  TryScheduleBundle(ReSchedule, Bundle);
  if (!Bundle->isReady()) {
    cancelScheduling(VL, S.OpValue);
    return None;
  }
  return Bundle;
}
60400b57cec5SDimitry Andric
cancelScheduling(ArrayRef<Value * > VL,Value * OpValue)60410b57cec5SDimitry Andric void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
60420b57cec5SDimitry Andric Value *OpValue) {
60435f7ddb14SDimitry Andric if (isa<PHINode>(OpValue) || isa<InsertElementInst>(OpValue))
60440b57cec5SDimitry Andric return;
60450b57cec5SDimitry Andric
60460b57cec5SDimitry Andric ScheduleData *Bundle = getScheduleData(OpValue);
60470b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: cancel scheduling of " << *Bundle << "\n");
60480b57cec5SDimitry Andric assert(!Bundle->IsScheduled &&
60490b57cec5SDimitry Andric "Can't cancel bundle which is already scheduled");
60500b57cec5SDimitry Andric assert(Bundle->isSchedulingEntity() && Bundle->isPartOfBundle() &&
60510b57cec5SDimitry Andric "tried to unbundle something which is not a bundle");
60520b57cec5SDimitry Andric
60530b57cec5SDimitry Andric // Un-bundle: make single instructions out of the bundle.
60540b57cec5SDimitry Andric ScheduleData *BundleMember = Bundle;
60550b57cec5SDimitry Andric while (BundleMember) {
60560b57cec5SDimitry Andric assert(BundleMember->FirstInBundle == Bundle && "corrupt bundle links");
60570b57cec5SDimitry Andric BundleMember->FirstInBundle = BundleMember;
60580b57cec5SDimitry Andric ScheduleData *Next = BundleMember->NextInBundle;
60590b57cec5SDimitry Andric BundleMember->NextInBundle = nullptr;
60600b57cec5SDimitry Andric BundleMember->UnscheduledDepsInBundle = BundleMember->UnscheduledDeps;
60610b57cec5SDimitry Andric if (BundleMember->UnscheduledDepsInBundle == 0) {
60620b57cec5SDimitry Andric ReadyInsts.insert(BundleMember);
60630b57cec5SDimitry Andric }
60640b57cec5SDimitry Andric BundleMember = Next;
60650b57cec5SDimitry Andric }
60660b57cec5SDimitry Andric }
60670b57cec5SDimitry Andric
allocateScheduleDataChunks()60680b57cec5SDimitry Andric BoUpSLP::ScheduleData *BoUpSLP::BlockScheduling::allocateScheduleDataChunks() {
60690b57cec5SDimitry Andric // Allocate a new ScheduleData for the instruction.
60700b57cec5SDimitry Andric if (ChunkPos >= ChunkSize) {
60718bcb0991SDimitry Andric ScheduleDataChunks.push_back(std::make_unique<ScheduleData[]>(ChunkSize));
60720b57cec5SDimitry Andric ChunkPos = 0;
60730b57cec5SDimitry Andric }
60740b57cec5SDimitry Andric return &(ScheduleDataChunks.back()[ChunkPos++]);
60750b57cec5SDimitry Andric }
60760b57cec5SDimitry Andric
/// Grow the scheduling region [ScheduleStart, ScheduleEnd) so that it covers
/// the instruction \p V, creating ScheduleData for every newly covered
/// instruction. Returns false only when growing would exceed
/// ScheduleRegionSizeLimit; otherwise returns true (including when V was
/// already covered).
bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
                                                      const InstructionsState &S) {
  // Already have ScheduleData for this value (for the relevant opcode view):
  // nothing to extend.
  if (getScheduleData(V, isOneOf(S, V)))
    return true;
  Instruction *I = dyn_cast<Instruction>(V);
  assert(I && "bundle member must be an instruction");
  assert(!isa<PHINode>(I) && !isa<InsertElementInst>(I) &&
         "phi nodes/insertelements don't need to be scheduled");
  // For alternate-opcode bundles, record an extra ScheduleData keyed by
  // S.OpValue when the instruction is already inside the region.
  auto &&CheckSheduleForI = [this, &S](Instruction *I) -> bool {
    ScheduleData *ISD = getScheduleData(I);
    if (!ISD)
      return false;
    assert(isInSchedulingRegion(ISD) &&
           "ScheduleData not in scheduling region");
    ScheduleData *SD = allocateScheduleDataChunks();
    SD->Inst = I;
    SD->init(SchedulingRegionID, S.OpValue);
    ExtraScheduleDataMap[I][S.OpValue] = SD;
    return true;
  };
  if (CheckSheduleForI(I))
    return true;
  if (!ScheduleStart) {
    // It's the first instruction in the new region.
    initScheduleData(I, I->getNextNode(), nullptr, nullptr);
    ScheduleStart = I;
    ScheduleEnd = I->getNextNode();
    if (isOneOf(S, I) != I)
      CheckSheduleForI(I);
    assert(ScheduleEnd && "tried to vectorize a terminator?");
    LLVM_DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
    return true;
  }
  // Search up and down at the same time, because we don't know if the new
  // instruction is above or below the existing scheduling region.
  BasicBlock::reverse_iterator UpIter =
      ++ScheduleStart->getIterator().getReverse();
  BasicBlock::reverse_iterator UpperEnd = BB->rend();
  BasicBlock::iterator DownIter = ScheduleEnd->getIterator();
  BasicBlock::iterator LowerEnd = BB->end();
  // Walk outward one step in each direction per iteration, charging the
  // growth against the region size budget.
  while (UpIter != UpperEnd && DownIter != LowerEnd && &*UpIter != I &&
         &*DownIter != I) {
    if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
      LLVM_DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n");
      return false;
    }

    ++UpIter;
    ++DownIter;
  }
  if (DownIter == LowerEnd || (UpIter != UpperEnd && &*UpIter == I)) {
    // Found I above the region (or ran off the bottom, so it must be above):
    // extend the region start upward to I.
    assert(I->getParent() == ScheduleStart->getParent() &&
           "Instruction is in wrong basic block.");
    initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion);
    ScheduleStart = I;
    if (isOneOf(S, I) != I)
      CheckSheduleForI(I);
    LLVM_DEBUG(dbgs() << "SLP: extend schedule region start to " << *I
                      << "\n");
    return true;
  }
  // Otherwise I lies below the region: extend the region end downward past I.
  assert((UpIter == UpperEnd || (DownIter != LowerEnd && &*DownIter == I)) &&
         "Expected to reach top of the basic block or instruction down the "
         "lower end.");
  assert(I->getParent() == ScheduleEnd->getParent() &&
         "Instruction is in wrong basic block.");
  initScheduleData(ScheduleEnd, I->getNextNode(), LastLoadStoreInRegion,
                   nullptr);
  ScheduleEnd = I->getNextNode();
  if (isOneOf(S, I) != I)
    CheckSheduleForI(I);
  assert(ScheduleEnd && "tried to vectorize a terminator?");
  LLVM_DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n");
  return true;
}
61520b57cec5SDimitry Andric
initScheduleData(Instruction * FromI,Instruction * ToI,ScheduleData * PrevLoadStore,ScheduleData * NextLoadStore)61530b57cec5SDimitry Andric void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
61540b57cec5SDimitry Andric Instruction *ToI,
61550b57cec5SDimitry Andric ScheduleData *PrevLoadStore,
61560b57cec5SDimitry Andric ScheduleData *NextLoadStore) {
61570b57cec5SDimitry Andric ScheduleData *CurrentLoadStore = PrevLoadStore;
61580b57cec5SDimitry Andric for (Instruction *I = FromI; I != ToI; I = I->getNextNode()) {
61590b57cec5SDimitry Andric ScheduleData *SD = ScheduleDataMap[I];
61600b57cec5SDimitry Andric if (!SD) {
61610b57cec5SDimitry Andric SD = allocateScheduleDataChunks();
61620b57cec5SDimitry Andric ScheduleDataMap[I] = SD;
61630b57cec5SDimitry Andric SD->Inst = I;
61640b57cec5SDimitry Andric }
61650b57cec5SDimitry Andric assert(!isInSchedulingRegion(SD) &&
61660b57cec5SDimitry Andric "new ScheduleData already in scheduling region");
61670b57cec5SDimitry Andric SD->init(SchedulingRegionID, I);
61680b57cec5SDimitry Andric
61690b57cec5SDimitry Andric if (I->mayReadOrWriteMemory() &&
61700b57cec5SDimitry Andric (!isa<IntrinsicInst>(I) ||
6171af732203SDimitry Andric (cast<IntrinsicInst>(I)->getIntrinsicID() != Intrinsic::sideeffect &&
6172af732203SDimitry Andric cast<IntrinsicInst>(I)->getIntrinsicID() !=
6173af732203SDimitry Andric Intrinsic::pseudoprobe))) {
61740b57cec5SDimitry Andric // Update the linked list of memory accessing instructions.
61750b57cec5SDimitry Andric if (CurrentLoadStore) {
61760b57cec5SDimitry Andric CurrentLoadStore->NextLoadStore = SD;
61770b57cec5SDimitry Andric } else {
61780b57cec5SDimitry Andric FirstLoadStoreInRegion = SD;
61790b57cec5SDimitry Andric }
61800b57cec5SDimitry Andric CurrentLoadStore = SD;
61810b57cec5SDimitry Andric }
61820b57cec5SDimitry Andric }
61830b57cec5SDimitry Andric if (NextLoadStore) {
61840b57cec5SDimitry Andric if (CurrentLoadStore)
61850b57cec5SDimitry Andric CurrentLoadStore->NextLoadStore = NextLoadStore;
61860b57cec5SDimitry Andric } else {
61870b57cec5SDimitry Andric LastLoadStoreInRegion = CurrentLoadStore;
61880b57cec5SDimitry Andric }
61890b57cec5SDimitry Andric }
61900b57cec5SDimitry Andric
/// (Re)computes the dependency counters of the bundle \p SD. Bundles
/// discovered along the way (through def-use edges or memory edges) that do
/// not yet have valid dependencies are pushed on a worklist and processed the
/// same way. Memory dependencies are found by walking the region's load/store
/// chain, using alias queries from \p SLP bounded by AliasedCheckLimit and
/// MaxMemDepDistance. If \p InsertInReadyList is true and a processed bundle
/// ends up ready, it is appended to ReadyInsts.
void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
                                                     bool InsertInReadyList,
                                                     BoUpSLP *SLP) {
  assert(SD->isSchedulingEntity());

  SmallVector<ScheduleData *, 10> WorkList;
  WorkList.push_back(SD);

  while (!WorkList.empty()) {
    // NOTE: deliberately shadows the parameter; from here on SD is the
    // bundle currently taken off the worklist.
    ScheduleData *SD = WorkList.pop_back_val();

    // Visit every member of the bundle; each member keeps its own counters.
    ScheduleData *BundleMember = SD;
    while (BundleMember) {
      assert(isInSchedulingRegion(BundleMember));
      if (!BundleMember->hasValidDependencies()) {

        LLVM_DEBUG(dbgs() << "SLP: update deps of " << *BundleMember
                          << "\n");
        // Recompute from scratch for this member.
        BundleMember->Dependencies = 0;
        BundleMember->resetUnscheduledDeps();

        // Handle def-use chain dependencies.
        if (BundleMember->OpValue != BundleMember->Inst) {
          // This entry is keyed by a value other than its instruction; its
          // only def-use dependency is the bundle of the instruction itself.
          ScheduleData *UseSD = getScheduleData(BundleMember->Inst);
          if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
            BundleMember->Dependencies++;
            ScheduleData *DestBundle = UseSD->FirstInBundle;
            if (!DestBundle->IsScheduled)
              BundleMember->incrementUnscheduledDeps(1);
            if (!DestBundle->hasValidDependencies())
              WorkList.push_back(DestBundle);
          }
        } else {
          // Every in-region user bundle contributes one dependency.
          for (User *U : BundleMember->Inst->users()) {
            if (isa<Instruction>(U)) {
              ScheduleData *UseSD = getScheduleData(U);
              if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
                BundleMember->Dependencies++;
                ScheduleData *DestBundle = UseSD->FirstInBundle;
                if (!DestBundle->IsScheduled)
                  BundleMember->incrementUnscheduledDeps(1);
                if (!DestBundle->hasValidDependencies())
                  WorkList.push_back(DestBundle);
              }
            } else {
              // I'm not sure if this can ever happen. But we need to be safe.
              // This lets the instruction/bundle never be scheduled and
              // eventually disable vectorization.
              BundleMember->Dependencies++;
              BundleMember->incrementUnscheduledDeps(1);
            }
          }
        }

        // Handle the memory dependencies.
        ScheduleData *DepDest = BundleMember->NextLoadStore;
        if (DepDest) {
          Instruction *SrcInst = BundleMember->Inst;
          MemoryLocation SrcLoc = getLocation(SrcInst, SLP->AA);
          bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
          unsigned numAliased = 0;
          unsigned DistToSrc = 1;

          // Walk the later memory accesses in the region via the load/store
          // chain, adding a dependency for each possible aliasing access.
          while (DepDest) {
            assert(isInSchedulingRegion(DepDest));

            // We have two limits to reduce the complexity:
            // 1) AliasedCheckLimit: It's a small limit to reduce calls to
            //    SLP->isAliased (which is the expensive part in this loop).
            // 2) MaxMemDepDistance: It's for very large blocks and it aborts
            //    the whole loop (even if the loop is fast, it's quadratic).
            //    It's important for the loop break condition (see below) to
            //    check this limit even between two read-only instructions.
            if (DistToSrc >= MaxMemDepDistance ||
                ((SrcMayWrite || DepDest->Inst->mayWriteToMemory()) &&
                 (numAliased >= AliasedCheckLimit ||
                  SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)))) {

              // We increment the counter only if the locations are aliased
              // (instead of counting all alias checks). This gives a better
              // balance between reduced runtime and accurate dependencies.
              numAliased++;

              DepDest->MemoryDependencies.push_back(BundleMember);
              BundleMember->Dependencies++;
              ScheduleData *DestBundle = DepDest->FirstInBundle;
              if (!DestBundle->IsScheduled) {
                BundleMember->incrementUnscheduledDeps(1);
              }
              if (!DestBundle->hasValidDependencies()) {
                WorkList.push_back(DestBundle);
              }
            }
            DepDest = DepDest->NextLoadStore;

            // Example, explaining the loop break condition: Let's assume our
            // starting instruction is i0 and MaxMemDepDistance = 3.
            //
            //                      +--------v--v--v
            //             i0,i1,i2,i3,i4,i5,i6,i7,i8
            //             +--------^--^--^
            //
            // MaxMemDepDistance let us stop alias-checking at i3 and we add
            // dependencies from i0 to i3,i4,.. (even if they are not
            // aliased). Previously we already added dependencies from i3 to
            // i6,i7,i8 (because of MaxMemDepDistance). As we added a
            // dependency from i0 to i3, we have transitive dependencies from
            // i0 to i6,i7,i8 and we can abort this loop at i6.
            if (DistToSrc >= 2 * MaxMemDepDistance)
              break;
            DistToSrc++;
          }
        }
      }
      BundleMember = BundleMember->NextInBundle;
    }
    // A bundle whose counters are already satisfied can go straight to the
    // ready list when the caller asked for it.
    if (InsertInReadyList && SD->isReady()) {
      ReadyInsts.push_back(SD);
      LLVM_DEBUG(dbgs() << "SLP: gets ready on update: " << *SD->Inst
                        << "\n");
    }
  }
}
63140b57cec5SDimitry Andric
resetSchedule()63150b57cec5SDimitry Andric void BoUpSLP::BlockScheduling::resetSchedule() {
63160b57cec5SDimitry Andric assert(ScheduleStart &&
63170b57cec5SDimitry Andric "tried to reset schedule on block which has not been scheduled");
63180b57cec5SDimitry Andric for (Instruction *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
63190b57cec5SDimitry Andric doForAllOpcodes(I, [&](ScheduleData *SD) {
63200b57cec5SDimitry Andric assert(isInSchedulingRegion(SD) &&
63210b57cec5SDimitry Andric "ScheduleData not in scheduling region");
63220b57cec5SDimitry Andric SD->IsScheduled = false;
63230b57cec5SDimitry Andric SD->resetUnscheduledDeps();
63240b57cec5SDimitry Andric });
63250b57cec5SDimitry Andric }
63260b57cec5SDimitry Andric ReadyInsts.clear();
63270b57cec5SDimitry Andric }
63280b57cec5SDimitry Andric
/// Performs the actual scheduling of the block managed by \p BS: recomputes
/// all bundle dependencies, then repeatedly extracts a ready bundle from a
/// priority-ordered set and moves its instructions into their final
/// positions. Clears ScheduleStart afterwards so the block is not scheduled
/// twice.
void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
  // No scheduling region was built for this block; nothing to do.
  if (!BS->ScheduleStart)
    return;

  LLVM_DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n");

  BS->resetSchedule();

  // For the real scheduling we use a more sophisticated ready-list: it is
  // sorted by the original instruction location. This lets the final schedule
  // be as close as possible to the original instruction order.
  // The comparator orders the set by decreasing SchedulingPriority, so
  // *begin() is the ready bundle whose priority index is largest.
  struct ScheduleDataCompare {
    bool operator()(ScheduleData *SD1, ScheduleData *SD2) const {
      return SD2->SchedulingPriority < SD1->SchedulingPriority;
    }
  };
  std::set<ScheduleData *, ScheduleDataCompare> ReadyInsts;

  // Ensure that all dependency data is updated and fill the ready-list with
  // initial instructions.
  int Idx = 0;
  int NumToSchedule = 0;
  for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd;
       I = I->getNextNode()) {
    BS->doForAllOpcodes(I, [this, &Idx, &NumToSchedule, BS](ScheduleData *SD) {
      assert((isa<InsertElementInst>(SD->Inst) ||
              SD->isPartOfBundle() == (getTreeEntry(SD->Inst) != nullptr)) &&
             "scheduler and vectorizer bundle mismatch");
      // Priority is the position in the original instruction order.
      SD->FirstInBundle->SchedulingPriority = Idx++;
      if (SD->isSchedulingEntity()) {
        BS->calculateDependencies(SD, false, this);
        NumToSchedule++;
      }
    });
  }
  BS->initialFillReadyList(ReadyInsts);

  // Placement cursor: each picked instruction is inserted immediately before
  // this one, starting from the end of the region.
  Instruction *LastScheduledInst = BS->ScheduleEnd;

  // Do the "real" scheduling.
  while (!ReadyInsts.empty()) {
    ScheduleData *picked = *ReadyInsts.begin();
    ReadyInsts.erase(ReadyInsts.begin());

    // Move the scheduled instruction(s) to their dedicated places, if not
    // there yet. Members of a bundle end up contiguous, in bundle order.
    ScheduleData *BundleMember = picked;
    while (BundleMember) {
      Instruction *pickedInst = BundleMember->Inst;
      if (pickedInst->getNextNode() != LastScheduledInst) {
        BS->BB->getInstList().remove(pickedInst);
        BS->BB->getInstList().insert(LastScheduledInst->getIterator(),
                                     pickedInst);
      }
      LastScheduledInst = pickedInst;
      BundleMember = BundleMember->NextInBundle;
    }

    // Update counters of dependent bundles and enqueue any that became
    // ready.
    BS->schedule(picked, ReadyInsts);
    NumToSchedule--;
  }
  assert(NumToSchedule == 0 && "could not schedule all instructions");

  // Avoid duplicate scheduling of the block.
  BS->ScheduleStart = nullptr;
}
63950b57cec5SDimitry Andric
getVectorElementSize(Value * V)63965ffd83dbSDimitry Andric unsigned BoUpSLP::getVectorElementSize(Value *V) {
6397af732203SDimitry Andric // If V is a store, just return the width of the stored value (or value
6398af732203SDimitry Andric // truncated just before storing) without traversing the expression tree.
6399af732203SDimitry Andric // This is the common case.
6400af732203SDimitry Andric if (auto *Store = dyn_cast<StoreInst>(V)) {
6401af732203SDimitry Andric if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
6402af732203SDimitry Andric return DL->getTypeSizeInBits(Trunc->getSrcTy());
64030b57cec5SDimitry Andric return DL->getTypeSizeInBits(Store->getValueOperand()->getType());
6404af732203SDimitry Andric }
64050b57cec5SDimitry Andric
64065f7ddb14SDimitry Andric if (auto *IEI = dyn_cast<InsertElementInst>(V))
64075f7ddb14SDimitry Andric return getVectorElementSize(IEI->getOperand(1));
64085f7ddb14SDimitry Andric
64095ffd83dbSDimitry Andric auto E = InstrElementSize.find(V);
64105ffd83dbSDimitry Andric if (E != InstrElementSize.end())
64115ffd83dbSDimitry Andric return E->second;
64125ffd83dbSDimitry Andric
64130b57cec5SDimitry Andric // If V is not a store, we can traverse the expression tree to find loads
64140b57cec5SDimitry Andric // that feed it. The type of the loaded value may indicate a more suitable
64150b57cec5SDimitry Andric // width than V's type. We want to base the vector element size on the width
64160b57cec5SDimitry Andric // of memory operations where possible.
64175f7ddb14SDimitry Andric SmallVector<std::pair<Instruction *, BasicBlock *>, 16> Worklist;
64180b57cec5SDimitry Andric SmallPtrSet<Instruction *, 16> Visited;
64195ffd83dbSDimitry Andric if (auto *I = dyn_cast<Instruction>(V)) {
64205f7ddb14SDimitry Andric Worklist.emplace_back(I, I->getParent());
64215ffd83dbSDimitry Andric Visited.insert(I);
64225ffd83dbSDimitry Andric }
64230b57cec5SDimitry Andric
64240b57cec5SDimitry Andric // Traverse the expression tree in bottom-up order looking for loads. If we
64250b57cec5SDimitry Andric // encounter an instruction we don't yet handle, we give up.
64265f7ddb14SDimitry Andric auto Width = 0u;
64275f7ddb14SDimitry Andric while (!Worklist.empty()) {
64285f7ddb14SDimitry Andric Instruction *I;
64295f7ddb14SDimitry Andric BasicBlock *Parent;
64305f7ddb14SDimitry Andric std::tie(I, Parent) = Worklist.pop_back_val();
64310b57cec5SDimitry Andric
64320b57cec5SDimitry Andric // We should only be looking at scalar instructions here. If the current
64335f7ddb14SDimitry Andric // instruction has a vector type, skip.
64340b57cec5SDimitry Andric auto *Ty = I->getType();
64350b57cec5SDimitry Andric if (isa<VectorType>(Ty))
64365f7ddb14SDimitry Andric continue;
64370b57cec5SDimitry Andric
64380b57cec5SDimitry Andric // If the current instruction is a load, update MaxWidth to reflect the
64390b57cec5SDimitry Andric // width of the loaded value.
64405f7ddb14SDimitry Andric if (isa<LoadInst>(I) || isa<ExtractElementInst>(I) ||
64415f7ddb14SDimitry Andric isa<ExtractValueInst>(I))
64425f7ddb14SDimitry Andric Width = std::max<unsigned>(Width, DL->getTypeSizeInBits(Ty));
64430b57cec5SDimitry Andric
64440b57cec5SDimitry Andric // Otherwise, we need to visit the operands of the instruction. We only
64450b57cec5SDimitry Andric // handle the interesting cases from buildTree here. If an operand is an
64465f7ddb14SDimitry Andric // instruction we haven't yet visited and from the same basic block as the
64475f7ddb14SDimitry Andric // user or the use is a PHI node, we add it to the worklist.
64480b57cec5SDimitry Andric else if (isa<PHINode>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
64495f7ddb14SDimitry Andric isa<CmpInst>(I) || isa<SelectInst>(I) || isa<BinaryOperator>(I) ||
64505f7ddb14SDimitry Andric isa<UnaryOperator>(I)) {
64510b57cec5SDimitry Andric for (Use &U : I->operands())
64520b57cec5SDimitry Andric if (auto *J = dyn_cast<Instruction>(U.get()))
64535f7ddb14SDimitry Andric if (Visited.insert(J).second &&
64545f7ddb14SDimitry Andric (isa<PHINode>(I) || J->getParent() == Parent))
64555f7ddb14SDimitry Andric Worklist.emplace_back(J, J->getParent());
64565f7ddb14SDimitry Andric } else {
64575f7ddb14SDimitry Andric break;
64585f7ddb14SDimitry Andric }
64590b57cec5SDimitry Andric }
64600b57cec5SDimitry Andric
64610b57cec5SDimitry Andric // If we didn't encounter a memory access in the expression tree, or if we
64625ffd83dbSDimitry Andric // gave up for some reason, just return the width of V. Otherwise, return the
64635ffd83dbSDimitry Andric // maximum width we found.
64645f7ddb14SDimitry Andric if (!Width) {
64655f7ddb14SDimitry Andric if (auto *CI = dyn_cast<CmpInst>(V))
64665f7ddb14SDimitry Andric V = CI->getOperand(0);
64675ffd83dbSDimitry Andric Width = DL->getTypeSizeInBits(V->getType());
64685f7ddb14SDimitry Andric }
64690b57cec5SDimitry Andric
64705ffd83dbSDimitry Andric for (Instruction *I : Visited)
64715ffd83dbSDimitry Andric InstrElementSize[I] = Width;
64725ffd83dbSDimitry Andric
64735ffd83dbSDimitry Andric return Width;
64740b57cec5SDimitry Andric }
64750b57cec5SDimitry Andric
64760b57cec5SDimitry Andric // Determine if a value V in a vectorizable expression Expr can be demoted to a
64770b57cec5SDimitry Andric // smaller type with a truncation. We collect the values that will be demoted
64780b57cec5SDimitry Andric // in ToDemote and additional roots that require investigating in Roots.
collectValuesToDemote(Value * V,SmallPtrSetImpl<Value * > & Expr,SmallVectorImpl<Value * > & ToDemote,SmallVectorImpl<Value * > & Roots)64790b57cec5SDimitry Andric static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
64800b57cec5SDimitry Andric SmallVectorImpl<Value *> &ToDemote,
64810b57cec5SDimitry Andric SmallVectorImpl<Value *> &Roots) {
64820b57cec5SDimitry Andric // We can always demote constants.
64830b57cec5SDimitry Andric if (isa<Constant>(V)) {
64840b57cec5SDimitry Andric ToDemote.push_back(V);
64850b57cec5SDimitry Andric return true;
64860b57cec5SDimitry Andric }
64870b57cec5SDimitry Andric
64880b57cec5SDimitry Andric // If the value is not an instruction in the expression with only one use, it
64890b57cec5SDimitry Andric // cannot be demoted.
64900b57cec5SDimitry Andric auto *I = dyn_cast<Instruction>(V);
64910b57cec5SDimitry Andric if (!I || !I->hasOneUse() || !Expr.count(I))
64920b57cec5SDimitry Andric return false;
64930b57cec5SDimitry Andric
64940b57cec5SDimitry Andric switch (I->getOpcode()) {
64950b57cec5SDimitry Andric
64960b57cec5SDimitry Andric // We can always demote truncations and extensions. Since truncations can
64970b57cec5SDimitry Andric // seed additional demotion, we save the truncated value.
64980b57cec5SDimitry Andric case Instruction::Trunc:
64990b57cec5SDimitry Andric Roots.push_back(I->getOperand(0));
65000b57cec5SDimitry Andric break;
65010b57cec5SDimitry Andric case Instruction::ZExt:
65020b57cec5SDimitry Andric case Instruction::SExt:
65035f7ddb14SDimitry Andric if (isa<ExtractElementInst>(I->getOperand(0)) ||
65045f7ddb14SDimitry Andric isa<InsertElementInst>(I->getOperand(0)))
65055f7ddb14SDimitry Andric return false;
65060b57cec5SDimitry Andric break;
65070b57cec5SDimitry Andric
65080b57cec5SDimitry Andric // We can demote certain binary operations if we can demote both of their
65090b57cec5SDimitry Andric // operands.
65100b57cec5SDimitry Andric case Instruction::Add:
65110b57cec5SDimitry Andric case Instruction::Sub:
65120b57cec5SDimitry Andric case Instruction::Mul:
65130b57cec5SDimitry Andric case Instruction::And:
65140b57cec5SDimitry Andric case Instruction::Or:
65150b57cec5SDimitry Andric case Instruction::Xor:
65160b57cec5SDimitry Andric if (!collectValuesToDemote(I->getOperand(0), Expr, ToDemote, Roots) ||
65170b57cec5SDimitry Andric !collectValuesToDemote(I->getOperand(1), Expr, ToDemote, Roots))
65180b57cec5SDimitry Andric return false;
65190b57cec5SDimitry Andric break;
65200b57cec5SDimitry Andric
65210b57cec5SDimitry Andric // We can demote selects if we can demote their true and false values.
65220b57cec5SDimitry Andric case Instruction::Select: {
65230b57cec5SDimitry Andric SelectInst *SI = cast<SelectInst>(I);
65240b57cec5SDimitry Andric if (!collectValuesToDemote(SI->getTrueValue(), Expr, ToDemote, Roots) ||
65250b57cec5SDimitry Andric !collectValuesToDemote(SI->getFalseValue(), Expr, ToDemote, Roots))
65260b57cec5SDimitry Andric return false;
65270b57cec5SDimitry Andric break;
65280b57cec5SDimitry Andric }
65290b57cec5SDimitry Andric
65300b57cec5SDimitry Andric // We can demote phis if we can demote all their incoming operands. Note that
65310b57cec5SDimitry Andric // we don't need to worry about cycles since we ensure single use above.
65320b57cec5SDimitry Andric case Instruction::PHI: {
65330b57cec5SDimitry Andric PHINode *PN = cast<PHINode>(I);
65340b57cec5SDimitry Andric for (Value *IncValue : PN->incoming_values())
65350b57cec5SDimitry Andric if (!collectValuesToDemote(IncValue, Expr, ToDemote, Roots))
65360b57cec5SDimitry Andric return false;
65370b57cec5SDimitry Andric break;
65380b57cec5SDimitry Andric }
65390b57cec5SDimitry Andric
65400b57cec5SDimitry Andric // Otherwise, conservatively give up.
65410b57cec5SDimitry Andric default:
65420b57cec5SDimitry Andric return false;
65430b57cec5SDimitry Andric }
65440b57cec5SDimitry Andric
65450b57cec5SDimitry Andric // Record the value that we can demote.
65460b57cec5SDimitry Andric ToDemote.push_back(V);
65470b57cec5SDimitry Andric return true;
65480b57cec5SDimitry Andric }
65490b57cec5SDimitry Andric
computeMinimumValueSizes()65500b57cec5SDimitry Andric void BoUpSLP::computeMinimumValueSizes() {
65510b57cec5SDimitry Andric // If there are no external uses, the expression tree must be rooted by a
65520b57cec5SDimitry Andric // store. We can't demote in-memory values, so there is nothing to do here.
65530b57cec5SDimitry Andric if (ExternalUses.empty())
65540b57cec5SDimitry Andric return;
65550b57cec5SDimitry Andric
65560b57cec5SDimitry Andric // We only attempt to truncate integer expressions.
65570b57cec5SDimitry Andric auto &TreeRoot = VectorizableTree[0]->Scalars;
65580b57cec5SDimitry Andric auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());
65590b57cec5SDimitry Andric if (!TreeRootIT)
65600b57cec5SDimitry Andric return;
65610b57cec5SDimitry Andric
65620b57cec5SDimitry Andric // If the expression is not rooted by a store, these roots should have
65630b57cec5SDimitry Andric // external uses. We will rely on InstCombine to rewrite the expression in
65640b57cec5SDimitry Andric // the narrower type. However, InstCombine only rewrites single-use values.
65650b57cec5SDimitry Andric // This means that if a tree entry other than a root is used externally, it
65660b57cec5SDimitry Andric // must have multiple uses and InstCombine will not rewrite it. The code
65670b57cec5SDimitry Andric // below ensures that only the roots are used externally.
65680b57cec5SDimitry Andric SmallPtrSet<Value *, 32> Expr(TreeRoot.begin(), TreeRoot.end());
65690b57cec5SDimitry Andric for (auto &EU : ExternalUses)
65700b57cec5SDimitry Andric if (!Expr.erase(EU.Scalar))
65710b57cec5SDimitry Andric return;
65720b57cec5SDimitry Andric if (!Expr.empty())
65730b57cec5SDimitry Andric return;
65740b57cec5SDimitry Andric
65750b57cec5SDimitry Andric // Collect the scalar values of the vectorizable expression. We will use this
65760b57cec5SDimitry Andric // context to determine which values can be demoted. If we see a truncation,
65770b57cec5SDimitry Andric // we mark it as seeding another demotion.
65780b57cec5SDimitry Andric for (auto &EntryPtr : VectorizableTree)
65790b57cec5SDimitry Andric Expr.insert(EntryPtr->Scalars.begin(), EntryPtr->Scalars.end());
65800b57cec5SDimitry Andric
65810b57cec5SDimitry Andric // Ensure the roots of the vectorizable tree don't form a cycle. They must
65820b57cec5SDimitry Andric // have a single external user that is not in the vectorizable tree.
65830b57cec5SDimitry Andric for (auto *Root : TreeRoot)
65840b57cec5SDimitry Andric if (!Root->hasOneUse() || Expr.count(*Root->user_begin()))
65850b57cec5SDimitry Andric return;
65860b57cec5SDimitry Andric
65870b57cec5SDimitry Andric // Conservatively determine if we can actually truncate the roots of the
65880b57cec5SDimitry Andric // expression. Collect the values that can be demoted in ToDemote and
65890b57cec5SDimitry Andric // additional roots that require investigating in Roots.
65900b57cec5SDimitry Andric SmallVector<Value *, 32> ToDemote;
65910b57cec5SDimitry Andric SmallVector<Value *, 4> Roots;
65920b57cec5SDimitry Andric for (auto *Root : TreeRoot)
65930b57cec5SDimitry Andric if (!collectValuesToDemote(Root, Expr, ToDemote, Roots))
65940b57cec5SDimitry Andric return;
65950b57cec5SDimitry Andric
65960b57cec5SDimitry Andric // The maximum bit width required to represent all the values that can be
65970b57cec5SDimitry Andric // demoted without loss of precision. It would be safe to truncate the roots
65980b57cec5SDimitry Andric // of the expression to this width.
65990b57cec5SDimitry Andric auto MaxBitWidth = 8u;
66000b57cec5SDimitry Andric
66010b57cec5SDimitry Andric // We first check if all the bits of the roots are demanded. If they're not,
66020b57cec5SDimitry Andric // we can truncate the roots to this narrower type.
66030b57cec5SDimitry Andric for (auto *Root : TreeRoot) {
66040b57cec5SDimitry Andric auto Mask = DB->getDemandedBits(cast<Instruction>(Root));
66050b57cec5SDimitry Andric MaxBitWidth = std::max<unsigned>(
66060b57cec5SDimitry Andric Mask.getBitWidth() - Mask.countLeadingZeros(), MaxBitWidth);
66070b57cec5SDimitry Andric }
66080b57cec5SDimitry Andric
66090b57cec5SDimitry Andric // True if the roots can be zero-extended back to their original type, rather
66100b57cec5SDimitry Andric // than sign-extended. We know that if the leading bits are not demanded, we
66110b57cec5SDimitry Andric // can safely zero-extend. So we initialize IsKnownPositive to True.
66120b57cec5SDimitry Andric bool IsKnownPositive = true;
66130b57cec5SDimitry Andric
66140b57cec5SDimitry Andric // If all the bits of the roots are demanded, we can try a little harder to
66150b57cec5SDimitry Andric // compute a narrower type. This can happen, for example, if the roots are
66160b57cec5SDimitry Andric // getelementptr indices. InstCombine promotes these indices to the pointer
66170b57cec5SDimitry Andric // width. Thus, all their bits are technically demanded even though the
66180b57cec5SDimitry Andric // address computation might be vectorized in a smaller type.
66190b57cec5SDimitry Andric //
66200b57cec5SDimitry Andric // We start by looking at each entry that can be demoted. We compute the
66210b57cec5SDimitry Andric // maximum bit width required to store the scalar by using ValueTracking to
66220b57cec5SDimitry Andric // compute the number of high-order bits we can truncate.
66230b57cec5SDimitry Andric if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType()) &&
66240b57cec5SDimitry Andric llvm::all_of(TreeRoot, [](Value *R) {
66250b57cec5SDimitry Andric assert(R->hasOneUse() && "Root should have only one use!");
66260b57cec5SDimitry Andric return isa<GetElementPtrInst>(R->user_back());
66270b57cec5SDimitry Andric })) {
66280b57cec5SDimitry Andric MaxBitWidth = 8u;
66290b57cec5SDimitry Andric
66300b57cec5SDimitry Andric // Determine if the sign bit of all the roots is known to be zero. If not,
66310b57cec5SDimitry Andric // IsKnownPositive is set to False.
66320b57cec5SDimitry Andric IsKnownPositive = llvm::all_of(TreeRoot, [&](Value *R) {
66330b57cec5SDimitry Andric KnownBits Known = computeKnownBits(R, *DL);
66340b57cec5SDimitry Andric return Known.isNonNegative();
66350b57cec5SDimitry Andric });
66360b57cec5SDimitry Andric
66370b57cec5SDimitry Andric // Determine the maximum number of bits required to store the scalar
66380b57cec5SDimitry Andric // values.
66390b57cec5SDimitry Andric for (auto *Scalar : ToDemote) {
66400b57cec5SDimitry Andric auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, nullptr, DT);
66410b57cec5SDimitry Andric auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType());
66420b57cec5SDimitry Andric MaxBitWidth = std::max<unsigned>(NumTypeBits - NumSignBits, MaxBitWidth);
66430b57cec5SDimitry Andric }
66440b57cec5SDimitry Andric
66450b57cec5SDimitry Andric // If we can't prove that the sign bit is zero, we must add one to the
66460b57cec5SDimitry Andric // maximum bit width to account for the unknown sign bit. This preserves
66470b57cec5SDimitry Andric // the existing sign bit so we can safely sign-extend the root back to the
66480b57cec5SDimitry Andric // original type. Otherwise, if we know the sign bit is zero, we will
66490b57cec5SDimitry Andric // zero-extend the root instead.
66500b57cec5SDimitry Andric //
66510b57cec5SDimitry Andric // FIXME: This is somewhat suboptimal, as there will be cases where adding
66520b57cec5SDimitry Andric // one to the maximum bit width will yield a larger-than-necessary
66530b57cec5SDimitry Andric // type. In general, we need to add an extra bit only if we can't
66540b57cec5SDimitry Andric // prove that the upper bit of the original type is equal to the
66550b57cec5SDimitry Andric // upper bit of the proposed smaller type. If these two bits are the
66560b57cec5SDimitry Andric // same (either zero or one) we know that sign-extending from the
66570b57cec5SDimitry Andric // smaller type will result in the same value. Here, since we can't
66580b57cec5SDimitry Andric // yet prove this, we are just making the proposed smaller type
66590b57cec5SDimitry Andric // larger to ensure correctness.
66600b57cec5SDimitry Andric if (!IsKnownPositive)
66610b57cec5SDimitry Andric ++MaxBitWidth;
66620b57cec5SDimitry Andric }
66630b57cec5SDimitry Andric
66640b57cec5SDimitry Andric // Round MaxBitWidth up to the next power-of-two.
66650b57cec5SDimitry Andric if (!isPowerOf2_64(MaxBitWidth))
66660b57cec5SDimitry Andric MaxBitWidth = NextPowerOf2(MaxBitWidth);
66670b57cec5SDimitry Andric
66680b57cec5SDimitry Andric // If the maximum bit width we compute is less than the with of the roots'
66690b57cec5SDimitry Andric // type, we can proceed with the narrowing. Otherwise, do nothing.
66700b57cec5SDimitry Andric if (MaxBitWidth >= TreeRootIT->getBitWidth())
66710b57cec5SDimitry Andric return;
66720b57cec5SDimitry Andric
66730b57cec5SDimitry Andric // If we can truncate the root, we must collect additional values that might
66740b57cec5SDimitry Andric // be demoted as a result. That is, those seeded by truncations we will
66750b57cec5SDimitry Andric // modify.
66760b57cec5SDimitry Andric while (!Roots.empty())
66770b57cec5SDimitry Andric collectValuesToDemote(Roots.pop_back_val(), Expr, ToDemote, Roots);
66780b57cec5SDimitry Andric
66790b57cec5SDimitry Andric // Finally, map the values we can demote to the maximum bit with we computed.
66800b57cec5SDimitry Andric for (auto *Scalar : ToDemote)
66810b57cec5SDimitry Andric MinBWs[Scalar] = std::make_pair(MaxBitWidth, !IsKnownPositive);
66820b57cec5SDimitry Andric }
66830b57cec5SDimitry Andric
namespace {

/// The SLPVectorizer Pass.
///
/// Legacy pass-manager wrapper: it owns an SLPVectorizerPass instance,
/// collects the analyses the vectorizer consumes, and forwards everything to
/// Impl.runImpl(). All vectorization logic lives in SLPVectorizerPass.
struct SLPVectorizer : public FunctionPass {
  /// The shared implementation, also used by the new pass manager.
  SLPVectorizerPass Impl;

  /// Pass identification, replacement for typeid
  static char ID;

  explicit SLPVectorizer() : FunctionPass(ID) {
    initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
  }

  // No module-level setup is needed; all state is per-function.
  bool doInitialization(Module &M) override {
    return false;
  }

  bool runOnFunction(Function &F) override {
    // Respect -optnone and pass bisection: do nothing on skipped functions.
    if (skipFunction(F))
      return false;

    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    // TargetLibraryInfo may be absent in the legacy PM; tolerate nullptr.
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
    auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
    auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

    return Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, ORE);
  }

  // Declare required analyses and what survives this pass. The vectorizer
  // rewrites instructions but never changes the CFG, hence setPreservesCFG().
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    FunctionPass::getAnalysisUsage(AU);
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<DemandedBitsWrapperPass>();
    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
    AU.addRequired<InjectTLIMappingsLegacy>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<AAResultsWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace
67390b57cec5SDimitry Andric
run(Function & F,FunctionAnalysisManager & AM)67400b57cec5SDimitry Andric PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) {
67410b57cec5SDimitry Andric auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
67420b57cec5SDimitry Andric auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
67430b57cec5SDimitry Andric auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F);
67440b57cec5SDimitry Andric auto *AA = &AM.getResult<AAManager>(F);
67450b57cec5SDimitry Andric auto *LI = &AM.getResult<LoopAnalysis>(F);
67460b57cec5SDimitry Andric auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
67470b57cec5SDimitry Andric auto *AC = &AM.getResult<AssumptionAnalysis>(F);
67480b57cec5SDimitry Andric auto *DB = &AM.getResult<DemandedBitsAnalysis>(F);
67490b57cec5SDimitry Andric auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
67500b57cec5SDimitry Andric
67510b57cec5SDimitry Andric bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, ORE);
67520b57cec5SDimitry Andric if (!Changed)
67530b57cec5SDimitry Andric return PreservedAnalyses::all();
67540b57cec5SDimitry Andric
67550b57cec5SDimitry Andric PreservedAnalyses PA;
67560b57cec5SDimitry Andric PA.preserveSet<CFGAnalyses>();
67570b57cec5SDimitry Andric return PA;
67580b57cec5SDimitry Andric }
67590b57cec5SDimitry Andric
/// Shared driver for both pass managers.
///
/// Caches the supplied analyses in member fields, performs cheap early-exit
/// checks, then walks the function's blocks in post order looking for
/// vectorization seeds (stores, reduction chains, GEP indices).
/// \returns true if any instruction was changed.
bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
                                TargetTransformInfo *TTI_,
                                TargetLibraryInfo *TLI_, AAResults *AA_,
                                LoopInfo *LI_, DominatorTree *DT_,
                                AssumptionCache *AC_, DemandedBits *DB_,
                                OptimizationRemarkEmitter *ORE_) {
  // Honor the -slp-vectorize command-line switch.
  if (!RunSLPVectorization)
    return false;
  // Stash the analyses in members so the helper routines can reach them.
  SE = SE_;
  TTI = TTI_;
  TLI = TLI_;
  AA = AA_;
  LI = LI_;
  DT = DT_;
  AC = AC_;
  DB = DB_;
  DL = &F.getParent()->getDataLayout();

  // Seed maps may hold entries from a previous function; reset them.
  Stores.clear();
  GEPs.clear();
  bool Changed = false;

  // If the target claims to have no vector registers don't attempt
  // vectorization.
  if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)))
    return false;

  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (F.hasFnAttribute(Attribute::NoImplicitFloat))
    return false;

  LLVM_DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");

  // Use the bottom up slp vectorizer to construct chains that start with
  // store instructions.
  BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL, ORE_);

  // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
  // delete instructions.

  // Update DFS numbers now so that we can use them for ordering.
  DT->updateDFSNumbers();

  // Scan the blocks in the function in post order.
  for (auto BB : post_order(&F.getEntryBlock())) {
    // Refill the Stores/GEPs seed maps for this block.
    collectSeedInstructions(BB);

    // Vectorize trees that end at stores.
    if (!Stores.empty()) {
      LLVM_DEBUG(dbgs() << "SLP: Found stores for " << Stores.size()
                        << " underlying objects.\n");
      Changed |= vectorizeStoreChains(R);
    }

    // Vectorize trees that end at reductions.
    Changed |= vectorizeChainsInBlock(BB, R);

    // Vectorize the index computations of getelementptr instructions. This
    // is primarily intended to catch gather-like idioms ending at
    // non-consecutive loads.
    if (!GEPs.empty()) {
      LLVM_DEBUG(dbgs() << "SLP: Found GEPs for " << GEPs.size()
                        << " underlying objects.\n");
      Changed |= vectorizeGEPIndices(BB, R);
    }
  }

  if (Changed) {
    // Clean up redundant shuffles/inserts produced while gathering scalars.
    R.optimizeGatherSequence();
    LLVM_DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n");
  }
  return Changed;
}
68330b57cec5SDimitry Andric
68345f7ddb14SDimitry Andric /// Order may have elements assigned special value (size) which is out of
68355f7ddb14SDimitry Andric /// bounds. Such indices only appear on places which correspond to undef values
68365f7ddb14SDimitry Andric /// (see canReuseExtract for details) and used in order to avoid undef values
68375f7ddb14SDimitry Andric /// have effect on operands ordering.
68385f7ddb14SDimitry Andric /// The first loop below simply finds all unused indices and then the next loop
68395f7ddb14SDimitry Andric /// nest assigns these indices for undef values positions.
68405f7ddb14SDimitry Andric /// As an example below Order has two undef positions and they have assigned
68415f7ddb14SDimitry Andric /// values 3 and 7 respectively:
68425f7ddb14SDimitry Andric /// before: 6 9 5 4 9 2 1 0
68435f7ddb14SDimitry Andric /// after: 6 3 5 4 7 2 1 0
68445f7ddb14SDimitry Andric /// \returns Fixed ordering.
fixupOrderingIndices(ArrayRef<unsigned> Order)68455f7ddb14SDimitry Andric static BoUpSLP::OrdersType fixupOrderingIndices(ArrayRef<unsigned> Order) {
68465f7ddb14SDimitry Andric BoUpSLP::OrdersType NewOrder(Order.begin(), Order.end());
68475f7ddb14SDimitry Andric const unsigned Sz = NewOrder.size();
68485f7ddb14SDimitry Andric SmallBitVector UsedIndices(Sz);
68495f7ddb14SDimitry Andric SmallVector<int> MaskedIndices;
68505f7ddb14SDimitry Andric for (int I = 0, E = NewOrder.size(); I < E; ++I) {
68515f7ddb14SDimitry Andric if (NewOrder[I] < Sz)
68525f7ddb14SDimitry Andric UsedIndices.set(NewOrder[I]);
68535f7ddb14SDimitry Andric else
68545f7ddb14SDimitry Andric MaskedIndices.push_back(I);
68555f7ddb14SDimitry Andric }
68565f7ddb14SDimitry Andric if (MaskedIndices.empty())
68575f7ddb14SDimitry Andric return NewOrder;
68585f7ddb14SDimitry Andric SmallVector<int> AvailableIndices(MaskedIndices.size());
68595f7ddb14SDimitry Andric unsigned Cnt = 0;
68605f7ddb14SDimitry Andric int Idx = UsedIndices.find_first();
68615f7ddb14SDimitry Andric do {
68625f7ddb14SDimitry Andric AvailableIndices[Cnt] = Idx;
68635f7ddb14SDimitry Andric Idx = UsedIndices.find_next(Idx);
68645f7ddb14SDimitry Andric ++Cnt;
68655f7ddb14SDimitry Andric } while (Idx > 0);
68665f7ddb14SDimitry Andric assert(Cnt == MaskedIndices.size() && "Non-synced masked/available indices.");
68675f7ddb14SDimitry Andric for (int I = 0, E = MaskedIndices.size(); I < E; ++I)
68685f7ddb14SDimitry Andric NewOrder[MaskedIndices[I]] = AvailableIndices[I];
68695f7ddb14SDimitry Andric return NewOrder;
68705f7ddb14SDimitry Andric }
68715f7ddb14SDimitry Andric
vectorizeStoreChain(ArrayRef<Value * > Chain,BoUpSLP & R,unsigned Idx)68720b57cec5SDimitry Andric bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
6873480093f4SDimitry Andric unsigned Idx) {
6874480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << Chain.size()
68750b57cec5SDimitry Andric << "\n");
68760b57cec5SDimitry Andric const unsigned Sz = R.getVectorElementSize(Chain[0]);
6877480093f4SDimitry Andric const unsigned MinVF = R.getMinVecRegSize() / Sz;
6878480093f4SDimitry Andric unsigned VF = Chain.size();
68790b57cec5SDimitry Andric
6880480093f4SDimitry Andric if (!isPowerOf2_32(Sz) || !isPowerOf2_32(VF) || VF < 2 || VF < MinVF)
68810b57cec5SDimitry Andric return false;
68820b57cec5SDimitry Andric
6883480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << Idx
68840b57cec5SDimitry Andric << "\n");
68850b57cec5SDimitry Andric
6886480093f4SDimitry Andric R.buildTree(Chain);
6887480093f4SDimitry Andric Optional<ArrayRef<unsigned>> Order = R.bestOrder();
6888480093f4SDimitry Andric // TODO: Handle orders of size less than number of elements in the vector.
6889480093f4SDimitry Andric if (Order && Order->size() == Chain.size()) {
6890480093f4SDimitry Andric // TODO: reorder tree nodes without tree rebuilding.
68915f7ddb14SDimitry Andric SmallVector<Value *, 4> ReorderedOps(Chain.size());
68925f7ddb14SDimitry Andric transform(fixupOrderingIndices(*Order), ReorderedOps.begin(),
6893480093f4SDimitry Andric [Chain](const unsigned Idx) { return Chain[Idx]; });
6894480093f4SDimitry Andric R.buildTree(ReorderedOps);
6895480093f4SDimitry Andric }
68960b57cec5SDimitry Andric if (R.isTreeTinyAndNotFullyVectorizable())
6897480093f4SDimitry Andric return false;
68985ffd83dbSDimitry Andric if (R.isLoadCombineCandidate())
68995ffd83dbSDimitry Andric return false;
69000b57cec5SDimitry Andric
69010b57cec5SDimitry Andric R.computeMinimumValueSizes();
69020b57cec5SDimitry Andric
6903af732203SDimitry Andric InstructionCost Cost = R.getTreeCost();
69040b57cec5SDimitry Andric
6905480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for VF =" << VF << "\n");
69060b57cec5SDimitry Andric if (Cost < -SLPCostThreshold) {
69070b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Decided to vectorize cost = " << Cost << "\n");
69080b57cec5SDimitry Andric
69090b57cec5SDimitry Andric using namespace ore;
69100b57cec5SDimitry Andric
69110b57cec5SDimitry Andric R.getORE()->emit(OptimizationRemark(SV_NAME, "StoresVectorized",
6912480093f4SDimitry Andric cast<StoreInst>(Chain[0]))
69130b57cec5SDimitry Andric << "Stores SLP vectorized with cost " << NV("Cost", Cost)
69140b57cec5SDimitry Andric << " and with tree size "
69150b57cec5SDimitry Andric << NV("TreeSize", R.getTreeSize()));
69160b57cec5SDimitry Andric
69170b57cec5SDimitry Andric R.vectorizeTree();
6918480093f4SDimitry Andric return true;
69190b57cec5SDimitry Andric }
69200b57cec5SDimitry Andric
6921480093f4SDimitry Andric return false;
69220b57cec5SDimitry Andric }
69230b57cec5SDimitry Andric
/// Pair up the stores of one underlying object into consecutive chains and
/// try to vectorize each chain at decreasing vector factors.
/// \returns true if any store chain was vectorized.
bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
                                        BoUpSLP &R) {
  // We may run into multiple chains that merge into a single chain. We mark the
  // stores that we vectorized so that we don't visit the same store twice.
  BoUpSLP::ValueSet VectorizedStores;
  bool Changed = false;

  int E = Stores.size();
  SmallBitVector Tails(E, false);
  // Per-store budget for the quadratic pairing scan below.
  int MaxIter = MaxStoreLookup.getValue();
  // ConsecutiveChain[K] = (Idx, Dist): the closest store found so far that
  // lies Dist elements *after* Stores[K] in memory; (E, INT_MAX) means no
  // successor is known yet.
  SmallVector<std::pair<int, int>, 16> ConsecutiveChain(
      E, std::make_pair(E, INT_MAX));
  // CheckedPairs[I].test(K) remembers that pair (I, K) was already compared,
  // so a pointer difference is never recomputed.
  SmallVector<SmallBitVector, 4> CheckedPairs(E, SmallBitVector(E, false));
  int IterCnt;
  // Probe whether Stores[K] and Stores[Idx] are neighbors in memory, updating
  // ConsecutiveChain/Tails accordingly. Returns true either when Stores[K]
  // immediately precedes Stores[Idx] (distance exactly 1) or when the search
  // budget is exhausted — both tell the caller to stop probing for Idx.
  auto &&FindConsecutiveAccess = [this, &Stores, &Tails, &IterCnt, MaxIter,
                                  &CheckedPairs,
                                  &ConsecutiveChain](int K, int Idx) {
    if (IterCnt >= MaxIter)
      return true;
    // Already compared: just report whether K -> Idx is a unit-distance link.
    if (CheckedPairs[Idx].test(K))
      return ConsecutiveChain[K].second == 1 &&
             ConsecutiveChain[K].first == Idx;
    ++IterCnt;
    CheckedPairs[Idx].set(K);
    CheckedPairs[K].set(Idx);
    Optional<int> Diff = getPointersDiff(
        Stores[K]->getValueOperand()->getType(), Stores[K]->getPointerOperand(),
        Stores[Idx]->getValueOperand()->getType(),
        Stores[Idx]->getPointerOperand(), *DL, *SE, /*StrictCheck=*/true);
    // Unknown distance or identical address: not a usable pair.
    if (!Diff || *Diff == 0)
      return false;
    int Val = *Diff;
    if (Val < 0) {
      // Stores[Idx] is before Stores[K]; record K as Idx's successor if it is
      // closer than the successor Idx already has.
      if (ConsecutiveChain[Idx].second > -Val) {
        Tails.set(K);
        ConsecutiveChain[Idx] = std::make_pair(K, -Val);
      }
      return false;
    }
    // Keep only the closest known successor of K.
    if (ConsecutiveChain[K].second <= Val)
      return false;

    Tails.set(Idx);
    ConsecutiveChain[K] = std::make_pair(Idx, Val);
    return Val == 1;
  };
  // Do a quadratic search on all of the given stores in reverse order and find
  // all of the pairs of stores that follow each other.
  for (int Idx = E - 1; Idx >= 0; --Idx) {
    // If a store has multiple consecutive store candidates, search according
    // to the sequence: Idx-1, Idx+1, Idx-2, Idx+2, ...
    // This is because usually pairing with immediate succeeding or preceding
    // candidate create the best chance to find slp vectorization opportunity.
    const int MaxLookDepth = std::max(E - Idx, Idx + 1);
    IterCnt = 0;
    for (int Offset = 1, F = MaxLookDepth; Offset < F; ++Offset)
      if ((Idx >= Offset && FindConsecutiveAccess(Idx - Offset, Idx)) ||
          (Idx + Offset < E && FindConsecutiveAccess(Idx + Offset, Idx)))
        break;
  }

  // Tracks if we tried to vectorize stores starting from the given tail
  // already.
  SmallBitVector TriedTails(E, false);
  // For stores that start but don't end a link in the chain:
  for (int Cnt = E; Cnt > 0; --Cnt) {
    int I = Cnt - 1;
    // Skip stores with no known successor and stores that are mid-chain.
    if (ConsecutiveChain[I].first == E || Tails.test(I))
      continue;
    // We found a store instr that starts a chain. Now follow the chain and try
    // to vectorize it.
    BoUpSLP::ValueList Operands;
    // Collect the chain into a list.
    while (I != E && !VectorizedStores.count(Stores[I])) {
      Operands.push_back(Stores[I]);
      Tails.set(I);
      if (ConsecutiveChain[I].second != 1) {
        // Mark the new end in the chain and go back, if required. It might be
        // required if the original stores come in reversed order, for example.
        // Bumping Cnt makes the outer loop revisit that head on a later
        // iteration.
        if (ConsecutiveChain[I].first != E &&
            Tails.test(ConsecutiveChain[I].first) && !TriedTails.test(I) &&
            !VectorizedStores.count(Stores[ConsecutiveChain[I].first])) {
          TriedTails.set(I);
          Tails.reset(ConsecutiveChain[I].first);
          if (Cnt < ConsecutiveChain[I].first + 2)
            Cnt = ConsecutiveChain[I].first + 2;
        }
        break;
      }
      // Move to the next value in the chain.
      I = ConsecutiveChain[I].first;
    }
    assert(!Operands.empty() && "Expected non-empty list of stores.");

    unsigned MaxVecRegSize = R.getMaxVecRegSize();
    unsigned EltSize = R.getVectorElementSize(Operands[0]);
    unsigned MaxElts = llvm::PowerOf2Floor(MaxVecRegSize / EltSize);

    // Vector factors to try, from the widest the target supports down to the
    // narrowest that still fills a minimum-size register.
    unsigned MinVF = std::max(2U, R.getMinVecRegSize() / EltSize);
    unsigned MaxVF = std::min(R.getMaximumVF(EltSize, Instruction::Store),
                              MaxElts);

    // FIXME: Is division-by-2 the correct step? Should we assert that the
    // register size is a power-of-2?
    unsigned StartIdx = 0;
    for (unsigned Size = MaxVF; Size >= MinVF; Size /= 2) {
      // Slide a window of Size stores over the not-yet-vectorized suffix.
      for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
        ArrayRef<Value *> Slice = makeArrayRef(Operands).slice(Cnt, Size);
        if (!VectorizedStores.count(Slice.front()) &&
            !VectorizedStores.count(Slice.back()) &&
            vectorizeStoreChain(Slice, R, Cnt)) {
          // Mark the vectorized stores so that we don't vectorize them again.
          VectorizedStores.insert(Slice.begin(), Slice.end());
          Changed = true;
          // If we vectorized initial block, no need to try to vectorize it
          // again.
          if (Cnt == StartIdx)
            StartIdx += Size;
          Cnt += Size;
          continue;
        }
        ++Cnt;
      }
      // Check if the whole array was vectorized already - exit.
      if (StartIdx >= Operands.size())
        break;
    }
  }

  return Changed;
}
70550b57cec5SDimitry Andric
collectSeedInstructions(BasicBlock * BB)70560b57cec5SDimitry Andric void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
70570b57cec5SDimitry Andric // Initialize the collections. We will make a single pass over the block.
70580b57cec5SDimitry Andric Stores.clear();
70590b57cec5SDimitry Andric GEPs.clear();
70600b57cec5SDimitry Andric
70610b57cec5SDimitry Andric // Visit the store and getelementptr instructions in BB and organize them in
70620b57cec5SDimitry Andric // Stores and GEPs according to the underlying objects of their pointer
70630b57cec5SDimitry Andric // operands.
70640b57cec5SDimitry Andric for (Instruction &I : *BB) {
70650b57cec5SDimitry Andric // Ignore store instructions that are volatile or have a pointer operand
70660b57cec5SDimitry Andric // that doesn't point to a scalar type.
70670b57cec5SDimitry Andric if (auto *SI = dyn_cast<StoreInst>(&I)) {
70680b57cec5SDimitry Andric if (!SI->isSimple())
70690b57cec5SDimitry Andric continue;
70700b57cec5SDimitry Andric if (!isValidElementType(SI->getValueOperand()->getType()))
70710b57cec5SDimitry Andric continue;
7072af732203SDimitry Andric Stores[getUnderlyingObject(SI->getPointerOperand())].push_back(SI);
70730b57cec5SDimitry Andric }
70740b57cec5SDimitry Andric
70750b57cec5SDimitry Andric // Ignore getelementptr instructions that have more than one index, a
70760b57cec5SDimitry Andric // constant index, or a pointer operand that doesn't point to a scalar
70770b57cec5SDimitry Andric // type.
70780b57cec5SDimitry Andric else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
70790b57cec5SDimitry Andric auto Idx = GEP->idx_begin()->get();
70800b57cec5SDimitry Andric if (GEP->getNumIndices() > 1 || isa<Constant>(Idx))
70810b57cec5SDimitry Andric continue;
70820b57cec5SDimitry Andric if (!isValidElementType(Idx->getType()))
70830b57cec5SDimitry Andric continue;
70840b57cec5SDimitry Andric if (GEP->getType()->isVectorTy())
70850b57cec5SDimitry Andric continue;
70860b57cec5SDimitry Andric GEPs[GEP->getPointerOperand()].push_back(GEP);
70870b57cec5SDimitry Andric }
70880b57cec5SDimitry Andric }
70890b57cec5SDimitry Andric }
70900b57cec5SDimitry Andric
tryToVectorizePair(Value * A,Value * B,BoUpSLP & R)70910b57cec5SDimitry Andric bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
70920b57cec5SDimitry Andric if (!A || !B)
70930b57cec5SDimitry Andric return false;
70940b57cec5SDimitry Andric Value *VL[] = {A, B};
70955ffd83dbSDimitry Andric return tryToVectorizeList(VL, R, /*AllowReorder=*/true);
70960b57cec5SDimitry Andric }
70970b57cec5SDimitry Andric
tryToVectorizeList(ArrayRef<Value * > VL,BoUpSLP & R,bool AllowReorder)70980b57cec5SDimitry Andric bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
70995f7ddb14SDimitry Andric bool AllowReorder) {
71000b57cec5SDimitry Andric if (VL.size() < 2)
71010b57cec5SDimitry Andric return false;
71020b57cec5SDimitry Andric
71030b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize a list of length = "
71040b57cec5SDimitry Andric << VL.size() << ".\n");
71050b57cec5SDimitry Andric
71065ffd83dbSDimitry Andric // Check that all of the parts are instructions of the same type,
71070b57cec5SDimitry Andric // we permit an alternate opcode via InstructionsState.
71080b57cec5SDimitry Andric InstructionsState S = getSameOpcode(VL);
71090b57cec5SDimitry Andric if (!S.getOpcode())
71100b57cec5SDimitry Andric return false;
71110b57cec5SDimitry Andric
71120b57cec5SDimitry Andric Instruction *I0 = cast<Instruction>(S.OpValue);
71135ffd83dbSDimitry Andric // Make sure invalid types (including vector type) are rejected before
71145ffd83dbSDimitry Andric // determining vectorization factor for scalar instructions.
71150b57cec5SDimitry Andric for (Value *V : VL) {
71160b57cec5SDimitry Andric Type *Ty = V->getType();
71175f7ddb14SDimitry Andric if (!isa<InsertElementInst>(V) && !isValidElementType(Ty)) {
71180b57cec5SDimitry Andric // NOTE: the following will give user internal llvm type name, which may
71190b57cec5SDimitry Andric // not be useful.
71200b57cec5SDimitry Andric R.getORE()->emit([&]() {
71210b57cec5SDimitry Andric std::string type_str;
71220b57cec5SDimitry Andric llvm::raw_string_ostream rso(type_str);
71230b57cec5SDimitry Andric Ty->print(rso);
71240b57cec5SDimitry Andric return OptimizationRemarkMissed(SV_NAME, "UnsupportedType", I0)
71250b57cec5SDimitry Andric << "Cannot SLP vectorize list: type "
71260b57cec5SDimitry Andric << rso.str() + " is unsupported by vectorizer";
71270b57cec5SDimitry Andric });
71280b57cec5SDimitry Andric return false;
71290b57cec5SDimitry Andric }
71300b57cec5SDimitry Andric }
71310b57cec5SDimitry Andric
71325ffd83dbSDimitry Andric unsigned Sz = R.getVectorElementSize(I0);
71335ffd83dbSDimitry Andric unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
71345ffd83dbSDimitry Andric unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
7135af732203SDimitry Andric MaxVF = std::min(R.getMaximumVF(Sz, S.getOpcode()), MaxVF);
71365ffd83dbSDimitry Andric if (MaxVF < 2) {
71375ffd83dbSDimitry Andric R.getORE()->emit([&]() {
71385ffd83dbSDimitry Andric return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
71395ffd83dbSDimitry Andric << "Cannot SLP vectorize list: vectorization factor "
71405ffd83dbSDimitry Andric << "less than 2 is not supported";
71415ffd83dbSDimitry Andric });
71425ffd83dbSDimitry Andric return false;
71435ffd83dbSDimitry Andric }
71445ffd83dbSDimitry Andric
71450b57cec5SDimitry Andric bool Changed = false;
71460b57cec5SDimitry Andric bool CandidateFound = false;
7147af732203SDimitry Andric InstructionCost MinCost = SLPCostThreshold.getValue();
71485f7ddb14SDimitry Andric Type *ScalarTy = VL[0]->getType();
71495f7ddb14SDimitry Andric if (auto *IE = dyn_cast<InsertElementInst>(VL[0]))
71505f7ddb14SDimitry Andric ScalarTy = IE->getOperand(1)->getType();
71515ffd83dbSDimitry Andric
71520b57cec5SDimitry Andric unsigned NextInst = 0, MaxInst = VL.size();
71538bcb0991SDimitry Andric for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) {
71540b57cec5SDimitry Andric // No actual vectorization should happen, if number of parts is the same as
71550b57cec5SDimitry Andric // provided vectorization factor (i.e. the scalar type is used for vector
71560b57cec5SDimitry Andric // code during codegen).
71575f7ddb14SDimitry Andric auto *VecTy = FixedVectorType::get(ScalarTy, VF);
71580b57cec5SDimitry Andric if (TTI->getNumberOfParts(VecTy) == VF)
71590b57cec5SDimitry Andric continue;
71600b57cec5SDimitry Andric for (unsigned I = NextInst; I < MaxInst; ++I) {
71610b57cec5SDimitry Andric unsigned OpsWidth = 0;
71620b57cec5SDimitry Andric
71630b57cec5SDimitry Andric if (I + VF > MaxInst)
71640b57cec5SDimitry Andric OpsWidth = MaxInst - I;
71650b57cec5SDimitry Andric else
71660b57cec5SDimitry Andric OpsWidth = VF;
71670b57cec5SDimitry Andric
71685f7ddb14SDimitry Andric if (!isPowerOf2_32(OpsWidth))
71695f7ddb14SDimitry Andric continue;
71705f7ddb14SDimitry Andric
71715f7ddb14SDimitry Andric if ((VF > MinVF && OpsWidth <= VF / 2) || (VF == MinVF && OpsWidth < 2))
71720b57cec5SDimitry Andric break;
71730b57cec5SDimitry Andric
71748bcb0991SDimitry Andric ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
71750b57cec5SDimitry Andric // Check that a previous iteration of this loop did not delete the Value.
71768bcb0991SDimitry Andric if (llvm::any_of(Ops, [&R](Value *V) {
71778bcb0991SDimitry Andric auto *I = dyn_cast<Instruction>(V);
71788bcb0991SDimitry Andric return I && R.isDeleted(I);
71798bcb0991SDimitry Andric }))
71800b57cec5SDimitry Andric continue;
71810b57cec5SDimitry Andric
71820b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
71830b57cec5SDimitry Andric << "\n");
71840b57cec5SDimitry Andric
71850b57cec5SDimitry Andric R.buildTree(Ops);
71865f7ddb14SDimitry Andric if (AllowReorder) {
71870b57cec5SDimitry Andric Optional<ArrayRef<unsigned>> Order = R.bestOrder();
71885f7ddb14SDimitry Andric if (Order) {
71890b57cec5SDimitry Andric // TODO: reorder tree nodes without tree rebuilding.
71905f7ddb14SDimitry Andric SmallVector<Value *, 4> ReorderedOps(Ops.size());
71915f7ddb14SDimitry Andric transform(fixupOrderingIndices(*Order), ReorderedOps.begin(),
71925f7ddb14SDimitry Andric [Ops](const unsigned Idx) { return Ops[Idx]; });
71935f7ddb14SDimitry Andric R.buildTree(ReorderedOps);
71945f7ddb14SDimitry Andric }
71950b57cec5SDimitry Andric }
71960b57cec5SDimitry Andric if (R.isTreeTinyAndNotFullyVectorizable())
71970b57cec5SDimitry Andric continue;
71980b57cec5SDimitry Andric
71990b57cec5SDimitry Andric R.computeMinimumValueSizes();
7200af732203SDimitry Andric InstructionCost Cost = R.getTreeCost();
72010b57cec5SDimitry Andric CandidateFound = true;
72020b57cec5SDimitry Andric MinCost = std::min(MinCost, Cost);
72030b57cec5SDimitry Andric
72040b57cec5SDimitry Andric if (Cost < -SLPCostThreshold) {
72050b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
72060b57cec5SDimitry Andric R.getORE()->emit(OptimizationRemark(SV_NAME, "VectorizedList",
72070b57cec5SDimitry Andric cast<Instruction>(Ops[0]))
72080b57cec5SDimitry Andric << "SLP vectorized with cost " << ore::NV("Cost", Cost)
72090b57cec5SDimitry Andric << " and with tree size "
72100b57cec5SDimitry Andric << ore::NV("TreeSize", R.getTreeSize()));
72110b57cec5SDimitry Andric
72120b57cec5SDimitry Andric R.vectorizeTree();
72130b57cec5SDimitry Andric // Move to the next bundle.
72140b57cec5SDimitry Andric I += VF - 1;
72150b57cec5SDimitry Andric NextInst = I + 1;
72160b57cec5SDimitry Andric Changed = true;
72170b57cec5SDimitry Andric }
72180b57cec5SDimitry Andric }
72190b57cec5SDimitry Andric }
72200b57cec5SDimitry Andric
72210b57cec5SDimitry Andric if (!Changed && CandidateFound) {
72220b57cec5SDimitry Andric R.getORE()->emit([&]() {
72230b57cec5SDimitry Andric return OptimizationRemarkMissed(SV_NAME, "NotBeneficial", I0)
72240b57cec5SDimitry Andric << "List vectorization was possible but not beneficial with cost "
72250b57cec5SDimitry Andric << ore::NV("Cost", MinCost) << " >= "
72260b57cec5SDimitry Andric << ore::NV("Treshold", -SLPCostThreshold);
72270b57cec5SDimitry Andric });
72280b57cec5SDimitry Andric } else if (!Changed) {
72290b57cec5SDimitry Andric R.getORE()->emit([&]() {
72300b57cec5SDimitry Andric return OptimizationRemarkMissed(SV_NAME, "NotPossible", I0)
72310b57cec5SDimitry Andric << "Cannot SLP vectorize list: vectorization was impossible"
72320b57cec5SDimitry Andric << " with available vectorization factors";
72330b57cec5SDimitry Andric });
72340b57cec5SDimitry Andric }
72350b57cec5SDimitry Andric return Changed;
72360b57cec5SDimitry Andric }
72370b57cec5SDimitry Andric
tryToVectorize(Instruction * I,BoUpSLP & R)72380b57cec5SDimitry Andric bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {
72390b57cec5SDimitry Andric if (!I)
72400b57cec5SDimitry Andric return false;
72410b57cec5SDimitry Andric
72420b57cec5SDimitry Andric if (!isa<BinaryOperator>(I) && !isa<CmpInst>(I))
72430b57cec5SDimitry Andric return false;
72440b57cec5SDimitry Andric
72450b57cec5SDimitry Andric Value *P = I->getParent();
72460b57cec5SDimitry Andric
72470b57cec5SDimitry Andric // Vectorize in current basic block only.
72480b57cec5SDimitry Andric auto *Op0 = dyn_cast<Instruction>(I->getOperand(0));
72490b57cec5SDimitry Andric auto *Op1 = dyn_cast<Instruction>(I->getOperand(1));
72500b57cec5SDimitry Andric if (!Op0 || !Op1 || Op0->getParent() != P || Op1->getParent() != P)
72510b57cec5SDimitry Andric return false;
72520b57cec5SDimitry Andric
72530b57cec5SDimitry Andric // Try to vectorize V.
72540b57cec5SDimitry Andric if (tryToVectorizePair(Op0, Op1, R))
72550b57cec5SDimitry Andric return true;
72560b57cec5SDimitry Andric
72570b57cec5SDimitry Andric auto *A = dyn_cast<BinaryOperator>(Op0);
72580b57cec5SDimitry Andric auto *B = dyn_cast<BinaryOperator>(Op1);
72590b57cec5SDimitry Andric // Try to skip B.
72600b57cec5SDimitry Andric if (B && B->hasOneUse()) {
72610b57cec5SDimitry Andric auto *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
72620b57cec5SDimitry Andric auto *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
72630b57cec5SDimitry Andric if (B0 && B0->getParent() == P && tryToVectorizePair(A, B0, R))
72640b57cec5SDimitry Andric return true;
72650b57cec5SDimitry Andric if (B1 && B1->getParent() == P && tryToVectorizePair(A, B1, R))
72660b57cec5SDimitry Andric return true;
72670b57cec5SDimitry Andric }
72680b57cec5SDimitry Andric
72690b57cec5SDimitry Andric // Try to skip A.
72700b57cec5SDimitry Andric if (A && A->hasOneUse()) {
72710b57cec5SDimitry Andric auto *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
72720b57cec5SDimitry Andric auto *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
72730b57cec5SDimitry Andric if (A0 && A0->getParent() == P && tryToVectorizePair(A0, B, R))
72740b57cec5SDimitry Andric return true;
72750b57cec5SDimitry Andric if (A1 && A1->getParent() == P && tryToVectorizePair(A1, B, R))
72760b57cec5SDimitry Andric return true;
72770b57cec5SDimitry Andric }
72780b57cec5SDimitry Andric return false;
72790b57cec5SDimitry Andric }
72800b57cec5SDimitry Andric
72810b57cec5SDimitry Andric namespace {
72820b57cec5SDimitry Andric
72830b57cec5SDimitry Andric /// Model horizontal reductions.
72840b57cec5SDimitry Andric ///
7285af732203SDimitry Andric /// A horizontal reduction is a tree of reduction instructions that has values
7286af732203SDimitry Andric /// that can be put into a vector as its leaves. For example:
72870b57cec5SDimitry Andric ///
72880b57cec5SDimitry Andric /// mul mul mul mul
72890b57cec5SDimitry Andric /// \ / \ /
72900b57cec5SDimitry Andric /// + +
72910b57cec5SDimitry Andric /// \ /
72920b57cec5SDimitry Andric /// +
7293af732203SDimitry Andric /// This tree has "mul" as its leaf values and "+" as its reduction
7294af732203SDimitry Andric /// instructions. A reduction can feed into a store or a binary operation
72950b57cec5SDimitry Andric /// feeding a phi.
72960b57cec5SDimitry Andric /// ...
72970b57cec5SDimitry Andric /// \ /
72980b57cec5SDimitry Andric /// +
72990b57cec5SDimitry Andric /// |
73000b57cec5SDimitry Andric /// phi +=
73010b57cec5SDimitry Andric ///
73020b57cec5SDimitry Andric /// Or:
73030b57cec5SDimitry Andric /// ...
73040b57cec5SDimitry Andric /// \ /
73050b57cec5SDimitry Andric /// +
73060b57cec5SDimitry Andric /// |
73070b57cec5SDimitry Andric /// *p =
73080b57cec5SDimitry Andric ///
73090b57cec5SDimitry Andric class HorizontalReduction {
73100b57cec5SDimitry Andric using ReductionOpsType = SmallVector<Value *, 16>;
73110b57cec5SDimitry Andric using ReductionOpsListType = SmallVector<ReductionOpsType, 2>;
73120b57cec5SDimitry Andric ReductionOpsListType ReductionOps;
73130b57cec5SDimitry Andric SmallVector<Value *, 32> ReducedVals;
73140b57cec5SDimitry Andric // Use map vector to make stable output.
73150b57cec5SDimitry Andric MapVector<Instruction *, Value *> ExtraArgs;
7316af732203SDimitry Andric WeakTrackingVH ReductionRoot;
7317af732203SDimitry Andric /// The type of reduction operation.
7318af732203SDimitry Andric RecurKind RdxKind;
73190b57cec5SDimitry Andric
73205f7ddb14SDimitry Andric const unsigned INVALID_OPERAND_INDEX = std::numeric_limits<unsigned>::max();
73215f7ddb14SDimitry Andric
isCmpSelMinMax(Instruction * I)73225f7ddb14SDimitry Andric static bool isCmpSelMinMax(Instruction *I) {
73235f7ddb14SDimitry Andric return match(I, m_Select(m_Cmp(), m_Value(), m_Value())) &&
73245f7ddb14SDimitry Andric RecurrenceDescriptor::isMinMaxRecurrenceKind(getRdxKind(I));
73255f7ddb14SDimitry Andric }
73265f7ddb14SDimitry Andric
73275f7ddb14SDimitry Andric // And/or are potentially poison-safe logical patterns like:
73285f7ddb14SDimitry Andric // select x, y, false
73295f7ddb14SDimitry Andric // select x, true, y
isBoolLogicOp(Instruction * I)73305f7ddb14SDimitry Andric static bool isBoolLogicOp(Instruction *I) {
73315f7ddb14SDimitry Andric return match(I, m_LogicalAnd(m_Value(), m_Value())) ||
73325f7ddb14SDimitry Andric match(I, m_LogicalOr(m_Value(), m_Value()));
73335f7ddb14SDimitry Andric }
73345f7ddb14SDimitry Andric
73350b57cec5SDimitry Andric /// Checks if instruction is associative and can be vectorized.
isVectorizable(RecurKind Kind,Instruction * I)7336af732203SDimitry Andric static bool isVectorizable(RecurKind Kind, Instruction *I) {
7337af732203SDimitry Andric if (Kind == RecurKind::None)
7338af732203SDimitry Andric return false;
73395f7ddb14SDimitry Andric
73405f7ddb14SDimitry Andric // Integer ops that map to select instructions or intrinsics are fine.
73415f7ddb14SDimitry Andric if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind) ||
73425f7ddb14SDimitry Andric isBoolLogicOp(I))
73430b57cec5SDimitry Andric return true;
7344af732203SDimitry Andric
7345af732203SDimitry Andric if (Kind == RecurKind::FMax || Kind == RecurKind::FMin) {
7346af732203SDimitry Andric // FP min/max are associative except for NaN and -0.0. We do not
7347af732203SDimitry Andric // have to rule out -0.0 here because the intrinsic semantics do not
7348af732203SDimitry Andric // specify a fixed result for it.
7349af732203SDimitry Andric return I->getFastMathFlags().noNaNs();
73500b57cec5SDimitry Andric }
73510b57cec5SDimitry Andric
7352af732203SDimitry Andric return I->isAssociative();
73530b57cec5SDimitry Andric }
73540b57cec5SDimitry Andric
getRdxOperand(Instruction * I,unsigned Index)73555f7ddb14SDimitry Andric static Value *getRdxOperand(Instruction *I, unsigned Index) {
73565f7ddb14SDimitry Andric // Poison-safe 'or' takes the form: select X, true, Y
73575f7ddb14SDimitry Andric // To make that work with the normal operand processing, we skip the
73585f7ddb14SDimitry Andric // true value operand.
73595f7ddb14SDimitry Andric // TODO: Change the code and data structures to handle this without a hack.
73605f7ddb14SDimitry Andric if (getRdxKind(I) == RecurKind::Or && isa<SelectInst>(I) && Index == 1)
73615f7ddb14SDimitry Andric return I->getOperand(2);
73625f7ddb14SDimitry Andric return I->getOperand(Index);
73635f7ddb14SDimitry Andric }
73645f7ddb14SDimitry Andric
73650b57cec5SDimitry Andric /// Checks if the ParentStackElem.first should be marked as a reduction
73660b57cec5SDimitry Andric /// operation with an extra argument or as extra argument itself.
markExtraArg(std::pair<Instruction *,unsigned> & ParentStackElem,Value * ExtraArg)73670b57cec5SDimitry Andric void markExtraArg(std::pair<Instruction *, unsigned> &ParentStackElem,
73680b57cec5SDimitry Andric Value *ExtraArg) {
73690b57cec5SDimitry Andric if (ExtraArgs.count(ParentStackElem.first)) {
73700b57cec5SDimitry Andric ExtraArgs[ParentStackElem.first] = nullptr;
73710b57cec5SDimitry Andric // We ran into something like:
73720b57cec5SDimitry Andric // ParentStackElem.first = ExtraArgs[ParentStackElem.first] + ExtraArg.
73730b57cec5SDimitry Andric // The whole ParentStackElem.first should be considered as an extra value
73740b57cec5SDimitry Andric // in this case.
73750b57cec5SDimitry Andric // Do not perform analysis of remaining operands of ParentStackElem.first
73760b57cec5SDimitry Andric // instruction, this whole instruction is an extra argument.
73775f7ddb14SDimitry Andric ParentStackElem.second = INVALID_OPERAND_INDEX;
73780b57cec5SDimitry Andric } else {
73790b57cec5SDimitry Andric // We ran into something like:
73800b57cec5SDimitry Andric // ParentStackElem.first += ... + ExtraArg + ...
73810b57cec5SDimitry Andric ExtraArgs[ParentStackElem.first] = ExtraArg;
73820b57cec5SDimitry Andric }
73830b57cec5SDimitry Andric }
73840b57cec5SDimitry Andric
7385af732203SDimitry Andric /// Creates reduction operation with the current opcode.
createOp(IRBuilder<> & Builder,RecurKind Kind,Value * LHS,Value * RHS,const Twine & Name,bool UseSelect)7386af732203SDimitry Andric static Value *createOp(IRBuilder<> &Builder, RecurKind Kind, Value *LHS,
73875f7ddb14SDimitry Andric Value *RHS, const Twine &Name, bool UseSelect) {
7388af732203SDimitry Andric unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
7389af732203SDimitry Andric switch (Kind) {
7390af732203SDimitry Andric case RecurKind::Add:
7391af732203SDimitry Andric case RecurKind::Mul:
7392af732203SDimitry Andric case RecurKind::Or:
7393af732203SDimitry Andric case RecurKind::And:
7394af732203SDimitry Andric case RecurKind::Xor:
7395af732203SDimitry Andric case RecurKind::FAdd:
7396af732203SDimitry Andric case RecurKind::FMul:
7397af732203SDimitry Andric return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
7398af732203SDimitry Andric Name);
7399af732203SDimitry Andric case RecurKind::FMax:
7400af732203SDimitry Andric return Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, LHS, RHS);
7401af732203SDimitry Andric case RecurKind::FMin:
7402af732203SDimitry Andric return Builder.CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS);
74035f7ddb14SDimitry Andric case RecurKind::SMax:
74045f7ddb14SDimitry Andric if (UseSelect) {
7405af732203SDimitry Andric Value *Cmp = Builder.CreateICmpSGT(LHS, RHS, Name);
7406af732203SDimitry Andric return Builder.CreateSelect(Cmp, LHS, RHS, Name);
74070b57cec5SDimitry Andric }
74085f7ddb14SDimitry Andric return Builder.CreateBinaryIntrinsic(Intrinsic::smax, LHS, RHS);
74095f7ddb14SDimitry Andric case RecurKind::SMin:
74105f7ddb14SDimitry Andric if (UseSelect) {
7411af732203SDimitry Andric Value *Cmp = Builder.CreateICmpSLT(LHS, RHS, Name);
7412af732203SDimitry Andric return Builder.CreateSelect(Cmp, LHS, RHS, Name);
7413af732203SDimitry Andric }
74145f7ddb14SDimitry Andric return Builder.CreateBinaryIntrinsic(Intrinsic::smin, LHS, RHS);
74155f7ddb14SDimitry Andric case RecurKind::UMax:
74165f7ddb14SDimitry Andric if (UseSelect) {
7417af732203SDimitry Andric Value *Cmp = Builder.CreateICmpUGT(LHS, RHS, Name);
7418af732203SDimitry Andric return Builder.CreateSelect(Cmp, LHS, RHS, Name);
7419af732203SDimitry Andric }
74205f7ddb14SDimitry Andric return Builder.CreateBinaryIntrinsic(Intrinsic::umax, LHS, RHS);
74215f7ddb14SDimitry Andric case RecurKind::UMin:
74225f7ddb14SDimitry Andric if (UseSelect) {
7423af732203SDimitry Andric Value *Cmp = Builder.CreateICmpULT(LHS, RHS, Name);
7424af732203SDimitry Andric return Builder.CreateSelect(Cmp, LHS, RHS, Name);
7425af732203SDimitry Andric }
74265f7ddb14SDimitry Andric return Builder.CreateBinaryIntrinsic(Intrinsic::umin, LHS, RHS);
7427af732203SDimitry Andric default:
7428af732203SDimitry Andric llvm_unreachable("Unknown reduction operation.");
7429af732203SDimitry Andric }
7430af732203SDimitry Andric }
7431af732203SDimitry Andric
7432af732203SDimitry Andric /// Creates reduction operation with the current opcode with the IR flags
7433af732203SDimitry Andric /// from \p ReductionOps.
createOp(IRBuilder<> & Builder,RecurKind RdxKind,Value * LHS,Value * RHS,const Twine & Name,const ReductionOpsListType & ReductionOps)7434af732203SDimitry Andric static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS,
7435af732203SDimitry Andric Value *RHS, const Twine &Name,
7436af732203SDimitry Andric const ReductionOpsListType &ReductionOps) {
74375f7ddb14SDimitry Andric bool UseSelect = ReductionOps.size() == 2;
74385f7ddb14SDimitry Andric assert((!UseSelect || isa<SelectInst>(ReductionOps[1][0])) &&
74395f7ddb14SDimitry Andric "Expected cmp + select pairs for reduction");
74405f7ddb14SDimitry Andric Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, UseSelect);
7441af732203SDimitry Andric if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
74425f7ddb14SDimitry Andric if (auto *Sel = dyn_cast<SelectInst>(Op)) {
7443af732203SDimitry Andric propagateIRFlags(Sel->getCondition(), ReductionOps[0]);
7444af732203SDimitry Andric propagateIRFlags(Op, ReductionOps[1]);
7445af732203SDimitry Andric return Op;
7446af732203SDimitry Andric }
74475f7ddb14SDimitry Andric }
7448af732203SDimitry Andric propagateIRFlags(Op, ReductionOps[0]);
7449af732203SDimitry Andric return Op;
7450af732203SDimitry Andric }
74515f7ddb14SDimitry Andric
7452af732203SDimitry Andric /// Creates reduction operation with the current opcode with the IR flags
7453af732203SDimitry Andric /// from \p I.
createOp(IRBuilder<> & Builder,RecurKind RdxKind,Value * LHS,Value * RHS,const Twine & Name,Instruction * I)7454af732203SDimitry Andric static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS,
7455af732203SDimitry Andric Value *RHS, const Twine &Name, Instruction *I) {
74565f7ddb14SDimitry Andric auto *SelI = dyn_cast<SelectInst>(I);
74575f7ddb14SDimitry Andric Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, SelI != nullptr);
74585f7ddb14SDimitry Andric if (SelI && RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
74595f7ddb14SDimitry Andric if (auto *Sel = dyn_cast<SelectInst>(Op))
74605f7ddb14SDimitry Andric propagateIRFlags(Sel->getCondition(), SelI->getCondition());
7461af732203SDimitry Andric }
7462af732203SDimitry Andric propagateIRFlags(Op, I);
7463af732203SDimitry Andric return Op;
7464af732203SDimitry Andric }
7465af732203SDimitry Andric
getRdxKind(Instruction * I)7466af732203SDimitry Andric static RecurKind getRdxKind(Instruction *I) {
7467af732203SDimitry Andric assert(I && "Expected instruction for reduction matching");
7468af732203SDimitry Andric TargetTransformInfo::ReductionFlags RdxFlags;
7469af732203SDimitry Andric if (match(I, m_Add(m_Value(), m_Value())))
7470af732203SDimitry Andric return RecurKind::Add;
7471af732203SDimitry Andric if (match(I, m_Mul(m_Value(), m_Value())))
7472af732203SDimitry Andric return RecurKind::Mul;
74735f7ddb14SDimitry Andric if (match(I, m_And(m_Value(), m_Value())) ||
74745f7ddb14SDimitry Andric match(I, m_LogicalAnd(m_Value(), m_Value())))
7475af732203SDimitry Andric return RecurKind::And;
74765f7ddb14SDimitry Andric if (match(I, m_Or(m_Value(), m_Value())) ||
74775f7ddb14SDimitry Andric match(I, m_LogicalOr(m_Value(), m_Value())))
7478af732203SDimitry Andric return RecurKind::Or;
7479af732203SDimitry Andric if (match(I, m_Xor(m_Value(), m_Value())))
7480af732203SDimitry Andric return RecurKind::Xor;
7481af732203SDimitry Andric if (match(I, m_FAdd(m_Value(), m_Value())))
7482af732203SDimitry Andric return RecurKind::FAdd;
7483af732203SDimitry Andric if (match(I, m_FMul(m_Value(), m_Value())))
7484af732203SDimitry Andric return RecurKind::FMul;
7485af732203SDimitry Andric
7486af732203SDimitry Andric if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
7487af732203SDimitry Andric return RecurKind::FMax;
7488af732203SDimitry Andric if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value())))
7489af732203SDimitry Andric return RecurKind::FMin;
7490af732203SDimitry Andric
74915f7ddb14SDimitry Andric // This matches either cmp+select or intrinsics. SLP is expected to handle
74925f7ddb14SDimitry Andric // either form.
74935f7ddb14SDimitry Andric // TODO: If we are canonicalizing to intrinsics, we can remove several
74945f7ddb14SDimitry Andric // special-case paths that deal with selects.
7495af732203SDimitry Andric if (match(I, m_SMax(m_Value(), m_Value())))
7496af732203SDimitry Andric return RecurKind::SMax;
7497af732203SDimitry Andric if (match(I, m_SMin(m_Value(), m_Value())))
7498af732203SDimitry Andric return RecurKind::SMin;
7499af732203SDimitry Andric if (match(I, m_UMax(m_Value(), m_Value())))
7500af732203SDimitry Andric return RecurKind::UMax;
7501af732203SDimitry Andric if (match(I, m_UMin(m_Value(), m_Value())))
7502af732203SDimitry Andric return RecurKind::UMin;
7503af732203SDimitry Andric
7504af732203SDimitry Andric if (auto *Select = dyn_cast<SelectInst>(I)) {
75050b57cec5SDimitry Andric // Try harder: look for min/max pattern based on instructions producing
75060b57cec5SDimitry Andric // same values such as: select ((cmp Inst1, Inst2), Inst1, Inst2).
75070b57cec5SDimitry Andric // During the intermediate stages of SLP, it's very common to have
75080b57cec5SDimitry Andric // pattern like this (since optimizeGatherSequence is run only once
75090b57cec5SDimitry Andric // at the end):
75100b57cec5SDimitry Andric // %1 = extractelement <2 x i32> %a, i32 0
75110b57cec5SDimitry Andric // %2 = extractelement <2 x i32> %a, i32 1
75120b57cec5SDimitry Andric // %cond = icmp sgt i32 %1, %2
75130b57cec5SDimitry Andric // %3 = extractelement <2 x i32> %a, i32 0
75140b57cec5SDimitry Andric // %4 = extractelement <2 x i32> %a, i32 1
75150b57cec5SDimitry Andric // %select = select i1 %cond, i32 %3, i32 %4
75160b57cec5SDimitry Andric CmpInst::Predicate Pred;
75170b57cec5SDimitry Andric Instruction *L1;
75180b57cec5SDimitry Andric Instruction *L2;
75190b57cec5SDimitry Andric
7520af732203SDimitry Andric Value *LHS = Select->getTrueValue();
7521af732203SDimitry Andric Value *RHS = Select->getFalseValue();
75220b57cec5SDimitry Andric Value *Cond = Select->getCondition();
75230b57cec5SDimitry Andric
75240b57cec5SDimitry Andric // TODO: Support inverse predicates.
75250b57cec5SDimitry Andric if (match(Cond, m_Cmp(Pred, m_Specific(LHS), m_Instruction(L2)))) {
75260b57cec5SDimitry Andric if (!isa<ExtractElementInst>(RHS) ||
75270b57cec5SDimitry Andric !L2->isIdenticalTo(cast<Instruction>(RHS)))
7528af732203SDimitry Andric return RecurKind::None;
75290b57cec5SDimitry Andric } else if (match(Cond, m_Cmp(Pred, m_Instruction(L1), m_Specific(RHS)))) {
75300b57cec5SDimitry Andric if (!isa<ExtractElementInst>(LHS) ||
75310b57cec5SDimitry Andric !L1->isIdenticalTo(cast<Instruction>(LHS)))
7532af732203SDimitry Andric return RecurKind::None;
75330b57cec5SDimitry Andric } else {
75340b57cec5SDimitry Andric if (!isa<ExtractElementInst>(LHS) || !isa<ExtractElementInst>(RHS))
7535af732203SDimitry Andric return RecurKind::None;
75360b57cec5SDimitry Andric if (!match(Cond, m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2))) ||
75370b57cec5SDimitry Andric !L1->isIdenticalTo(cast<Instruction>(LHS)) ||
75380b57cec5SDimitry Andric !L2->isIdenticalTo(cast<Instruction>(RHS)))
7539af732203SDimitry Andric return RecurKind::None;
75400b57cec5SDimitry Andric }
7541af732203SDimitry Andric
7542af732203SDimitry Andric TargetTransformInfo::ReductionFlags RdxFlags;
75430b57cec5SDimitry Andric switch (Pred) {
75440b57cec5SDimitry Andric default:
7545af732203SDimitry Andric return RecurKind::None;
75460b57cec5SDimitry Andric case CmpInst::ICMP_SGT:
75470b57cec5SDimitry Andric case CmpInst::ICMP_SGE:
7548af732203SDimitry Andric return RecurKind::SMax;
7549af732203SDimitry Andric case CmpInst::ICMP_SLT:
7550af732203SDimitry Andric case CmpInst::ICMP_SLE:
7551af732203SDimitry Andric return RecurKind::SMin;
7552af732203SDimitry Andric case CmpInst::ICMP_UGT:
7553af732203SDimitry Andric case CmpInst::ICMP_UGE:
7554af732203SDimitry Andric return RecurKind::UMax;
7555af732203SDimitry Andric case CmpInst::ICMP_ULT:
7556af732203SDimitry Andric case CmpInst::ICMP_ULE:
7557af732203SDimitry Andric return RecurKind::UMin;
7558af732203SDimitry Andric }
7559af732203SDimitry Andric }
7560af732203SDimitry Andric return RecurKind::None;
7561af732203SDimitry Andric }
75620b57cec5SDimitry Andric
7563af732203SDimitry Andric /// Get the index of the first operand.
getFirstOperandIndex(Instruction * I)75645f7ddb14SDimitry Andric static unsigned getFirstOperandIndex(Instruction *I) {
75655f7ddb14SDimitry Andric return isCmpSelMinMax(I) ? 1 : 0;
7566af732203SDimitry Andric }
7567af732203SDimitry Andric
7568af732203SDimitry Andric /// Total number of operands in the reduction operation.
getNumberOfOperands(Instruction * I)75695f7ddb14SDimitry Andric static unsigned getNumberOfOperands(Instruction *I) {
75705f7ddb14SDimitry Andric return isCmpSelMinMax(I) ? 3 : 2;
7571af732203SDimitry Andric }
7572af732203SDimitry Andric
7573af732203SDimitry Andric /// Checks if the instruction is in basic block \p BB.
75745f7ddb14SDimitry Andric /// For a cmp+sel min/max reduction check that both ops are in \p BB.
hasSameParent(Instruction * I,BasicBlock * BB)75755f7ddb14SDimitry Andric static bool hasSameParent(Instruction *I, BasicBlock *BB) {
75765f7ddb14SDimitry Andric if (isCmpSelMinMax(I)) {
75775f7ddb14SDimitry Andric auto *Sel = cast<SelectInst>(I);
75785f7ddb14SDimitry Andric auto *Cmp = cast<Instruction>(Sel->getCondition());
75795f7ddb14SDimitry Andric return Sel->getParent() == BB && Cmp->getParent() == BB;
7580af732203SDimitry Andric }
7581af732203SDimitry Andric return I->getParent() == BB;
7582af732203SDimitry Andric }
7583af732203SDimitry Andric
7584af732203SDimitry Andric /// Expected number of uses for reduction operations/reduced values.
hasRequiredNumberOfUses(bool IsCmpSelMinMax,Instruction * I)75855f7ddb14SDimitry Andric static bool hasRequiredNumberOfUses(bool IsCmpSelMinMax, Instruction *I) {
75865f7ddb14SDimitry Andric if (IsCmpSelMinMax) {
7587af732203SDimitry Andric // SelectInst must be used twice while the condition op must have single
7588af732203SDimitry Andric // use only.
75895f7ddb14SDimitry Andric if (auto *Sel = dyn_cast<SelectInst>(I))
75905f7ddb14SDimitry Andric return Sel->hasNUses(2) && Sel->getCondition()->hasOneUse();
75915f7ddb14SDimitry Andric return I->hasNUses(2);
75925f7ddb14SDimitry Andric }
7593af732203SDimitry Andric
7594af732203SDimitry Andric // Arithmetic reduction operation must be used once only.
7595af732203SDimitry Andric return I->hasOneUse();
7596af732203SDimitry Andric }
7597af732203SDimitry Andric
7598af732203SDimitry Andric /// Initializes the list of reduction operations.
initReductionOps(Instruction * I)75995f7ddb14SDimitry Andric void initReductionOps(Instruction *I) {
76005f7ddb14SDimitry Andric if (isCmpSelMinMax(I))
7601af732203SDimitry Andric ReductionOps.assign(2, ReductionOpsType());
7602af732203SDimitry Andric else
7603af732203SDimitry Andric ReductionOps.assign(1, ReductionOpsType());
7604af732203SDimitry Andric }
7605af732203SDimitry Andric
7606af732203SDimitry Andric /// Add all reduction operations for the reduction instruction \p I.
addReductionOps(Instruction * I)76075f7ddb14SDimitry Andric void addReductionOps(Instruction *I) {
76085f7ddb14SDimitry Andric if (isCmpSelMinMax(I)) {
7609af732203SDimitry Andric ReductionOps[0].emplace_back(cast<SelectInst>(I)->getCondition());
7610af732203SDimitry Andric ReductionOps[1].emplace_back(I);
7611af732203SDimitry Andric } else {
7612af732203SDimitry Andric ReductionOps[0].emplace_back(I);
76130b57cec5SDimitry Andric }
76140b57cec5SDimitry Andric }
7615af732203SDimitry Andric
getLHS(RecurKind Kind,Instruction * I)7616af732203SDimitry Andric static Value *getLHS(RecurKind Kind, Instruction *I) {
7617af732203SDimitry Andric if (Kind == RecurKind::None)
7618af732203SDimitry Andric return nullptr;
76195f7ddb14SDimitry Andric return I->getOperand(getFirstOperandIndex(I));
76200b57cec5SDimitry Andric }
getRHS(RecurKind Kind,Instruction * I)7621af732203SDimitry Andric static Value *getRHS(RecurKind Kind, Instruction *I) {
7622af732203SDimitry Andric if (Kind == RecurKind::None)
7623af732203SDimitry Andric return nullptr;
76245f7ddb14SDimitry Andric return I->getOperand(getFirstOperandIndex(I) + 1);
76250b57cec5SDimitry Andric }
76260b57cec5SDimitry Andric
76270b57cec5SDimitry Andric public:
76280b57cec5SDimitry Andric HorizontalReduction() = default;
76290b57cec5SDimitry Andric
76300b57cec5SDimitry Andric /// Try to find a reduction tree.
matchAssociativeReduction(PHINode * Phi,Instruction * Inst)76315f7ddb14SDimitry Andric bool matchAssociativeReduction(PHINode *Phi, Instruction *Inst) {
76325f7ddb14SDimitry Andric assert((!Phi || is_contained(Phi->operands(), Inst)) &&
7633af732203SDimitry Andric "Phi needs to use the binary operator");
76345f7ddb14SDimitry Andric assert((isa<BinaryOperator>(Inst) || isa<SelectInst>(Inst) ||
76355f7ddb14SDimitry Andric isa<IntrinsicInst>(Inst)) &&
76365f7ddb14SDimitry Andric "Expected binop, select, or intrinsic for reduction matching");
76375f7ddb14SDimitry Andric RdxKind = getRdxKind(Inst);
76380b57cec5SDimitry Andric
76390b57cec5SDimitry Andric // We could have a initial reductions that is not an add.
76400b57cec5SDimitry Andric // r *= v1 + v2 + v3 + v4
76410b57cec5SDimitry Andric // In such a case start looking for a tree rooted in the first '+'.
76420b57cec5SDimitry Andric if (Phi) {
76435f7ddb14SDimitry Andric if (getLHS(RdxKind, Inst) == Phi) {
76440b57cec5SDimitry Andric Phi = nullptr;
76455f7ddb14SDimitry Andric Inst = dyn_cast<Instruction>(getRHS(RdxKind, Inst));
76465f7ddb14SDimitry Andric if (!Inst)
7647af732203SDimitry Andric return false;
76485f7ddb14SDimitry Andric RdxKind = getRdxKind(Inst);
76495f7ddb14SDimitry Andric } else if (getRHS(RdxKind, Inst) == Phi) {
76500b57cec5SDimitry Andric Phi = nullptr;
76515f7ddb14SDimitry Andric Inst = dyn_cast<Instruction>(getLHS(RdxKind, Inst));
76525f7ddb14SDimitry Andric if (!Inst)
7653af732203SDimitry Andric return false;
76545f7ddb14SDimitry Andric RdxKind = getRdxKind(Inst);
76550b57cec5SDimitry Andric }
76560b57cec5SDimitry Andric }
76570b57cec5SDimitry Andric
76585f7ddb14SDimitry Andric if (!isVectorizable(RdxKind, Inst))
76590b57cec5SDimitry Andric return false;
76600b57cec5SDimitry Andric
7661af732203SDimitry Andric // Analyze "regular" integer/FP types for reductions - no target-specific
7662af732203SDimitry Andric // types or pointers.
76635f7ddb14SDimitry Andric Type *Ty = Inst->getType();
7664af732203SDimitry Andric if (!isValidElementType(Ty) || Ty->isPointerTy())
76650b57cec5SDimitry Andric return false;
76660b57cec5SDimitry Andric
76675f7ddb14SDimitry Andric // Though the ultimate reduction may have multiple uses, its condition must
76685f7ddb14SDimitry Andric // have only single use.
76695f7ddb14SDimitry Andric if (auto *Sel = dyn_cast<SelectInst>(Inst))
76705f7ddb14SDimitry Andric if (!Sel->getCondition()->hasOneUse())
76715f7ddb14SDimitry Andric return false;
76725f7ddb14SDimitry Andric
76735f7ddb14SDimitry Andric ReductionRoot = Inst;
76740b57cec5SDimitry Andric
7675af732203SDimitry Andric // The opcode for leaf values that we perform a reduction on.
7676af732203SDimitry Andric // For example: load(x) + load(y) + load(z) + fptoui(w)
7677af732203SDimitry Andric // The leaf opcode for 'w' does not match, so we don't include it as a
7678af732203SDimitry Andric // potential candidate for the reduction.
7679af732203SDimitry Andric unsigned LeafOpcode = 0;
7680af732203SDimitry Andric
76815f7ddb14SDimitry Andric // Post-order traverse the reduction tree starting at Inst. We only handle
76825f7ddb14SDimitry Andric // true trees containing binary operators or selects.
76830b57cec5SDimitry Andric SmallVector<std::pair<Instruction *, unsigned>, 32> Stack;
76845f7ddb14SDimitry Andric Stack.push_back(std::make_pair(Inst, getFirstOperandIndex(Inst)));
76855f7ddb14SDimitry Andric initReductionOps(Inst);
76860b57cec5SDimitry Andric while (!Stack.empty()) {
76870b57cec5SDimitry Andric Instruction *TreeN = Stack.back().first;
7688af732203SDimitry Andric unsigned EdgeToVisit = Stack.back().second++;
7689af732203SDimitry Andric const RecurKind TreeRdxKind = getRdxKind(TreeN);
7690af732203SDimitry Andric bool IsReducedValue = TreeRdxKind != RdxKind;
76910b57cec5SDimitry Andric
7692af732203SDimitry Andric // Postorder visit.
76935f7ddb14SDimitry Andric if (IsReducedValue || EdgeToVisit >= getNumberOfOperands(TreeN)) {
76940b57cec5SDimitry Andric if (IsReducedValue)
76950b57cec5SDimitry Andric ReducedVals.push_back(TreeN);
76960b57cec5SDimitry Andric else {
76975f7ddb14SDimitry Andric auto ExtraArgsIter = ExtraArgs.find(TreeN);
76985f7ddb14SDimitry Andric if (ExtraArgsIter != ExtraArgs.end() && !ExtraArgsIter->second) {
76990b57cec5SDimitry Andric // Check if TreeN is an extra argument of its parent operation.
77000b57cec5SDimitry Andric if (Stack.size() <= 1) {
77010b57cec5SDimitry Andric // TreeN can't be an extra argument as it is a root reduction
77020b57cec5SDimitry Andric // operation.
77030b57cec5SDimitry Andric return false;
77040b57cec5SDimitry Andric }
77050b57cec5SDimitry Andric // Yes, TreeN is an extra argument, do not add it to a list of
77060b57cec5SDimitry Andric // reduction operations.
77070b57cec5SDimitry Andric // Stack[Stack.size() - 2] always points to the parent operation.
77080b57cec5SDimitry Andric markExtraArg(Stack[Stack.size() - 2], TreeN);
77090b57cec5SDimitry Andric ExtraArgs.erase(TreeN);
77100b57cec5SDimitry Andric } else
77115f7ddb14SDimitry Andric addReductionOps(TreeN);
77120b57cec5SDimitry Andric }
77130b57cec5SDimitry Andric // Retract.
77140b57cec5SDimitry Andric Stack.pop_back();
77150b57cec5SDimitry Andric continue;
77160b57cec5SDimitry Andric }
77170b57cec5SDimitry Andric
77185f7ddb14SDimitry Andric // Visit operands.
77195f7ddb14SDimitry Andric Value *EdgeVal = getRdxOperand(TreeN, EdgeToVisit);
77205f7ddb14SDimitry Andric auto *EdgeInst = dyn_cast<Instruction>(EdgeVal);
77215f7ddb14SDimitry Andric if (!EdgeInst) {
7722af732203SDimitry Andric // Edge value is not a reduction instruction or a leaf instruction.
7723af732203SDimitry Andric // (It may be a constant, function argument, or something else.)
7724af732203SDimitry Andric markExtraArg(Stack.back(), EdgeVal);
77250b57cec5SDimitry Andric continue;
77260b57cec5SDimitry Andric }
77275f7ddb14SDimitry Andric RecurKind EdgeRdxKind = getRdxKind(EdgeInst);
7728af732203SDimitry Andric // Continue analysis if the next operand is a reduction operation or
7729af732203SDimitry Andric // (possibly) a leaf value. If the leaf value opcode is not set,
7730af732203SDimitry Andric // the first met operation != reduction operation is considered as the
7731af732203SDimitry Andric // leaf opcode.
7732af732203SDimitry Andric // Only handle trees in the current basic block.
77330b57cec5SDimitry Andric // Each tree node needs to have minimal number of users except for the
77340b57cec5SDimitry Andric // ultimate reduction.
7735af732203SDimitry Andric const bool IsRdxInst = EdgeRdxKind == RdxKind;
77365f7ddb14SDimitry Andric if (EdgeInst != Phi && EdgeInst != Inst &&
77375f7ddb14SDimitry Andric hasSameParent(EdgeInst, Inst->getParent()) &&
77385f7ddb14SDimitry Andric hasRequiredNumberOfUses(isCmpSelMinMax(Inst), EdgeInst) &&
77395f7ddb14SDimitry Andric (!LeafOpcode || LeafOpcode == EdgeInst->getOpcode() || IsRdxInst)) {
7740af732203SDimitry Andric if (IsRdxInst) {
77410b57cec5SDimitry Andric // We need to be able to reassociate the reduction operations.
77425f7ddb14SDimitry Andric if (!isVectorizable(EdgeRdxKind, EdgeInst)) {
77430b57cec5SDimitry Andric // I is an extra argument for TreeN (its parent operation).
77445f7ddb14SDimitry Andric markExtraArg(Stack.back(), EdgeInst);
77450b57cec5SDimitry Andric continue;
77460b57cec5SDimitry Andric }
7747af732203SDimitry Andric } else if (!LeafOpcode) {
77485f7ddb14SDimitry Andric LeafOpcode = EdgeInst->getOpcode();
7749af732203SDimitry Andric }
77505f7ddb14SDimitry Andric Stack.push_back(
77515f7ddb14SDimitry Andric std::make_pair(EdgeInst, getFirstOperandIndex(EdgeInst)));
7752af732203SDimitry Andric continue;
7753af732203SDimitry Andric }
77540b57cec5SDimitry Andric // I is an extra argument for TreeN (its parent operation).
77555f7ddb14SDimitry Andric markExtraArg(Stack.back(), EdgeInst);
77560b57cec5SDimitry Andric }
77570b57cec5SDimitry Andric return true;
77580b57cec5SDimitry Andric }
77590b57cec5SDimitry Andric
7760af732203SDimitry Andric /// Attempt to vectorize the tree found by matchAssociativeReduction.
tryToReduce(BoUpSLP & V,TargetTransformInfo * TTI)77610b57cec5SDimitry Andric bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
7762af732203SDimitry Andric // If there are a sufficient number of reduction values, reduce
7763af732203SDimitry Andric // to a nearby power-of-2. We can safely generate oversized
77640b57cec5SDimitry Andric // vectors and rely on the backend to split them to legal sizes.
77650b57cec5SDimitry Andric unsigned NumReducedVals = ReducedVals.size();
77660b57cec5SDimitry Andric if (NumReducedVals < 4)
77670b57cec5SDimitry Andric return false;
77680b57cec5SDimitry Andric
7769af732203SDimitry Andric // Intersect the fast-math-flags from all reduction operations.
7770af732203SDimitry Andric FastMathFlags RdxFMF;
7771af732203SDimitry Andric RdxFMF.set();
7772af732203SDimitry Andric for (ReductionOpsType &RdxOp : ReductionOps) {
7773af732203SDimitry Andric for (Value *RdxVal : RdxOp) {
7774af732203SDimitry Andric if (auto *FPMO = dyn_cast<FPMathOperator>(RdxVal))
7775af732203SDimitry Andric RdxFMF &= FPMO->getFastMathFlags();
7776af732203SDimitry Andric }
7777af732203SDimitry Andric }
77780b57cec5SDimitry Andric
77790b57cec5SDimitry Andric IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
7780af732203SDimitry Andric Builder.setFastMathFlags(RdxFMF);
77810b57cec5SDimitry Andric
77820b57cec5SDimitry Andric BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
7783af732203SDimitry Andric // The same extra argument may be used several times, so log each attempt
77840b57cec5SDimitry Andric // to use it.
7785af732203SDimitry Andric for (const std::pair<Instruction *, Value *> &Pair : ExtraArgs) {
77860b57cec5SDimitry Andric assert(Pair.first && "DebugLoc must be set.");
77870b57cec5SDimitry Andric ExternallyUsedValues[Pair.second].push_back(Pair.first);
77880b57cec5SDimitry Andric }
7789480093f4SDimitry Andric
7790480093f4SDimitry Andric // The compare instruction of a min/max is the insertion point for new
7791480093f4SDimitry Andric // instructions and may be replaced with a new compare instruction.
7792480093f4SDimitry Andric auto getCmpForMinMaxReduction = [](Instruction *RdxRootInst) {
7793480093f4SDimitry Andric assert(isa<SelectInst>(RdxRootInst) &&
7794480093f4SDimitry Andric "Expected min/max reduction to have select root instruction");
7795480093f4SDimitry Andric Value *ScalarCond = cast<SelectInst>(RdxRootInst)->getCondition();
7796480093f4SDimitry Andric assert(isa<Instruction>(ScalarCond) &&
7797480093f4SDimitry Andric "Expected min/max reduction to have compare condition");
7798480093f4SDimitry Andric return cast<Instruction>(ScalarCond);
7799480093f4SDimitry Andric };
7800480093f4SDimitry Andric
78010b57cec5SDimitry Andric // The reduction root is used as the insertion point for new instructions,
78020b57cec5SDimitry Andric // so set it as externally used to prevent it from being deleted.
78030b57cec5SDimitry Andric ExternallyUsedValues[ReductionRoot];
78040b57cec5SDimitry Andric SmallVector<Value *, 16> IgnoreList;
7805af732203SDimitry Andric for (ReductionOpsType &RdxOp : ReductionOps)
7806af732203SDimitry Andric IgnoreList.append(RdxOp.begin(), RdxOp.end());
7807af732203SDimitry Andric
7808af732203SDimitry Andric unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
7809af732203SDimitry Andric if (NumReducedVals > ReduxWidth) {
7810af732203SDimitry Andric // In the loop below, we are building a tree based on a window of
7811af732203SDimitry Andric // 'ReduxWidth' values.
7812af732203SDimitry Andric // If the operands of those values have common traits (compare predicate,
7813af732203SDimitry Andric // constant operand, etc), then we want to group those together to
7814af732203SDimitry Andric // minimize the cost of the reduction.
7815af732203SDimitry Andric
7816af732203SDimitry Andric // TODO: This should be extended to count common operands for
7817af732203SDimitry Andric // compares and binops.
7818af732203SDimitry Andric
7819af732203SDimitry Andric // Step 1: Count the number of times each compare predicate occurs.
7820af732203SDimitry Andric SmallDenseMap<unsigned, unsigned> PredCountMap;
7821af732203SDimitry Andric for (Value *RdxVal : ReducedVals) {
7822af732203SDimitry Andric CmpInst::Predicate Pred;
7823af732203SDimitry Andric if (match(RdxVal, m_Cmp(Pred, m_Value(), m_Value())))
7824af732203SDimitry Andric ++PredCountMap[Pred];
7825af732203SDimitry Andric }
7826af732203SDimitry Andric // Step 2: Sort the values so the most common predicates come first.
7827af732203SDimitry Andric stable_sort(ReducedVals, [&PredCountMap](Value *A, Value *B) {
7828af732203SDimitry Andric CmpInst::Predicate PredA, PredB;
7829af732203SDimitry Andric if (match(A, m_Cmp(PredA, m_Value(), m_Value())) &&
7830af732203SDimitry Andric match(B, m_Cmp(PredB, m_Value(), m_Value()))) {
7831af732203SDimitry Andric return PredCountMap[PredA] > PredCountMap[PredB];
7832af732203SDimitry Andric }
7833af732203SDimitry Andric return false;
7834af732203SDimitry Andric });
7835af732203SDimitry Andric }
7836af732203SDimitry Andric
7837af732203SDimitry Andric Value *VectorizedTree = nullptr;
7838af732203SDimitry Andric unsigned i = 0;
78390b57cec5SDimitry Andric while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
7840af732203SDimitry Andric ArrayRef<Value *> VL(&ReducedVals[i], ReduxWidth);
78410b57cec5SDimitry Andric V.buildTree(VL, ExternallyUsedValues, IgnoreList);
78420b57cec5SDimitry Andric Optional<ArrayRef<unsigned>> Order = V.bestOrder();
7843af732203SDimitry Andric if (Order) {
7844af732203SDimitry Andric assert(Order->size() == VL.size() &&
7845af732203SDimitry Andric "Order size must be the same as number of vectorized "
7846af732203SDimitry Andric "instructions.");
78470b57cec5SDimitry Andric // TODO: reorder tree nodes without tree rebuilding.
78480b57cec5SDimitry Andric SmallVector<Value *, 4> ReorderedOps(VL.size());
78495f7ddb14SDimitry Andric transform(fixupOrderingIndices(*Order), ReorderedOps.begin(),
78500b57cec5SDimitry Andric [VL](const unsigned Idx) { return VL[Idx]; });
78510b57cec5SDimitry Andric V.buildTree(ReorderedOps, ExternallyUsedValues, IgnoreList);
78520b57cec5SDimitry Andric }
78530b57cec5SDimitry Andric if (V.isTreeTinyAndNotFullyVectorizable())
78540b57cec5SDimitry Andric break;
7855af732203SDimitry Andric if (V.isLoadCombineReductionCandidate(RdxKind))
78568bcb0991SDimitry Andric break;
78570b57cec5SDimitry Andric
78585f7ddb14SDimitry Andric // For a poison-safe boolean logic reduction, do not replace select
78595f7ddb14SDimitry Andric // instructions with logic ops. All reduced values will be frozen (see
78605f7ddb14SDimitry Andric // below) to prevent leaking poison.
78615f7ddb14SDimitry Andric if (isa<SelectInst>(ReductionRoot) &&
78625f7ddb14SDimitry Andric isBoolLogicOp(cast<Instruction>(ReductionRoot)) &&
78635f7ddb14SDimitry Andric NumReducedVals != ReduxWidth)
78645f7ddb14SDimitry Andric break;
78655f7ddb14SDimitry Andric
78660b57cec5SDimitry Andric V.computeMinimumValueSizes();
78670b57cec5SDimitry Andric
78680b57cec5SDimitry Andric // Estimate cost.
78695f7ddb14SDimitry Andric InstructionCost TreeCost =
78705f7ddb14SDimitry Andric V.getTreeCost(makeArrayRef(&ReducedVals[i], ReduxWidth));
7871af732203SDimitry Andric InstructionCost ReductionCost =
78725f7ddb14SDimitry Andric getReductionCost(TTI, ReducedVals[i], ReduxWidth, RdxFMF);
7873af732203SDimitry Andric InstructionCost Cost = TreeCost + ReductionCost;
7874af732203SDimitry Andric if (!Cost.isValid()) {
7875af732203SDimitry Andric LLVM_DEBUG(dbgs() << "Encountered invalid baseline cost.\n");
7876af732203SDimitry Andric return false;
7877af732203SDimitry Andric }
78780b57cec5SDimitry Andric if (Cost >= -SLPCostThreshold) {
78790b57cec5SDimitry Andric V.getORE()->emit([&]() {
7880af732203SDimitry Andric return OptimizationRemarkMissed(SV_NAME, "HorSLPNotBeneficial",
7881af732203SDimitry Andric cast<Instruction>(VL[0]))
78820b57cec5SDimitry Andric << "Vectorizing horizontal reduction is possible"
7883af732203SDimitry Andric << "but not beneficial with cost " << ore::NV("Cost", Cost)
7884af732203SDimitry Andric << " and threshold "
78850b57cec5SDimitry Andric << ore::NV("Threshold", -SLPCostThreshold);
78860b57cec5SDimitry Andric });
78870b57cec5SDimitry Andric break;
78880b57cec5SDimitry Andric }
78890b57cec5SDimitry Andric
78900b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:"
78910b57cec5SDimitry Andric << Cost << ". (HorRdx)\n");
78920b57cec5SDimitry Andric V.getORE()->emit([&]() {
7893af732203SDimitry Andric return OptimizationRemark(SV_NAME, "VectorizedHorizontalReduction",
7894af732203SDimitry Andric cast<Instruction>(VL[0]))
78950b57cec5SDimitry Andric << "Vectorized horizontal reduction with cost "
78960b57cec5SDimitry Andric << ore::NV("Cost", Cost) << " and with tree size "
78970b57cec5SDimitry Andric << ore::NV("TreeSize", V.getTreeSize());
78980b57cec5SDimitry Andric });
78990b57cec5SDimitry Andric
79000b57cec5SDimitry Andric // Vectorize a tree.
79010b57cec5SDimitry Andric DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
79020b57cec5SDimitry Andric Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
79030b57cec5SDimitry Andric
7904af732203SDimitry Andric // Emit a reduction. If the root is a select (min/max idiom), the insert
7905480093f4SDimitry Andric // point is the compare condition of that select.
7906480093f4SDimitry Andric Instruction *RdxRootInst = cast<Instruction>(ReductionRoot);
79075f7ddb14SDimitry Andric if (isCmpSelMinMax(RdxRootInst))
7908480093f4SDimitry Andric Builder.SetInsertPoint(getCmpForMinMaxReduction(RdxRootInst));
7909480093f4SDimitry Andric else
7910480093f4SDimitry Andric Builder.SetInsertPoint(RdxRootInst);
7911480093f4SDimitry Andric
79125f7ddb14SDimitry Andric // To prevent poison from leaking across what used to be sequential, safe,
79135f7ddb14SDimitry Andric // scalar boolean logic operations, the reduction operand must be frozen.
79145f7ddb14SDimitry Andric if (isa<SelectInst>(RdxRootInst) && isBoolLogicOp(RdxRootInst))
79155f7ddb14SDimitry Andric VectorizedRoot = Builder.CreateFreeze(VectorizedRoot);
79165f7ddb14SDimitry Andric
79170b57cec5SDimitry Andric Value *ReducedSubTree =
79180b57cec5SDimitry Andric emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
7919af732203SDimitry Andric
7920af732203SDimitry Andric if (!VectorizedTree) {
7921af732203SDimitry Andric // Initialize the final value in the reduction.
79220b57cec5SDimitry Andric VectorizedTree = ReducedSubTree;
7923af732203SDimitry Andric } else {
7924af732203SDimitry Andric // Update the final value in the reduction.
7925af732203SDimitry Andric Builder.SetCurrentDebugLocation(Loc);
7926af732203SDimitry Andric VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
7927af732203SDimitry Andric ReducedSubTree, "op.rdx", ReductionOps);
7928af732203SDimitry Andric }
79290b57cec5SDimitry Andric i += ReduxWidth;
79300b57cec5SDimitry Andric ReduxWidth = PowerOf2Floor(NumReducedVals - i);
79310b57cec5SDimitry Andric }
79320b57cec5SDimitry Andric
79330b57cec5SDimitry Andric if (VectorizedTree) {
79340b57cec5SDimitry Andric // Finish the reduction.
79350b57cec5SDimitry Andric for (; i < NumReducedVals; ++i) {
79360b57cec5SDimitry Andric auto *I = cast<Instruction>(ReducedVals[i]);
79370b57cec5SDimitry Andric Builder.SetCurrentDebugLocation(I->getDebugLoc());
7938af732203SDimitry Andric VectorizedTree =
7939af732203SDimitry Andric createOp(Builder, RdxKind, VectorizedTree, I, "", ReductionOps);
79400b57cec5SDimitry Andric }
79410b57cec5SDimitry Andric for (auto &Pair : ExternallyUsedValues) {
79420b57cec5SDimitry Andric // Add each externally used value to the final reduction.
79430b57cec5SDimitry Andric for (auto *I : Pair.second) {
79440b57cec5SDimitry Andric Builder.SetCurrentDebugLocation(I->getDebugLoc());
7945af732203SDimitry Andric VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
7946af732203SDimitry Andric Pair.first, "op.extra", I);
79470b57cec5SDimitry Andric }
79480b57cec5SDimitry Andric }
7949480093f4SDimitry Andric
79500b57cec5SDimitry Andric ReductionRoot->replaceAllUsesWith(VectorizedTree);
7951480093f4SDimitry Andric
79528bcb0991SDimitry Andric // Mark all scalar reduction ops for deletion, they are replaced by the
79538bcb0991SDimitry Andric // vector reductions.
79548bcb0991SDimitry Andric V.eraseInstructions(IgnoreList);
79550b57cec5SDimitry Andric }
79560b57cec5SDimitry Andric return VectorizedTree != nullptr;
79570b57cec5SDimitry Andric }
79580b57cec5SDimitry Andric
numReductionValues() const7959af732203SDimitry Andric unsigned numReductionValues() const { return ReducedVals.size(); }
79600b57cec5SDimitry Andric
79610b57cec5SDimitry Andric private:
  /// Calculate the cost of a reduction.
  ///
  /// \param TTI target cost model used to price the vector and scalar forms.
  /// \param FirstReducedVal any one of the reduced scalars; supplies the
  ///        element type of the reduction.
  /// \param ReduxWidth number of lanes in the candidate vector reduction.
  /// \param FMF fast-math flags common to all reduction operations.
  /// \returns the (possibly negative) cost delta of the vector reduction
  ///          relative to the scalar reduction chain it replaces.
  InstructionCost getReductionCost(TargetTransformInfo *TTI,
                                   Value *FirstReducedVal, unsigned ReduxWidth,
                                   FastMathFlags FMF) {
    Type *ScalarTy = FirstReducedVal->getType();
    FixedVectorType *VectorTy = FixedVectorType::get(ScalarTy, ReduxWidth);
    InstructionCost VectorCost, ScalarCost;
    switch (RdxKind) {
    // Plain arithmetic/logical reductions: one vector reduction opcode vs.
    // one scalar instruction per reduced element.
    case RecurKind::Add:
    case RecurKind::Mul:
    case RecurKind::Or:
    case RecurKind::And:
    case RecurKind::Xor:
    case RecurKind::FAdd:
    case RecurKind::FMul: {
      unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(RdxKind);
      VectorCost = TTI->getArithmeticReductionCost(RdxOpcode, VectorTy, FMF);
      ScalarCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy);
      break;
    }
    // FP min/max: the scalar form is a compare + select pair per element.
    case RecurKind::FMax:
    case RecurKind::FMin: {
      auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
      VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy,
                                               /*unsigned=*/false);
      ScalarCost =
          TTI->getCmpSelInstrCost(Instruction::FCmp, ScalarTy) +
          TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
                                  CmpInst::makeCmpResultType(ScalarTy));
      break;
    }
    // Integer min/max: likewise compare + select, with signedness forwarded
    // to the cost model.
    case RecurKind::SMax:
    case RecurKind::SMin:
    case RecurKind::UMax:
    case RecurKind::UMin: {
      auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
      bool IsUnsigned =
          RdxKind == RecurKind::UMax || RdxKind == RecurKind::UMin;
      VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy, IsUnsigned);
      ScalarCost =
          TTI->getCmpSelInstrCost(Instruction::ICmp, ScalarTy) +
          TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
                                  CmpInst::makeCmpResultType(ScalarTy));
      break;
    }
    default:
      llvm_unreachable("Expected arithmetic or min/max reduction operation");
    }

    // Scalar cost is repeated for N-1 elements.
    ScalarCost *= (ReduxWidth - 1);
    LLVM_DEBUG(dbgs() << "SLP: Adding cost " << VectorCost - ScalarCost
                      << " for reduction that starts with " << *FirstReducedVal
                      << " (It is a splitting reduction)\n");
    return VectorCost - ScalarCost;
  }
80180b57cec5SDimitry Andric
80190b57cec5SDimitry Andric /// Emit a horizontal reduction of the vectorized value.
emitReduction(Value * VectorizedValue,IRBuilder<> & Builder,unsigned ReduxWidth,const TargetTransformInfo * TTI)80200b57cec5SDimitry Andric Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder,
80210b57cec5SDimitry Andric unsigned ReduxWidth, const TargetTransformInfo *TTI) {
80220b57cec5SDimitry Andric assert(VectorizedValue && "Need to have a vectorized tree node");
80230b57cec5SDimitry Andric assert(isPowerOf2_32(ReduxWidth) &&
80240b57cec5SDimitry Andric "We only handle power-of-two reductions for now");
80250b57cec5SDimitry Andric
8026af732203SDimitry Andric return createSimpleTargetReduction(Builder, TTI, VectorizedValue, RdxKind,
8027af732203SDimitry Andric ReductionOps.back());
80280b57cec5SDimitry Andric }
80290b57cec5SDimitry Andric };
80300b57cec5SDimitry Andric
80310b57cec5SDimitry Andric } // end anonymous namespace
80320b57cec5SDimitry Andric
getAggregateSize(Instruction * InsertInst)8033af732203SDimitry Andric static Optional<unsigned> getAggregateSize(Instruction *InsertInst) {
8034af732203SDimitry Andric if (auto *IE = dyn_cast<InsertElementInst>(InsertInst))
8035af732203SDimitry Andric return cast<FixedVectorType>(IE->getType())->getNumElements();
8036af732203SDimitry Andric
8037af732203SDimitry Andric unsigned AggregateSize = 1;
8038af732203SDimitry Andric auto *IV = cast<InsertValueInst>(InsertInst);
8039af732203SDimitry Andric Type *CurrentType = IV->getType();
8040af732203SDimitry Andric do {
8041af732203SDimitry Andric if (auto *ST = dyn_cast<StructType>(CurrentType)) {
8042af732203SDimitry Andric for (auto *Elt : ST->elements())
8043af732203SDimitry Andric if (Elt != ST->getElementType(0)) // check homogeneity
8044af732203SDimitry Andric return None;
8045af732203SDimitry Andric AggregateSize *= ST->getNumElements();
8046af732203SDimitry Andric CurrentType = ST->getElementType(0);
8047af732203SDimitry Andric } else if (auto *AT = dyn_cast<ArrayType>(CurrentType)) {
8048af732203SDimitry Andric AggregateSize *= AT->getNumElements();
8049af732203SDimitry Andric CurrentType = AT->getElementType();
8050af732203SDimitry Andric } else if (auto *VT = dyn_cast<FixedVectorType>(CurrentType)) {
8051af732203SDimitry Andric AggregateSize *= VT->getNumElements();
8052af732203SDimitry Andric return AggregateSize;
8053af732203SDimitry Andric } else if (CurrentType->isSingleValueType()) {
8054af732203SDimitry Andric return AggregateSize;
8055af732203SDimitry Andric } else {
8056af732203SDimitry Andric return None;
8057af732203SDimitry Andric }
8058af732203SDimitry Andric } while (true);
8059af732203SDimitry Andric }
8060af732203SDimitry Andric
findBuildAggregate_rec(Instruction * LastInsertInst,TargetTransformInfo * TTI,SmallVectorImpl<Value * > & BuildVectorOpds,SmallVectorImpl<Value * > & InsertElts,unsigned OperandOffset)8061af732203SDimitry Andric static bool findBuildAggregate_rec(Instruction *LastInsertInst,
8062af732203SDimitry Andric TargetTransformInfo *TTI,
8063af732203SDimitry Andric SmallVectorImpl<Value *> &BuildVectorOpds,
8064af732203SDimitry Andric SmallVectorImpl<Value *> &InsertElts,
8065af732203SDimitry Andric unsigned OperandOffset) {
8066af732203SDimitry Andric do {
8067af732203SDimitry Andric Value *InsertedOperand = LastInsertInst->getOperand(1);
80685f7ddb14SDimitry Andric Optional<int> OperandIndex = getInsertIndex(LastInsertInst, OperandOffset);
8069af732203SDimitry Andric if (!OperandIndex)
8070af732203SDimitry Andric return false;
8071af732203SDimitry Andric if (isa<InsertElementInst>(InsertedOperand) ||
8072af732203SDimitry Andric isa<InsertValueInst>(InsertedOperand)) {
8073af732203SDimitry Andric if (!findBuildAggregate_rec(cast<Instruction>(InsertedOperand), TTI,
8074af732203SDimitry Andric BuildVectorOpds, InsertElts, *OperandIndex))
8075af732203SDimitry Andric return false;
8076af732203SDimitry Andric } else {
8077af732203SDimitry Andric BuildVectorOpds[*OperandIndex] = InsertedOperand;
8078af732203SDimitry Andric InsertElts[*OperandIndex] = LastInsertInst;
8079af732203SDimitry Andric }
8080af732203SDimitry Andric LastInsertInst = dyn_cast<Instruction>(LastInsertInst->getOperand(0));
8081af732203SDimitry Andric } while (LastInsertInst != nullptr &&
8082af732203SDimitry Andric (isa<InsertValueInst>(LastInsertInst) ||
8083af732203SDimitry Andric isa<InsertElementInst>(LastInsertInst)) &&
8084af732203SDimitry Andric LastInsertInst->hasOneUse());
80855f7ddb14SDimitry Andric return true;
8086af732203SDimitry Andric }
8087af732203SDimitry Andric
80880b57cec5SDimitry Andric /// Recognize construction of vectors like
8089af732203SDimitry Andric /// %ra = insertelement <4 x float> poison, float %s0, i32 0
80900b57cec5SDimitry Andric /// %rb = insertelement <4 x float> %ra, float %s1, i32 1
80910b57cec5SDimitry Andric /// %rc = insertelement <4 x float> %rb, float %s2, i32 2
80920b57cec5SDimitry Andric /// %rd = insertelement <4 x float> %rc, float %s3, i32 3
8093480093f4SDimitry Andric /// starting from the last insertelement or insertvalue instruction.
80940b57cec5SDimitry Andric ///
8095af732203SDimitry Andric /// Also recognize homogeneous aggregates like {<2 x float>, <2 x float>},
8096480093f4SDimitry Andric /// {{float, float}, {float, float}}, [2 x {float, float}] and so on.
8097480093f4SDimitry Andric /// See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples.
8098480093f4SDimitry Andric ///
8099480093f4SDimitry Andric /// Assume LastInsertInst is of InsertElementInst or InsertValueInst type.
81000b57cec5SDimitry Andric ///
81010b57cec5SDimitry Andric /// \return true if it matches.
findBuildAggregate(Instruction * LastInsertInst,TargetTransformInfo * TTI,SmallVectorImpl<Value * > & BuildVectorOpds,SmallVectorImpl<Value * > & InsertElts)8102af732203SDimitry Andric static bool findBuildAggregate(Instruction *LastInsertInst,
8103af732203SDimitry Andric TargetTransformInfo *TTI,
8104480093f4SDimitry Andric SmallVectorImpl<Value *> &BuildVectorOpds,
81055ffd83dbSDimitry Andric SmallVectorImpl<Value *> &InsertElts) {
8106af732203SDimitry Andric
8107480093f4SDimitry Andric assert((isa<InsertElementInst>(LastInsertInst) ||
8108480093f4SDimitry Andric isa<InsertValueInst>(LastInsertInst)) &&
8109480093f4SDimitry Andric "Expected insertelement or insertvalue instruction!");
8110af732203SDimitry Andric
8111af732203SDimitry Andric assert((BuildVectorOpds.empty() && InsertElts.empty()) &&
8112af732203SDimitry Andric "Expected empty result vectors!");
8113af732203SDimitry Andric
8114af732203SDimitry Andric Optional<unsigned> AggregateSize = getAggregateSize(LastInsertInst);
8115af732203SDimitry Andric if (!AggregateSize)
8116480093f4SDimitry Andric return false;
8117af732203SDimitry Andric BuildVectorOpds.resize(*AggregateSize);
8118af732203SDimitry Andric InsertElts.resize(*AggregateSize);
8119af732203SDimitry Andric
8120af732203SDimitry Andric if (findBuildAggregate_rec(LastInsertInst, TTI, BuildVectorOpds, InsertElts,
8121af732203SDimitry Andric 0)) {
8122af732203SDimitry Andric llvm::erase_value(BuildVectorOpds, nullptr);
8123af732203SDimitry Andric llvm::erase_value(InsertElts, nullptr);
8124af732203SDimitry Andric if (BuildVectorOpds.size() >= 2)
81250b57cec5SDimitry Andric return true;
81260b57cec5SDimitry Andric }
81270b57cec5SDimitry Andric
8128af732203SDimitry Andric return false;
8129af732203SDimitry Andric }
8130af732203SDimitry Andric
81310b57cec5SDimitry Andric /// Try and get a reduction value from a phi node.
81320b57cec5SDimitry Andric ///
81330b57cec5SDimitry Andric /// Given a phi node \p P in a block \p ParentBB, consider possible reductions
81340b57cec5SDimitry Andric /// if they come from either \p ParentBB or a containing loop latch.
81350b57cec5SDimitry Andric ///
81360b57cec5SDimitry Andric /// \returns A candidate reduction value if possible, or \code nullptr \endcode
81370b57cec5SDimitry Andric /// if not possible.
getReductionValue(const DominatorTree * DT,PHINode * P,BasicBlock * ParentBB,LoopInfo * LI)81380b57cec5SDimitry Andric static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
81390b57cec5SDimitry Andric BasicBlock *ParentBB, LoopInfo *LI) {
81400b57cec5SDimitry Andric // There are situations where the reduction value is not dominated by the
81410b57cec5SDimitry Andric // reduction phi. Vectorizing such cases has been reported to cause
81420b57cec5SDimitry Andric // miscompiles. See PR25787.
81430b57cec5SDimitry Andric auto DominatedReduxValue = [&](Value *R) {
81440b57cec5SDimitry Andric return isa<Instruction>(R) &&
81450b57cec5SDimitry Andric DT->dominates(P->getParent(), cast<Instruction>(R)->getParent());
81460b57cec5SDimitry Andric };
81470b57cec5SDimitry Andric
81480b57cec5SDimitry Andric Value *Rdx = nullptr;
81490b57cec5SDimitry Andric
81500b57cec5SDimitry Andric // Return the incoming value if it comes from the same BB as the phi node.
81510b57cec5SDimitry Andric if (P->getIncomingBlock(0) == ParentBB) {
81520b57cec5SDimitry Andric Rdx = P->getIncomingValue(0);
81530b57cec5SDimitry Andric } else if (P->getIncomingBlock(1) == ParentBB) {
81540b57cec5SDimitry Andric Rdx = P->getIncomingValue(1);
81550b57cec5SDimitry Andric }
81560b57cec5SDimitry Andric
81570b57cec5SDimitry Andric if (Rdx && DominatedReduxValue(Rdx))
81580b57cec5SDimitry Andric return Rdx;
81590b57cec5SDimitry Andric
81600b57cec5SDimitry Andric // Otherwise, check whether we have a loop latch to look at.
81610b57cec5SDimitry Andric Loop *BBL = LI->getLoopFor(ParentBB);
81620b57cec5SDimitry Andric if (!BBL)
81630b57cec5SDimitry Andric return nullptr;
81640b57cec5SDimitry Andric BasicBlock *BBLatch = BBL->getLoopLatch();
81650b57cec5SDimitry Andric if (!BBLatch)
81660b57cec5SDimitry Andric return nullptr;
81670b57cec5SDimitry Andric
81680b57cec5SDimitry Andric // There is a loop latch, return the incoming value if it comes from
81690b57cec5SDimitry Andric // that. This reduction pattern occasionally turns up.
81700b57cec5SDimitry Andric if (P->getIncomingBlock(0) == BBLatch) {
81710b57cec5SDimitry Andric Rdx = P->getIncomingValue(0);
81720b57cec5SDimitry Andric } else if (P->getIncomingBlock(1) == BBLatch) {
81730b57cec5SDimitry Andric Rdx = P->getIncomingValue(1);
81740b57cec5SDimitry Andric }
81750b57cec5SDimitry Andric
81760b57cec5SDimitry Andric if (Rdx && DominatedReduxValue(Rdx))
81770b57cec5SDimitry Andric return Rdx;
81780b57cec5SDimitry Andric
81790b57cec5SDimitry Andric return nullptr;
81800b57cec5SDimitry Andric }
81810b57cec5SDimitry Andric
matchRdxBop(Instruction * I,Value * & V0,Value * & V1)8182af732203SDimitry Andric static bool matchRdxBop(Instruction *I, Value *&V0, Value *&V1) {
8183af732203SDimitry Andric if (match(I, m_BinOp(m_Value(V0), m_Value(V1))))
8184af732203SDimitry Andric return true;
8185af732203SDimitry Andric if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(V0), m_Value(V1))))
8186af732203SDimitry Andric return true;
8187af732203SDimitry Andric if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(V0), m_Value(V1))))
8188af732203SDimitry Andric return true;
81895f7ddb14SDimitry Andric if (match(I, m_Intrinsic<Intrinsic::smax>(m_Value(V0), m_Value(V1))))
81905f7ddb14SDimitry Andric return true;
81915f7ddb14SDimitry Andric if (match(I, m_Intrinsic<Intrinsic::smin>(m_Value(V0), m_Value(V1))))
81925f7ddb14SDimitry Andric return true;
81935f7ddb14SDimitry Andric if (match(I, m_Intrinsic<Intrinsic::umax>(m_Value(V0), m_Value(V1))))
81945f7ddb14SDimitry Andric return true;
81955f7ddb14SDimitry Andric if (match(I, m_Intrinsic<Intrinsic::umin>(m_Value(V0), m_Value(V1))))
81965f7ddb14SDimitry Andric return true;
8197af732203SDimitry Andric return false;
8198af732203SDimitry Andric }
8199af732203SDimitry Andric
/// Attempt to reduce a horizontal reduction.
/// If it is legal to match a horizontal reduction feeding the phi node \a P
/// with reduction operators \a Root (or one of its operands) in a basic block
/// \a BB, then check if it can be done. If horizontal reduction is not found
/// and root instruction is a binary operation, vectorization of the operands is
/// attempted.
/// \returns true if a horizontal reduction was matched and reduced or operands
/// of one of the binary instruction were vectorized.
/// \returns false if a horizontal reduction was not matched (or not possible)
/// or no vectorization of any binary operation feeding \a Root instruction was
/// performed.
static bool tryToVectorizeHorReductionOrInstOperands(
    PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R,
    TargetTransformInfo *TTI,
    const function_ref<bool(Instruction *, BoUpSLP &)> Vectorize) {
  if (!ShouldVectorizeHor)
    return false;

  if (!Root)
    return false;

  // The root must live in this block and must not itself be a phi.
  if (Root->getParent() != BB || isa<PHINode>(Root))
    return false;
  // Start analysis starting from Root instruction. If horizontal reduction is
  // found, try to vectorize it. If it is not a horizontal reduction or
  // vectorization is not possible or not effective, and currently analyzed
  // instruction is a binary operation, try to vectorize the operands, using
  // pre-order DFS traversal order. If the operands were not vectorized, repeat
  // the same procedure considering each operand as a possible root of the
  // horizontal reduction.
  // Interrupt the process if the Root instruction itself was vectorized or all
  // sub-trees not higher that RecursionMaxDepth were analyzed/vectorized.
  // Skip the analysis of CmpInsts. Compiler implements postanalysis of the
  // CmpInsts so we can skip extra attempts in
  // tryToVectorizeHorReductionOrInstOperands and save compile time.
  // Explicit worklist of (instruction, DFS depth) pairs, seeded with the root
  // at depth 0.
  SmallVector<std::pair<Instruction *, unsigned>, 8> Stack(1, {Root, 0});
  SmallPtrSet<Value *, 8> VisitedInstrs;
  bool Res = false;
  while (!Stack.empty()) {
    Instruction *Inst;
    unsigned Level;
    std::tie(Inst, Level) = Stack.pop_back_val();
    // Do not try to analyze instruction that has already been vectorized.
    // This may happen when we vectorize instruction operands on a previous
    // iteration while stack was populated before that happened.
    if (R.isDeleted(Inst))
      continue;
    Value *B0, *B1;
    bool IsBinop = matchRdxBop(Inst, B0, B1);
    bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value()));
    if (IsBinop || IsSelect) {
      HorizontalReduction HorRdx;
      if (HorRdx.matchAssociativeReduction(P, Inst)) {
        if (HorRdx.tryToReduce(R, TTI)) {
          Res = true;
          // Set P to nullptr to avoid re-analysis of phi node in
          // matchAssociativeReduction function unless this is the root node.
          P = nullptr;
          continue;
        }
      }
      if (P && IsBinop) {
        // No reduction rooted at Inst; if one binop operand is the phi itself,
        // retry with the other operand as the candidate root.
        Inst = dyn_cast<Instruction>(B0);
        if (Inst == P)
          Inst = dyn_cast<Instruction>(B1);
        if (!Inst) {
          // Set P to nullptr to avoid re-analysis of phi node in
          // matchAssociativeReduction function unless this is the root node.
          P = nullptr;
          continue;
        }
      }
    }
    // Set P to nullptr to avoid re-analysis of phi node in
    // matchAssociativeReduction function unless this is the root node.
    P = nullptr;
    // Do not try to vectorize CmpInst operands, this is done separately.
    if (!isa<CmpInst>(Inst) && Vectorize(Inst, R)) {
      Res = true;
      continue;
    }

    // Try to vectorize operands.
    // Continue analysis for the instruction from the same basic block only to
    // save compile time.
    if (++Level < RecursionMaxDepth)
      for (auto *Op : Inst->operand_values())
        if (VisitedInstrs.insert(Op).second)
          if (auto *I = dyn_cast<Instruction>(Op))
            // Do not try to vectorize CmpInst operands, this is done
            // separately.
            if (!isa<PHINode>(I) && !isa<CmpInst>(I) && !R.isDeleted(I) &&
                I->getParent() == BB)
              Stack.emplace_back(I, Level);
  }
  return Res;
}
82970b57cec5SDimitry Andric
vectorizeRootInstruction(PHINode * P,Value * V,BasicBlock * BB,BoUpSLP & R,TargetTransformInfo * TTI)82980b57cec5SDimitry Andric bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
82990b57cec5SDimitry Andric BasicBlock *BB, BoUpSLP &R,
83000b57cec5SDimitry Andric TargetTransformInfo *TTI) {
8301af732203SDimitry Andric auto *I = dyn_cast_or_null<Instruction>(V);
83020b57cec5SDimitry Andric if (!I)
83030b57cec5SDimitry Andric return false;
83040b57cec5SDimitry Andric
83050b57cec5SDimitry Andric if (!isa<BinaryOperator>(I))
83060b57cec5SDimitry Andric P = nullptr;
83070b57cec5SDimitry Andric // Try to match and vectorize a horizontal reduction.
83080b57cec5SDimitry Andric auto &&ExtraVectorization = [this](Instruction *I, BoUpSLP &R) -> bool {
83090b57cec5SDimitry Andric return tryToVectorize(I, R);
83100b57cec5SDimitry Andric };
83110b57cec5SDimitry Andric return tryToVectorizeHorReductionOrInstOperands(P, I, BB, R, TTI,
83120b57cec5SDimitry Andric ExtraVectorization);
83130b57cec5SDimitry Andric }
83140b57cec5SDimitry Andric
vectorizeInsertValueInst(InsertValueInst * IVI,BasicBlock * BB,BoUpSLP & R)83150b57cec5SDimitry Andric bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
83160b57cec5SDimitry Andric BasicBlock *BB, BoUpSLP &R) {
83170b57cec5SDimitry Andric const DataLayout &DL = BB->getModule()->getDataLayout();
83180b57cec5SDimitry Andric if (!R.canMapToVector(IVI->getType(), DL))
83190b57cec5SDimitry Andric return false;
83200b57cec5SDimitry Andric
83210b57cec5SDimitry Andric SmallVector<Value *, 16> BuildVectorOpds;
83225ffd83dbSDimitry Andric SmallVector<Value *, 16> BuildVectorInsts;
8323af732203SDimitry Andric if (!findBuildAggregate(IVI, TTI, BuildVectorOpds, BuildVectorInsts))
83240b57cec5SDimitry Andric return false;
83250b57cec5SDimitry Andric
83260b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
83270b57cec5SDimitry Andric // Aggregate value is unlikely to be processed in vector register, we need to
83280b57cec5SDimitry Andric // extract scalars into scalar registers, so NeedExtraction is set true.
83295f7ddb14SDimitry Andric return tryToVectorizeList(BuildVectorOpds, R, /*AllowReorder=*/false);
83300b57cec5SDimitry Andric }
83310b57cec5SDimitry Andric
vectorizeInsertElementInst(InsertElementInst * IEI,BasicBlock * BB,BoUpSLP & R)83320b57cec5SDimitry Andric bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
83330b57cec5SDimitry Andric BasicBlock *BB, BoUpSLP &R) {
83345ffd83dbSDimitry Andric SmallVector<Value *, 16> BuildVectorInsts;
83350b57cec5SDimitry Andric SmallVector<Value *, 16> BuildVectorOpds;
83365f7ddb14SDimitry Andric SmallVector<int> Mask;
83375ffd83dbSDimitry Andric if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, BuildVectorInsts) ||
83380b57cec5SDimitry Andric (llvm::all_of(BuildVectorOpds,
83390b57cec5SDimitry Andric [](Value *V) { return isa<ExtractElementInst>(V); }) &&
83405f7ddb14SDimitry Andric isShuffle(BuildVectorOpds, Mask)))
83410b57cec5SDimitry Andric return false;
83420b57cec5SDimitry Andric
83435f7ddb14SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IEI << "\n");
83445f7ddb14SDimitry Andric return tryToVectorizeList(BuildVectorInsts, R, /*AllowReorder=*/true);
83450b57cec5SDimitry Andric }
83460b57cec5SDimitry Andric
vectorizeSimpleInstructions(SmallVectorImpl<Instruction * > & Instructions,BasicBlock * BB,BoUpSLP & R,bool AtTerminator)83470b57cec5SDimitry Andric bool SLPVectorizerPass::vectorizeSimpleInstructions(
83485f7ddb14SDimitry Andric SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R,
83495f7ddb14SDimitry Andric bool AtTerminator) {
83500b57cec5SDimitry Andric bool OpsChanged = false;
83515f7ddb14SDimitry Andric SmallVector<Instruction *, 4> PostponedCmps;
83528bcb0991SDimitry Andric for (auto *I : reverse(Instructions)) {
83538bcb0991SDimitry Andric if (R.isDeleted(I))
83540b57cec5SDimitry Andric continue;
83550b57cec5SDimitry Andric if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
83560b57cec5SDimitry Andric OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
83570b57cec5SDimitry Andric else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I))
83580b57cec5SDimitry Andric OpsChanged |= vectorizeInsertElementInst(LastInsertElem, BB, R);
83595f7ddb14SDimitry Andric else if (isa<CmpInst>(I))
83605f7ddb14SDimitry Andric PostponedCmps.push_back(I);
83615f7ddb14SDimitry Andric }
83625f7ddb14SDimitry Andric if (AtTerminator) {
83635f7ddb14SDimitry Andric // Try to find reductions first.
83645f7ddb14SDimitry Andric for (Instruction *I : PostponedCmps) {
83655f7ddb14SDimitry Andric if (R.isDeleted(I))
83665f7ddb14SDimitry Andric continue;
83675f7ddb14SDimitry Andric for (Value *Op : I->operands())
83685f7ddb14SDimitry Andric OpsChanged |= vectorizeRootInstruction(nullptr, Op, BB, R, TTI);
83695f7ddb14SDimitry Andric }
83705f7ddb14SDimitry Andric // Try to vectorize operands as vector bundles.
83715f7ddb14SDimitry Andric for (Instruction *I : PostponedCmps) {
83725f7ddb14SDimitry Andric if (R.isDeleted(I))
83735f7ddb14SDimitry Andric continue;
83745f7ddb14SDimitry Andric OpsChanged |= tryToVectorize(I, R);
83750b57cec5SDimitry Andric }
83760b57cec5SDimitry Andric Instructions.clear();
83775f7ddb14SDimitry Andric } else {
83785f7ddb14SDimitry Andric // Insert in reverse order since the PostponedCmps vector was filled in
83795f7ddb14SDimitry Andric // reverse order.
83805f7ddb14SDimitry Andric Instructions.assign(PostponedCmps.rbegin(), PostponedCmps.rend());
83815f7ddb14SDimitry Andric }
83820b57cec5SDimitry Andric return OpsChanged;
83830b57cec5SDimitry Andric }
83840b57cec5SDimitry Andric
/// Scan basic block \p BB for vectorization opportunities: compatible groups
/// of PHI nodes, horizontal reductions feeding phis or unused instructions,
/// and postponed insertvalue/insertelement/cmp instructions.
/// \returns true if anything in the block was vectorized.
bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
  bool Changed = false;
  SmallVector<Value *, 4> Incoming;
  SmallPtrSet<Value *, 16> VisitedInstrs;
  // Maps phi nodes to the non-phi nodes found in the use tree for each phi
  // node. Allows better to identify the chains that can be vectorized in the
  // better way.
  DenseMap<Value *, SmallVector<Value *, 4>> PHIToOpcodes;

  // Iterate until a whole pass over the phis makes no progress; vectorizing
  // one group can enable another.
  bool HaveVectorizedPhiNodes = true;
  while (HaveVectorizedPhiNodes) {
    HaveVectorizedPhiNodes = false;

    // Collect the incoming values from the PHIs.
    Incoming.clear();
    for (Instruction &I : *BB) {
      PHINode *P = dyn_cast<PHINode>(&I);
      if (!P)
        break; // Phis are grouped at the top of the block; stop at the first
               // non-phi.

      // No need to analyze deleted, vectorized and non-vectorizable
      // instructions.
      if (!VisitedInstrs.count(P) && !R.isDeleted(P) &&
          isValidElementType(P->getType()))
        Incoming.push_back(P);
    }

    // Find the corresponding non-phi nodes for better matching when trying to
    // build the tree.
    for (Value *V : Incoming) {
      SmallVectorImpl<Value *> &Opcodes =
          PHIToOpcodes.try_emplace(V).first->getSecond();
      if (!Opcodes.empty())
        continue;
      // Walk through chains of phis to reach the first non-phi values that
      // actually feed this phi (transitively).
      SmallVector<Value *, 4> Nodes(1, V);
      SmallPtrSet<Value *, 4> Visited;
      while (!Nodes.empty()) {
        auto *PHI = cast<PHINode>(Nodes.pop_back_val());
        if (!Visited.insert(PHI).second)
          continue;
        for (Value *V : PHI->incoming_values()) {
          if (auto *PHI1 = dyn_cast<PHINode>((V))) {
            Nodes.push_back(PHI1);
            continue;
          }
          Opcodes.emplace_back(V);
        }
      }
    }

    // Sort by type, parent, operands. This groups compatible phis next to
    // each other so the scan below can pick maximal runs.
    stable_sort(Incoming, [this, &PHIToOpcodes](Value *V1, Value *V2) {
      assert(isValidElementType(V1->getType()) &&
             isValidElementType(V2->getType()) &&
             "Expected vectorizable types only.");
      // It is fine to compare type IDs here, since we expect only vectorizable
      // types, like ints, floats and pointers, we don't care about other type.
      if (V1->getType()->getTypeID() < V2->getType()->getTypeID())
        return true;
      if (V1->getType()->getTypeID() > V2->getType()->getTypeID())
        return false;
      ArrayRef<Value *> Opcodes1 = PHIToOpcodes[V1];
      ArrayRef<Value *> Opcodes2 = PHIToOpcodes[V2];
      if (Opcodes1.size() < Opcodes2.size())
        return true;
      if (Opcodes1.size() > Opcodes2.size())
        return false;
      for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
        // Undefs are compatible with any other value.
        if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I]))
          continue;
        if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
          if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
            // Order instructions by dominator-tree DFS-in number of their
            // parent blocks, then by opcode.
            DomTreeNodeBase<BasicBlock> *NodeI1 = DT->getNode(I1->getParent());
            DomTreeNodeBase<BasicBlock> *NodeI2 = DT->getNode(I2->getParent());
            if (!NodeI1)
              return NodeI2 != nullptr;
            if (!NodeI2)
              return false;
            assert((NodeI1 == NodeI2) ==
                       (NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
                   "Different nodes should have different DFS numbers");
            if (NodeI1 != NodeI2)
              return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
            InstructionsState S = getSameOpcode({I1, I2});
            if (S.getOpcode())
              continue;
            return I1->getOpcode() < I2->getOpcode();
          }
        if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
          continue;
        if (Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID())
          return true;
        if (Opcodes1[I]->getValueID() > Opcodes2[I]->getValueID())
          return false;
      }
      return false;
    });

    // Equivalence check matching the ordering above: two phis are compatible
    // if they have the same type and element-wise compatible operand lists.
    auto &&AreCompatiblePHIs = [&PHIToOpcodes](Value *V1, Value *V2) {
      if (V1 == V2)
        return true;
      if (V1->getType() != V2->getType())
        return false;
      ArrayRef<Value *> Opcodes1 = PHIToOpcodes[V1];
      ArrayRef<Value *> Opcodes2 = PHIToOpcodes[V2];
      if (Opcodes1.size() != Opcodes2.size())
        return false;
      for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
        // Undefs are compatible with any other value.
        if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I]))
          continue;
        if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
          if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
            if (I1->getParent() != I2->getParent())
              return false;
            InstructionsState S = getSameOpcode({I1, I2});
            if (S.getOpcode())
              continue;
            return false;
          }
        if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
          continue;
        if (Opcodes1[I]->getValueID() != Opcodes2[I]->getValueID())
          return false;
      }
      return true;
    };

    // Try to vectorize elements base on their type.
    SmallVector<Value *, 4> Candidates;
    for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(),
                                           E = Incoming.end();
         IncIt != E;) {

      // Look for the next elements with the same type, parent and operand
      // kinds.
      SmallVector<Value *, 4>::iterator SameTypeIt = IncIt;
      while (SameTypeIt != E && AreCompatiblePHIs(*SameTypeIt, *IncIt)) {
        VisitedInstrs.insert(*SameTypeIt);
        ++SameTypeIt;
      }

      // Try to vectorize them.
      unsigned NumElts = (SameTypeIt - IncIt);
      LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize starting at PHIs ("
                        << NumElts << ")\n");
      // The order in which the phi nodes appear in the program does not matter.
      // So allow tryToVectorizeList to reorder them if it is beneficial. This
      // is done when there are exactly two elements since tryToVectorizeList
      // asserts that there are only two values when AllowReorder is true.
      if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
                                            /*AllowReorder=*/true)) {
        // Success start over because instructions might have been changed.
        HaveVectorizedPhiNodes = true;
        Changed = true;
      } else if (NumElts < 4 &&
                 (Candidates.empty() ||
                  Candidates.front()->getType() == (*IncIt)->getType())) {
        // Small group that did not vectorize on its own: accumulate it with
        // other same-typed small groups for one combined attempt.
        Candidates.append(IncIt, std::next(IncIt, NumElts));
      }
      // Final attempt to vectorize phis with the same types.
      if (SameTypeIt == E || (*SameTypeIt)->getType() != (*IncIt)->getType()) {
        if (Candidates.size() > 1 &&
            tryToVectorizeList(Candidates, R, /*AllowReorder=*/true)) {
          // Success start over because instructions might have been changed.
          HaveVectorizedPhiNodes = true;
          Changed = true;
        }
        Candidates.clear();
      }

      // Start over at the next instruction of a different type (or the end).
      IncIt = SameTypeIt;
    }
  }

  VisitedInstrs.clear();

  // Instructions postponed for vectorizeSimpleInstructions, and the set of
  // "key" unused instructions (terminators, void calls, stores) at which the
  // postponed list is flushed.
  SmallVector<Instruction *, 8> PostProcessInstructions;
  SmallDenseSet<Instruction *, 4> KeyNodes;
  // NOTE: vectorization deletes instructions, so on success the iterator is
  // reset to BB->begin() and the block is rescanned; VisitedInstrs prevents
  // re-analysis of already-processed instructions on such rescans.
  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
    // Skip instructions with scalable type. The num of elements is unknown at
    // compile-time for scalable type.
    if (isa<ScalableVectorType>(it->getType()))
      continue;

    // Skip instructions marked for the deletion.
    if (R.isDeleted(&*it))
      continue;
    // We may go through BB multiple times so skip the one we have checked.
    if (!VisitedInstrs.insert(&*it).second) {
      // Even on a rescan, flush the postponed list when we hit a key node
      // again - new postponed instructions may have accumulated.
      if (it->use_empty() && KeyNodes.contains(&*it) &&
          vectorizeSimpleInstructions(PostProcessInstructions, BB, R,
                                      it->isTerminator())) {
        // We would like to start over since some instructions are deleted
        // and the iterator may become invalid value.
        Changed = true;
        it = BB->begin();
        e = BB->end();
      }
      continue;
    }

    if (isa<DbgInfoIntrinsic>(it))
      continue;

    // Try to vectorize reductions that use PHINodes.
    if (PHINode *P = dyn_cast<PHINode>(it)) {
      // Check that the PHI is a reduction PHI.
      if (P->getNumIncomingValues() == 2) {
        // Try to match and vectorize a horizontal reduction.
        if (vectorizeRootInstruction(P, getReductionValue(DT, P, BB, LI), BB, R,
                                     TTI)) {
          Changed = true;
          it = BB->begin();
          e = BB->end();
          continue;
        }
      }
      // Try to vectorize the incoming values of the PHI, to catch reductions
      // that feed into PHIs.
      for (unsigned I = 0, E = P->getNumIncomingValues(); I != E; I++) {
        // Skip if the incoming block is the current BB for now. Also, bypass
        // unreachable IR for efficiency and to avoid crashing.
        // TODO: Collect the skipped incoming values and try to vectorize them
        // after processing BB.
        if (BB == P->getIncomingBlock(I) ||
            !DT->isReachableFromEntry(P->getIncomingBlock(I)))
          continue;

        Changed |= vectorizeRootInstruction(nullptr, P->getIncomingValue(I),
                                            P->getIncomingBlock(I), R, TTI);
      }
      continue;
    }

    // Ran into an instruction without users, like terminator, or function call
    // with ignored return value, store. Ignore unused instructions (basing on
    // instruction type, except for CallInst and InvokeInst).
    if (it->use_empty() && (it->getType()->isVoidTy() || isa<CallInst>(it) ||
                            isa<InvokeInst>(it))) {
      KeyNodes.insert(&*it);
      bool OpsChanged = false;
      if (ShouldStartVectorizeHorAtStore || !isa<StoreInst>(it)) {
        for (auto *V : it->operand_values()) {
          // Try to match and vectorize a horizontal reduction.
          OpsChanged |= vectorizeRootInstruction(nullptr, V, BB, R, TTI);
        }
      }
      // Start vectorization of post-process list of instructions from the
      // top-tree instructions to try to vectorize as many instructions as
      // possible.
      OpsChanged |= vectorizeSimpleInstructions(PostProcessInstructions, BB, R,
                                                it->isTerminator());
      if (OpsChanged) {
        // We would like to start over since some instructions are deleted
        // and the iterator may become invalid value.
        Changed = true;
        it = BB->begin();
        e = BB->end();
        continue;
      }
    }

    // Defer inserts and compares; they are handled in batches at key nodes by
    // vectorizeSimpleInstructions above.
    if (isa<InsertElementInst>(it) || isa<CmpInst>(it) ||
        isa<InsertValueInst>(it))
      PostProcessInstructions.push_back(&*it);
  }

  return Changed;
}
86570b57cec5SDimitry Andric
vectorizeGEPIndices(BasicBlock * BB,BoUpSLP & R)86580b57cec5SDimitry Andric bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
86590b57cec5SDimitry Andric auto Changed = false;
86600b57cec5SDimitry Andric for (auto &Entry : GEPs) {
86610b57cec5SDimitry Andric // If the getelementptr list has fewer than two elements, there's nothing
86620b57cec5SDimitry Andric // to do.
86630b57cec5SDimitry Andric if (Entry.second.size() < 2)
86640b57cec5SDimitry Andric continue;
86650b57cec5SDimitry Andric
86660b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Analyzing a getelementptr list of length "
86670b57cec5SDimitry Andric << Entry.second.size() << ".\n");
86680b57cec5SDimitry Andric
86698bcb0991SDimitry Andric // Process the GEP list in chunks suitable for the target's supported
86705ffd83dbSDimitry Andric // vector size. If a vector register can't hold 1 element, we are done. We
86715ffd83dbSDimitry Andric // are trying to vectorize the index computations, so the maximum number of
86725ffd83dbSDimitry Andric // elements is based on the size of the index expression, rather than the
86735ffd83dbSDimitry Andric // size of the GEP itself (the target's pointer size).
86748bcb0991SDimitry Andric unsigned MaxVecRegSize = R.getMaxVecRegSize();
86755ffd83dbSDimitry Andric unsigned EltSize = R.getVectorElementSize(*Entry.second[0]->idx_begin());
86768bcb0991SDimitry Andric if (MaxVecRegSize < EltSize)
86778bcb0991SDimitry Andric continue;
86788bcb0991SDimitry Andric
86798bcb0991SDimitry Andric unsigned MaxElts = MaxVecRegSize / EltSize;
86808bcb0991SDimitry Andric for (unsigned BI = 0, BE = Entry.second.size(); BI < BE; BI += MaxElts) {
86818bcb0991SDimitry Andric auto Len = std::min<unsigned>(BE - BI, MaxElts);
8682af732203SDimitry Andric ArrayRef<GetElementPtrInst *> GEPList(&Entry.second[BI], Len);
86830b57cec5SDimitry Andric
86840b57cec5SDimitry Andric // Initialize a set a candidate getelementptrs. Note that we use a
86850b57cec5SDimitry Andric // SetVector here to preserve program order. If the index computations
86860b57cec5SDimitry Andric // are vectorizable and begin with loads, we want to minimize the chance
86870b57cec5SDimitry Andric // of having to reorder them later.
86880b57cec5SDimitry Andric SetVector<Value *> Candidates(GEPList.begin(), GEPList.end());
86890b57cec5SDimitry Andric
86900b57cec5SDimitry Andric // Some of the candidates may have already been vectorized after we
86918bcb0991SDimitry Andric // initially collected them. If so, they are marked as deleted, so remove
86928bcb0991SDimitry Andric // them from the set of candidates.
86938bcb0991SDimitry Andric Candidates.remove_if(
86948bcb0991SDimitry Andric [&R](Value *I) { return R.isDeleted(cast<Instruction>(I)); });
86950b57cec5SDimitry Andric
86960b57cec5SDimitry Andric // Remove from the set of candidates all pairs of getelementptrs with
86970b57cec5SDimitry Andric // constant differences. Such getelementptrs are likely not good
86980b57cec5SDimitry Andric // candidates for vectorization in a bottom-up phase since one can be
86990b57cec5SDimitry Andric // computed from the other. We also ensure all candidate getelementptr
87000b57cec5SDimitry Andric // indices are unique.
87010b57cec5SDimitry Andric for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1; ++I) {
87028bcb0991SDimitry Andric auto *GEPI = GEPList[I];
87030b57cec5SDimitry Andric if (!Candidates.count(GEPI))
87040b57cec5SDimitry Andric continue;
87050b57cec5SDimitry Andric auto *SCEVI = SE->getSCEV(GEPList[I]);
87060b57cec5SDimitry Andric for (int J = I + 1; J < E && Candidates.size() > 1; ++J) {
87078bcb0991SDimitry Andric auto *GEPJ = GEPList[J];
87080b57cec5SDimitry Andric auto *SCEVJ = SE->getSCEV(GEPList[J]);
87090b57cec5SDimitry Andric if (isa<SCEVConstant>(SE->getMinusSCEV(SCEVI, SCEVJ))) {
87108bcb0991SDimitry Andric Candidates.remove(GEPI);
87118bcb0991SDimitry Andric Candidates.remove(GEPJ);
87120b57cec5SDimitry Andric } else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) {
87138bcb0991SDimitry Andric Candidates.remove(GEPJ);
87140b57cec5SDimitry Andric }
87150b57cec5SDimitry Andric }
87160b57cec5SDimitry Andric }
87170b57cec5SDimitry Andric
87180b57cec5SDimitry Andric // We break out of the above computation as soon as we know there are
87190b57cec5SDimitry Andric // fewer than two candidates remaining.
87200b57cec5SDimitry Andric if (Candidates.size() < 2)
87210b57cec5SDimitry Andric continue;
87220b57cec5SDimitry Andric
87230b57cec5SDimitry Andric // Add the single, non-constant index of each candidate to the bundle. We
87240b57cec5SDimitry Andric // ensured the indices met these constraints when we originally collected
87250b57cec5SDimitry Andric // the getelementptrs.
87260b57cec5SDimitry Andric SmallVector<Value *, 16> Bundle(Candidates.size());
87270b57cec5SDimitry Andric auto BundleIndex = 0u;
87280b57cec5SDimitry Andric for (auto *V : Candidates) {
87290b57cec5SDimitry Andric auto *GEP = cast<GetElementPtrInst>(V);
87300b57cec5SDimitry Andric auto *GEPIdx = GEP->idx_begin()->get();
87310b57cec5SDimitry Andric assert(GEP->getNumIndices() == 1 || !isa<Constant>(GEPIdx));
87320b57cec5SDimitry Andric Bundle[BundleIndex++] = GEPIdx;
87330b57cec5SDimitry Andric }
87340b57cec5SDimitry Andric
87350b57cec5SDimitry Andric // Try and vectorize the indices. We are currently only interested in
87360b57cec5SDimitry Andric // gather-like cases of the form:
87370b57cec5SDimitry Andric //
87380b57cec5SDimitry Andric // ... = g[a[0] - b[0]] + g[a[1] - b[1]] + ...
87390b57cec5SDimitry Andric //
87400b57cec5SDimitry Andric // where the loads of "a", the loads of "b", and the subtractions can be
87410b57cec5SDimitry Andric // performed in parallel. It's likely that detecting this pattern in a
87420b57cec5SDimitry Andric // bottom-up phase will be simpler and less costly than building a
87430b57cec5SDimitry Andric // full-blown top-down phase beginning at the consecutive loads.
87440b57cec5SDimitry Andric Changed |= tryToVectorizeList(Bundle, R);
87450b57cec5SDimitry Andric }
87460b57cec5SDimitry Andric }
87470b57cec5SDimitry Andric return Changed;
87480b57cec5SDimitry Andric }
87490b57cec5SDimitry Andric
vectorizeStoreChains(BoUpSLP & R)87500b57cec5SDimitry Andric bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
87510b57cec5SDimitry Andric bool Changed = false;
87525f7ddb14SDimitry Andric // Sort by type, base pointers and values operand. Value operands must be
87535f7ddb14SDimitry Andric // compatible (have the same opcode, same parent), otherwise it is
87545f7ddb14SDimitry Andric // definitely not profitable to try to vectorize them.
87555f7ddb14SDimitry Andric auto &&StoreSorter = [this](StoreInst *V, StoreInst *V2) {
87565f7ddb14SDimitry Andric if (V->getPointerOperandType()->getTypeID() <
87575f7ddb14SDimitry Andric V2->getPointerOperandType()->getTypeID())
87585f7ddb14SDimitry Andric return true;
87595f7ddb14SDimitry Andric if (V->getPointerOperandType()->getTypeID() >
87605f7ddb14SDimitry Andric V2->getPointerOperandType()->getTypeID())
87615f7ddb14SDimitry Andric return false;
87625f7ddb14SDimitry Andric // UndefValues are compatible with all other values.
87635f7ddb14SDimitry Andric if (isa<UndefValue>(V->getValueOperand()) ||
87645f7ddb14SDimitry Andric isa<UndefValue>(V2->getValueOperand()))
87655f7ddb14SDimitry Andric return false;
87665f7ddb14SDimitry Andric if (auto *I1 = dyn_cast<Instruction>(V->getValueOperand()))
87675f7ddb14SDimitry Andric if (auto *I2 = dyn_cast<Instruction>(V2->getValueOperand())) {
87685f7ddb14SDimitry Andric DomTreeNodeBase<llvm::BasicBlock> *NodeI1 =
87695f7ddb14SDimitry Andric DT->getNode(I1->getParent());
87705f7ddb14SDimitry Andric DomTreeNodeBase<llvm::BasicBlock> *NodeI2 =
87715f7ddb14SDimitry Andric DT->getNode(I2->getParent());
87725f7ddb14SDimitry Andric assert(NodeI1 && "Should only process reachable instructions");
87735f7ddb14SDimitry Andric assert(NodeI1 && "Should only process reachable instructions");
87745f7ddb14SDimitry Andric assert((NodeI1 == NodeI2) ==
87755f7ddb14SDimitry Andric (NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
87765f7ddb14SDimitry Andric "Different nodes should have different DFS numbers");
87775f7ddb14SDimitry Andric if (NodeI1 != NodeI2)
87785f7ddb14SDimitry Andric return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
87795f7ddb14SDimitry Andric InstructionsState S = getSameOpcode({I1, I2});
87805f7ddb14SDimitry Andric if (S.getOpcode())
87815f7ddb14SDimitry Andric return false;
87825f7ddb14SDimitry Andric return I1->getOpcode() < I2->getOpcode();
87835f7ddb14SDimitry Andric }
87845f7ddb14SDimitry Andric if (isa<Constant>(V->getValueOperand()) &&
87855f7ddb14SDimitry Andric isa<Constant>(V2->getValueOperand()))
87865f7ddb14SDimitry Andric return false;
87875f7ddb14SDimitry Andric return V->getValueOperand()->getValueID() <
87885f7ddb14SDimitry Andric V2->getValueOperand()->getValueID();
87895f7ddb14SDimitry Andric };
87905f7ddb14SDimitry Andric
87915f7ddb14SDimitry Andric auto &&AreCompatibleStores = [](StoreInst *V1, StoreInst *V2) {
87925f7ddb14SDimitry Andric if (V1 == V2)
87935f7ddb14SDimitry Andric return true;
87945f7ddb14SDimitry Andric if (V1->getPointerOperandType() != V2->getPointerOperandType())
87955f7ddb14SDimitry Andric return false;
87965f7ddb14SDimitry Andric // Undefs are compatible with any other value.
87975f7ddb14SDimitry Andric if (isa<UndefValue>(V1->getValueOperand()) ||
87985f7ddb14SDimitry Andric isa<UndefValue>(V2->getValueOperand()))
87995f7ddb14SDimitry Andric return true;
88005f7ddb14SDimitry Andric if (auto *I1 = dyn_cast<Instruction>(V1->getValueOperand()))
88015f7ddb14SDimitry Andric if (auto *I2 = dyn_cast<Instruction>(V2->getValueOperand())) {
88025f7ddb14SDimitry Andric if (I1->getParent() != I2->getParent())
88035f7ddb14SDimitry Andric return false;
88045f7ddb14SDimitry Andric InstructionsState S = getSameOpcode({I1, I2});
88055f7ddb14SDimitry Andric return S.getOpcode() > 0;
88065f7ddb14SDimitry Andric }
88075f7ddb14SDimitry Andric if (isa<Constant>(V1->getValueOperand()) &&
88085f7ddb14SDimitry Andric isa<Constant>(V2->getValueOperand()))
88095f7ddb14SDimitry Andric return true;
88105f7ddb14SDimitry Andric return V1->getValueOperand()->getValueID() ==
88115f7ddb14SDimitry Andric V2->getValueOperand()->getValueID();
88125f7ddb14SDimitry Andric };
88135f7ddb14SDimitry Andric
88140b57cec5SDimitry Andric // Attempt to sort and vectorize each of the store-groups.
88155f7ddb14SDimitry Andric for (auto &Pair : Stores) {
88165f7ddb14SDimitry Andric if (Pair.second.size() < 2)
88170b57cec5SDimitry Andric continue;
88180b57cec5SDimitry Andric
88190b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length "
88205f7ddb14SDimitry Andric << Pair.second.size() << ".\n");
88210b57cec5SDimitry Andric
88225f7ddb14SDimitry Andric stable_sort(Pair.second, StoreSorter);
88235f7ddb14SDimitry Andric
88245f7ddb14SDimitry Andric // Try to vectorize elements based on their compatibility.
88255f7ddb14SDimitry Andric for (ArrayRef<StoreInst *>::iterator IncIt = Pair.second.begin(),
88265f7ddb14SDimitry Andric E = Pair.second.end();
88275f7ddb14SDimitry Andric IncIt != E;) {
88285f7ddb14SDimitry Andric
88295f7ddb14SDimitry Andric // Look for the next elements with the same type.
88305f7ddb14SDimitry Andric ArrayRef<StoreInst *>::iterator SameTypeIt = IncIt;
88315f7ddb14SDimitry Andric Type *EltTy = (*IncIt)->getPointerOperand()->getType();
88325f7ddb14SDimitry Andric
88335f7ddb14SDimitry Andric while (SameTypeIt != E && AreCompatibleStores(*SameTypeIt, *IncIt))
88345f7ddb14SDimitry Andric ++SameTypeIt;
88355f7ddb14SDimitry Andric
88365f7ddb14SDimitry Andric // Try to vectorize them.
88375f7ddb14SDimitry Andric unsigned NumElts = (SameTypeIt - IncIt);
88385f7ddb14SDimitry Andric LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize starting at stores ("
88395f7ddb14SDimitry Andric << NumElts << ")\n");
88405f7ddb14SDimitry Andric if (NumElts > 1 && !EltTy->getPointerElementType()->isVectorTy() &&
88415f7ddb14SDimitry Andric vectorizeStores(makeArrayRef(IncIt, NumElts), R)) {
88425f7ddb14SDimitry Andric // Success start over because instructions might have been changed.
88435f7ddb14SDimitry Andric Changed = true;
88445f7ddb14SDimitry Andric }
88455f7ddb14SDimitry Andric
88465f7ddb14SDimitry Andric // Start over at the next instruction of a different type (or the end).
88475f7ddb14SDimitry Andric IncIt = SameTypeIt;
88485f7ddb14SDimitry Andric }
88490b57cec5SDimitry Andric }
88500b57cec5SDimitry Andric return Changed;
88510b57cec5SDimitry Andric }
88520b57cec5SDimitry Andric
// Legacy pass-manager registration boilerplate for the SLP vectorizer.

// Unique address used by the legacy pass manager to identify this pass.
char SLPVectorizer::ID = 0;

// Human-readable pass name shown in -debug-pass and pass listings.
static const char lv_name[] = "SLP Vectorizer";

// Register the pass and declare the analyses it depends on so the legacy
// pass manager schedules them before running the vectorizer.
INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_DEPENDENCY(InjectTLIMappingsLegacy)
INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)

// Factory used by the legacy pass pipeline to create the pass instance.
Pass *llvm::createSLPVectorizerPass() { return new SLPVectorizer(); }
8869