1f22ef01cSRoman Divacky //===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
2f22ef01cSRoman Divacky //
3f22ef01cSRoman Divacky // The LLVM Compiler Infrastructure
4f22ef01cSRoman Divacky //
5f22ef01cSRoman Divacky // This file is distributed under the University of Illinois Open Source
6f22ef01cSRoman Divacky // License. See LICENSE.TXT for details.
7f22ef01cSRoman Divacky //
8f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
9f22ef01cSRoman Divacky //
10f22ef01cSRoman Divacky // This transformation analyzes and transforms the induction variables (and
11f22ef01cSRoman Divacky // computations derived from them) into forms suitable for efficient execution
12f22ef01cSRoman Divacky // on the target.
13f22ef01cSRoman Divacky //
14f22ef01cSRoman Divacky // This pass performs a strength reduction on array references inside loops that
15f22ef01cSRoman Divacky // have as one or more of their components the loop induction variable, it
16f22ef01cSRoman Divacky // rewrites expressions to take advantage of scaled-index addressing modes
17f22ef01cSRoman Divacky // available on the target, and it performs a variety of other optimizations
18f22ef01cSRoman Divacky // related to loop induction variables.
19f22ef01cSRoman Divacky //
20f22ef01cSRoman Divacky // Terminology note: this code has a lot of handling for "post-increment" or
21f22ef01cSRoman Divacky // "post-inc" users. This is not talking about post-increment addressing modes;
22f22ef01cSRoman Divacky // it is instead talking about code like this:
23f22ef01cSRoman Divacky //
24f22ef01cSRoman Divacky // %i = phi [ 0, %entry ], [ %i.next, %latch ]
25f22ef01cSRoman Divacky // ...
26f22ef01cSRoman Divacky // %i.next = add %i, 1
27f22ef01cSRoman Divacky // %c = icmp eq %i.next, %n
28f22ef01cSRoman Divacky //
29f22ef01cSRoman Divacky // The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
30f22ef01cSRoman Divacky // it's useful to think about these as the same register, with some uses using
31ff0cc061SDimitry Andric // the value of the register before the add and some using it after. In this
32f22ef01cSRoman Divacky // example, the icmp is a post-increment user, since it uses %i.next, which is
33f22ef01cSRoman Divacky // the value of the induction variable after the increment. The other common
34f22ef01cSRoman Divacky // case of post-increment users is users outside the loop.
35f22ef01cSRoman Divacky //
36f22ef01cSRoman Divacky // TODO: More sophistication in the way Formulae are generated and filtered.
37f22ef01cSRoman Divacky //
38f22ef01cSRoman Divacky // TODO: Handle multiple loops at a time.
39f22ef01cSRoman Divacky //
40139f7f9bSDimitry Andric // TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
41139f7f9bSDimitry Andric // of a GlobalValue?
42f22ef01cSRoman Divacky //
43f22ef01cSRoman Divacky // TODO: When truncation is free, truncate ICmp users' operands to make it a
44f22ef01cSRoman Divacky // smaller encoding (on x86 at least).
45f22ef01cSRoman Divacky //
46f22ef01cSRoman Divacky // TODO: When a negated register is used by an add (such as in a list of
47f22ef01cSRoman Divacky // multiple base registers, or as the increment expression in an addrec),
48f22ef01cSRoman Divacky // we may not actually need both reg and (-1 * reg) in registers; the
49f22ef01cSRoman Divacky // negation can be implemented by using a sub instead of an add. The
50f22ef01cSRoman Divacky // lack of support for taking this into consideration when making
51f22ef01cSRoman Divacky // register pressure decisions is partly worked around by the "Special"
52f22ef01cSRoman Divacky // use kind.
53f22ef01cSRoman Divacky //
54f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
55f22ef01cSRoman Divacky
56d88c1a5aSDimitry Andric #include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
57d88c1a5aSDimitry Andric #include "llvm/ADT/APInt.h"
58d88c1a5aSDimitry Andric #include "llvm/ADT/DenseMap.h"
59139f7f9bSDimitry Andric #include "llvm/ADT/DenseSet.h"
6091bc56edSDimitry Andric #include "llvm/ADT/Hashing.h"
61d88c1a5aSDimitry Andric #include "llvm/ADT/PointerIntPair.h"
62f1a29dd3SDimitry Andric #include "llvm/ADT/STLExtras.h"
63139f7f9bSDimitry Andric #include "llvm/ADT/SetVector.h"
64139f7f9bSDimitry Andric #include "llvm/ADT/SmallBitVector.h"
65d88c1a5aSDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
66d88c1a5aSDimitry Andric #include "llvm/ADT/SmallSet.h"
67d88c1a5aSDimitry Andric #include "llvm/ADT/SmallVector.h"
682cab237bSDimitry Andric #include "llvm/ADT/iterator_range.h"
69139f7f9bSDimitry Andric #include "llvm/Analysis/IVUsers.h"
702cab237bSDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h"
71d88c1a5aSDimitry Andric #include "llvm/Analysis/LoopInfo.h"
72f22ef01cSRoman Divacky #include "llvm/Analysis/LoopPass.h"
73d88c1a5aSDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
74f22ef01cSRoman Divacky #include "llvm/Analysis/ScalarEvolutionExpander.h"
75d88c1a5aSDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h"
76d88c1a5aSDimitry Andric #include "llvm/Analysis/ScalarEvolutionNormalization.h"
77139f7f9bSDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
784ba319b5SDimitry Andric #include "llvm/Transforms/Utils/Local.h"
794ba319b5SDimitry Andric #include "llvm/Config/llvm-config.h"
80d88c1a5aSDimitry Andric #include "llvm/IR/BasicBlock.h"
81d88c1a5aSDimitry Andric #include "llvm/IR/Constant.h"
82139f7f9bSDimitry Andric #include "llvm/IR/Constants.h"
83139f7f9bSDimitry Andric #include "llvm/IR/DerivedTypes.h"
8491bc56edSDimitry Andric #include "llvm/IR/Dominators.h"
85d88c1a5aSDimitry Andric #include "llvm/IR/GlobalValue.h"
86f1a29dd3SDimitry Andric #include "llvm/IR/IRBuilder.h"
872cab237bSDimitry Andric #include "llvm/IR/InstrTypes.h"
88d88c1a5aSDimitry Andric #include "llvm/IR/Instruction.h"
89139f7f9bSDimitry Andric #include "llvm/IR/Instructions.h"
90139f7f9bSDimitry Andric #include "llvm/IR/IntrinsicInst.h"
912cab237bSDimitry Andric #include "llvm/IR/Intrinsics.h"
92f1a29dd3SDimitry Andric #include "llvm/IR/Module.h"
93d88c1a5aSDimitry Andric #include "llvm/IR/OperandTraits.h"
94d88c1a5aSDimitry Andric #include "llvm/IR/Operator.h"
952cab237bSDimitry Andric #include "llvm/IR/PassManager.h"
96d88c1a5aSDimitry Andric #include "llvm/IR/Type.h"
972cab237bSDimitry Andric #include "llvm/IR/Use.h"
982cab237bSDimitry Andric #include "llvm/IR/User.h"
99d88c1a5aSDimitry Andric #include "llvm/IR/Value.h"
10091bc56edSDimitry Andric #include "llvm/IR/ValueHandle.h"
101d88c1a5aSDimitry Andric #include "llvm/Pass.h"
102d88c1a5aSDimitry Andric #include "llvm/Support/Casting.h"
1036122f3e6SDimitry Andric #include "llvm/Support/CommandLine.h"
104d88c1a5aSDimitry Andric #include "llvm/Support/Compiler.h"
105139f7f9bSDimitry Andric #include "llvm/Support/Debug.h"
106d88c1a5aSDimitry Andric #include "llvm/Support/ErrorHandling.h"
107d88c1a5aSDimitry Andric #include "llvm/Support/MathExtras.h"
108f22ef01cSRoman Divacky #include "llvm/Support/raw_ostream.h"
109d88c1a5aSDimitry Andric #include "llvm/Transforms/Scalar.h"
1104ba319b5SDimitry Andric #include "llvm/Transforms/Utils.h"
111139f7f9bSDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
112f22ef01cSRoman Divacky #include <algorithm>
113d88c1a5aSDimitry Andric #include <cassert>
114d88c1a5aSDimitry Andric #include <cstddef>
115d88c1a5aSDimitry Andric #include <cstdint>
116d88c1a5aSDimitry Andric #include <cstdlib>
117d88c1a5aSDimitry Andric #include <iterator>
1182cab237bSDimitry Andric #include <limits>
119d88c1a5aSDimitry Andric #include <map>
120d88c1a5aSDimitry Andric #include <utility>
121d88c1a5aSDimitry Andric
122f22ef01cSRoman Divacky using namespace llvm;
123f22ef01cSRoman Divacky
12491bc56edSDimitry Andric #define DEBUG_TYPE "loop-reduce"
12591bc56edSDimitry Andric
/// MaxIVUsers is an arbitrary threshold that provides an early opportunity for
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but catches the
/// worst cases before LSR burns too much compile time and stack space.
static const unsigned MaxIVUsers = 200;

// Temporary flag to cleanup congruent phis after LSR phi expansion.
// It's currently disabled until we can determine whether it's truly useful or
// not. The flag should be removed after the v3.0 release.
// This is now needed for ivchains.
static cl::opt<bool> EnablePhiElim(
  "enable-lsr-phielim", cl::Hidden, cl::init(true),
  cl::desc("Enable LSR phi elimination"));

// The flag adds instruction count to solutions cost comparision.
static cl::opt<bool> InsnsCost(
  "lsr-insns-cost", cl::Hidden, cl::init(true),
  cl::desc("Add instruction count to a LSR cost model"));

// Flag to choose how to narrow complex lsr solution
static cl::opt<bool> LSRExpNarrow(
  "lsr-exp-narrow", cl::Hidden, cl::init(false),
  cl::desc("Narrow LSR complex solution using"
           " expectation of registers number"));

// Flag to narrow search space by filtering non-optimal formulae with
// the same ScaledReg and Scale.
static cl::opt<bool> FilterSameScaledReg(
    "lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true),
    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

// Cap on the number of formula combinations explored by the solver; defaults
// to 65535 so pathological loops bail out instead of exploding compile time.
static cl::opt<unsigned> ComplexityLimit(
  "lsr-complexity-limit", cl::Hidden,
  cl::init(std::numeric_limits<uint16_t>::max()),
  cl::desc("LSR search space complexity limit"));

#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
  "stress-ivchain", cl::Hidden, cl::init(false),
  cl::desc("Stress test LSR IV chains"));
#else
// In release builds the stress flag is a plain constant so uses of it fold
// away entirely.
static bool StressIVChain = false;
#endif
1716122f3e6SDimitry Andric
172f22ef01cSRoman Divacky namespace {
173f22ef01cSRoman Divacky
/// A lightweight (type, address-space) pair describing a memory access.
struct MemAccessTy {
  /// Used in situations where the accessed memory type is unknown.
  static const unsigned UnknownAddressSpace =
      std::numeric_limits<unsigned>::max();

  // The type of the accessed memory; only its bit size is meaningful here.
  Type *MemTy = nullptr;
  // Target address space of the access.
  unsigned AddrSpace = UnknownAddressSpace;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

  bool operator==(MemAccessTy Other) const {
    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
  }

  bool operator!=(MemAccessTy Other) const { return !(*this == Other); }

  /// Build a MemAccessTy representing an access of unknown type (void) in
  /// address space \p AS.
  static MemAccessTy getUnknown(LLVMContext &Ctx,
                                unsigned AS = UnknownAddressSpace) {
    return MemAccessTy(Type::getVoidTy(Ctx), AS);
  }

  Type *getType() { return MemTy; }
};
1987d523365SDimitry Andric
/// This class holds data which is used to order reuse candidates.
class RegSortData {
public:
  /// This represents the set of LSRUse indices which reference
  /// a particular register.
  SmallBitVector UsedByIndices;

  void print(raw_ostream &OS) const;
  void dump() const;
};
209f22ef01cSRoman Divacky
210d88c1a5aSDimitry Andric } // end anonymous namespace
211f22ef01cSRoman Divacky
2122cab237bSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & OS) const213f22ef01cSRoman Divacky void RegSortData::print(raw_ostream &OS) const {
214f22ef01cSRoman Divacky OS << "[NumUses=" << UsedByIndices.count() << ']';
215f22ef01cSRoman Divacky }
216f22ef01cSRoman Divacky
dump() const2177a7e6055SDimitry Andric LLVM_DUMP_METHOD void RegSortData::dump() const {
218f22ef01cSRoman Divacky print(errs()); errs() << '\n';
219f22ef01cSRoman Divacky }
2207a7e6055SDimitry Andric #endif
221f22ef01cSRoman Divacky
222f22ef01cSRoman Divacky namespace {
223f22ef01cSRoman Divacky
/// Map register candidates to information about how they are used.
class RegUseTracker {
  using RegUsesTy = DenseMap<const SCEV *, RegSortData>;

  // Per-register usage bits, keyed by the register's SCEV.
  RegUsesTy RegUsesMap;
  // Registers in first-seen order, so iteration is deterministic rather than
  // following DenseMap's hash order.
  SmallVector<const SCEV *, 16> RegSequence;

public:
  /// Record that the use with index \p LUIdx references \p Reg.
  void countRegister(const SCEV *Reg, size_t LUIdx);
  /// Clear the bit recording that use \p LUIdx references \p Reg.
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  /// Move the use-bit for \p LastLUIdx into slot \p LUIdx and shrink every
  /// bit vector to exclude indices >= \p LastLUIdx.
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  /// Return true if some use other than \p LUIdx references \p Reg.
  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;

  /// Return the set of use indices referencing \p Reg; asserts Reg is known.
  const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;

  void clear();

  // Iteration yields registers in first-seen order (see RegSequence).
  using iterator = SmallVectorImpl<const SCEV *>::iterator;
  using const_iterator = SmallVectorImpl<const SCEV *>::const_iterator;

  iterator begin() { return RegSequence.begin(); }
  iterator end() { return RegSequence.end(); }
  const_iterator begin() const { return RegSequence.begin(); }
  const_iterator end() const { return RegSequence.end(); }
};
250f22ef01cSRoman Divacky
251d88c1a5aSDimitry Andric } // end anonymous namespace
252f22ef01cSRoman Divacky
253f22ef01cSRoman Divacky void
countRegister(const SCEV * Reg,size_t LUIdx)2547d523365SDimitry Andric RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
255f22ef01cSRoman Divacky std::pair<RegUsesTy::iterator, bool> Pair =
256f22ef01cSRoman Divacky RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
257f22ef01cSRoman Divacky RegSortData &RSD = Pair.first->second;
258f22ef01cSRoman Divacky if (Pair.second)
259f22ef01cSRoman Divacky RegSequence.push_back(Reg);
260f22ef01cSRoman Divacky RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
261f22ef01cSRoman Divacky RSD.UsedByIndices.set(LUIdx);
262f22ef01cSRoman Divacky }
263f22ef01cSRoman Divacky
264f22ef01cSRoman Divacky void
dropRegister(const SCEV * Reg,size_t LUIdx)2657d523365SDimitry Andric RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
266f22ef01cSRoman Divacky RegUsesTy::iterator It = RegUsesMap.find(Reg);
267f22ef01cSRoman Divacky assert(It != RegUsesMap.end());
268f22ef01cSRoman Divacky RegSortData &RSD = It->second;
269f22ef01cSRoman Divacky assert(RSD.UsedByIndices.size() > LUIdx);
270f22ef01cSRoman Divacky RSD.UsedByIndices.reset(LUIdx);
271f22ef01cSRoman Divacky }
272f22ef01cSRoman Divacky
273f22ef01cSRoman Divacky void
swapAndDropUse(size_t LUIdx,size_t LastLUIdx)2747d523365SDimitry Andric RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
2752754fe60SDimitry Andric assert(LUIdx <= LastLUIdx);
2762754fe60SDimitry Andric
2772754fe60SDimitry Andric // Update RegUses. The data structure is not optimized for this purpose;
2782754fe60SDimitry Andric // we must iterate through it and update each of the bit vectors.
279ff0cc061SDimitry Andric for (auto &Pair : RegUsesMap) {
280ff0cc061SDimitry Andric SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
2812754fe60SDimitry Andric if (LUIdx < UsedByIndices.size())
2822754fe60SDimitry Andric UsedByIndices[LUIdx] =
283d88c1a5aSDimitry Andric LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
2842754fe60SDimitry Andric UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
2852754fe60SDimitry Andric }
286f22ef01cSRoman Divacky }
287f22ef01cSRoman Divacky
288f22ef01cSRoman Divacky bool
isRegUsedByUsesOtherThan(const SCEV * Reg,size_t LUIdx) const289f22ef01cSRoman Divacky RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
290e580952dSDimitry Andric RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
291e580952dSDimitry Andric if (I == RegUsesMap.end())
292e580952dSDimitry Andric return false;
293e580952dSDimitry Andric const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
294f22ef01cSRoman Divacky int i = UsedByIndices.find_first();
295f22ef01cSRoman Divacky if (i == -1) return false;
296f22ef01cSRoman Divacky if ((size_t)i != LUIdx) return true;
297f22ef01cSRoman Divacky return UsedByIndices.find_next(i) != -1;
298f22ef01cSRoman Divacky }
299f22ef01cSRoman Divacky
getUsedByIndices(const SCEV * Reg) const300f22ef01cSRoman Divacky const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
301f22ef01cSRoman Divacky RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
302f22ef01cSRoman Divacky assert(I != RegUsesMap.end() && "Unknown register!");
303f22ef01cSRoman Divacky return I->second.UsedByIndices;
304f22ef01cSRoman Divacky }
305f22ef01cSRoman Divacky
clear()306f22ef01cSRoman Divacky void RegUseTracker::clear() {
307f22ef01cSRoman Divacky RegUsesMap.clear();
308f22ef01cSRoman Divacky RegSequence.clear();
309f22ef01cSRoman Divacky }
310f22ef01cSRoman Divacky
311f22ef01cSRoman Divacky namespace {
312f22ef01cSRoman Divacky
/// This class holds information that describes a formula for computing
/// satisfying a use. It may include broken-out immediates and scaled registers.
struct Formula {
  /// Global base address used for complex addressing.
  GlobalValue *BaseGV = nullptr;

  /// Base offset for complex addressing.
  int64_t BaseOffset = 0;

  /// Whether any complex addressing has a base register.
  bool HasBaseReg = false;

  /// The scale of any complex addressing.
  int64_t Scale = 0;

  /// The list of "base" registers for this use. When this is non-empty. The
  /// canonical representation of a formula is
  /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
  /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
  /// 3. The reg containing recurrent expr related with currect loop in the
  /// formula should be put in the ScaledReg.
  /// #1 enforces that the scaled register is always used when at least two
  /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
  /// #2 enforces that 1 * reg is reg.
  /// #3 ensures invariant regs with respect to current loop can be combined
  /// together in LSR codegen.
  /// This invariant can be temporarily broken while building a formula.
  /// However, every formula inserted into the LSRInstance must be in canonical
  /// form.
  SmallVector<const SCEV *, 4> BaseRegs;

  /// The 'scaled' register for this use. This should be non-null when Scale is
  /// not zero.
  const SCEV *ScaledReg = nullptr;

  /// An additional constant offset which added near the use. This requires a
  /// temporary register, but the offset itself can live in an add immediate
  /// field rather than a register.
  int64_t UnfoldedOffset = 0;

  Formula() = default;

  /// Build an initial formula from \p S, splitting it into loop-invariant and
  /// loop-variant base registers.
  void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);

  /// Test whether this formula satisfies the canonical-form rules above.
  bool isCanonical(const Loop &L) const;

  /// Morph this formula into canonical form (see BaseRegs rules above).
  void canonicalize(const Loop &L);

  /// Turn reg1 + 1*reg2 into reg1 + reg2; returns false if Scale != 1.
  bool unscale();

  /// True if the formula is a single base register with no offsets.
  bool hasZeroEnd() const;

  size_t getNumRegs() const;
  Type *getType() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;

  void print(raw_ostream &OS) const;
  void dump() const;
};
377f22ef01cSRoman Divacky
378d88c1a5aSDimitry Andric } // end anonymous namespace
379f22ef01cSRoman Divacky
/// Recursion helper for initialMatch.
///
/// Partitions \p S into \p Good (subexpressions that properly dominate the
/// loop header, i.e. loop-invariant/computable parts) and \p Bad (everything
/// else), recursing through adds, affine addrecs, and negations.
static void DoInitialMatch(const SCEV *S, Loop *L,
                           SmallVectorImpl<const SCEV *> &Good,
                           SmallVectorImpl<const SCEV *> &Bad,
                           ScalarEvolution &SE) {
  // Collect expressions which properly dominate the loop header.
  if (SE.properlyDominates(S, L->getHeader())) {
    Good.push_back(S);
    return;
  }

  // Look at add operands.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    for (const SCEV *S : Add->operands())
      DoInitialMatch(S, L, Good, Bad, SE);
    return;
  }

  // Look at addrec operands.
  // Split {Start,+,Step} into Start plus {0,+,Step} so the start value can be
  // matched independently of the recurrence.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
    if (!AR->getStart()->isZero() && AR->isAffine()) {
      DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
      DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
                                      AR->getStepRecurrence(SE),
                                      // FIXME: AR->getNoWrapFlags()
                                      AR->getLoop(), SCEV::FlagAnyWrap),
                     L, Good, Bad, SE);
      return;
    }

  // Handle a multiplication by -1 (negation) if it didn't fold.
  // Match the multiplicand recursively, then re-apply the negation to each
  // matched piece so the partition still sums to S.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
    if (Mul->getOperand(0)->isAllOnesValue()) {
      SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
      const SCEV *NewMul = SE.getMulExpr(Ops);

      SmallVector<const SCEV *, 4> MyGood;
      SmallVector<const SCEV *, 4> MyBad;
      DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
      const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
        SE.getEffectiveSCEVType(NewMul->getType())));
      for (const SCEV *S : MyGood)
        Good.push_back(SE.getMulExpr(NegOne, S));
      for (const SCEV *S : MyBad)
        Bad.push_back(SE.getMulExpr(NegOne, S));
      return;
    }

  // Ok, we can't do anything interesting. Just stuff the whole thing into a
  // register and hope for the best.
  Bad.push_back(S);
}
432f22ef01cSRoman Divacky
4337d523365SDimitry Andric /// Incorporate loop-variant parts of S into this Formula, attempting to keep
4347d523365SDimitry Andric /// all loop-invariant and loop-computable values in a single base register.
initialMatch(const SCEV * S,Loop * L,ScalarEvolution & SE)4357d523365SDimitry Andric void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
436f22ef01cSRoman Divacky SmallVector<const SCEV *, 4> Good;
437f22ef01cSRoman Divacky SmallVector<const SCEV *, 4> Bad;
4382754fe60SDimitry Andric DoInitialMatch(S, L, Good, Bad, SE);
439f22ef01cSRoman Divacky if (!Good.empty()) {
440f22ef01cSRoman Divacky const SCEV *Sum = SE.getAddExpr(Good);
441f22ef01cSRoman Divacky if (!Sum->isZero())
442f22ef01cSRoman Divacky BaseRegs.push_back(Sum);
443139f7f9bSDimitry Andric HasBaseReg = true;
444f22ef01cSRoman Divacky }
445f22ef01cSRoman Divacky if (!Bad.empty()) {
446f22ef01cSRoman Divacky const SCEV *Sum = SE.getAddExpr(Bad);
447f22ef01cSRoman Divacky if (!Sum->isZero())
448f22ef01cSRoman Divacky BaseRegs.push_back(Sum);
449139f7f9bSDimitry Andric HasBaseReg = true;
450f22ef01cSRoman Divacky }
4517a7e6055SDimitry Andric canonicalize(*L);
45291bc56edSDimitry Andric }
45391bc56edSDimitry Andric
4544ba319b5SDimitry Andric /// Check whether or not this formula satisfies the canonical
45591bc56edSDimitry Andric /// representation.
45691bc56edSDimitry Andric /// \see Formula::BaseRegs.
isCanonical(const Loop & L) const4577a7e6055SDimitry Andric bool Formula::isCanonical(const Loop &L) const {
4587a7e6055SDimitry Andric if (!ScaledReg)
45991bc56edSDimitry Andric return BaseRegs.size() <= 1;
4607a7e6055SDimitry Andric
4617a7e6055SDimitry Andric if (Scale != 1)
4627a7e6055SDimitry Andric return true;
4637a7e6055SDimitry Andric
4647a7e6055SDimitry Andric if (Scale == 1 && BaseRegs.empty())
4657a7e6055SDimitry Andric return false;
4667a7e6055SDimitry Andric
4677a7e6055SDimitry Andric const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
4687a7e6055SDimitry Andric if (SAR && SAR->getLoop() == &L)
4697a7e6055SDimitry Andric return true;
4707a7e6055SDimitry Andric
4717a7e6055SDimitry Andric // If ScaledReg is not a recurrent expr, or it is but its loop is not current
4727a7e6055SDimitry Andric // loop, meanwhile BaseRegs contains a recurrent expr reg related with current
4737a7e6055SDimitry Andric // loop, we want to swap the reg in BaseRegs with ScaledReg.
4747a7e6055SDimitry Andric auto I =
4757a7e6055SDimitry Andric find_if(make_range(BaseRegs.begin(), BaseRegs.end()), [&](const SCEV *S) {
4767a7e6055SDimitry Andric return isa<const SCEVAddRecExpr>(S) &&
4777a7e6055SDimitry Andric (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
4787a7e6055SDimitry Andric });
4797a7e6055SDimitry Andric return I == BaseRegs.end();
48091bc56edSDimitry Andric }
48191bc56edSDimitry Andric
4824ba319b5SDimitry Andric /// Helper method to morph a formula into its canonical representation.
48391bc56edSDimitry Andric /// \see Formula::BaseRegs.
48491bc56edSDimitry Andric /// Every formula having more than one base register, must use the ScaledReg
48591bc56edSDimitry Andric /// field. Otherwise, we would have to do special cases everywhere in LSR
48691bc56edSDimitry Andric /// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
48791bc56edSDimitry Andric /// On the other hand, 1*reg should be canonicalized into reg.
canonicalize(const Loop & L)4887a7e6055SDimitry Andric void Formula::canonicalize(const Loop &L) {
4897a7e6055SDimitry Andric if (isCanonical(L))
49091bc56edSDimitry Andric return;
49191bc56edSDimitry Andric // So far we did not need this case. This is easy to implement but it is
49291bc56edSDimitry Andric // useless to maintain dead code. Beside it could hurt compile time.
49391bc56edSDimitry Andric assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
4947a7e6055SDimitry Andric
49591bc56edSDimitry Andric // Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg.
4967a7e6055SDimitry Andric if (!ScaledReg) {
49791bc56edSDimitry Andric ScaledReg = BaseRegs.back();
49891bc56edSDimitry Andric BaseRegs.pop_back();
49991bc56edSDimitry Andric Scale = 1;
5007a7e6055SDimitry Andric }
5017a7e6055SDimitry Andric
5027a7e6055SDimitry Andric // If ScaledReg is an invariant with respect to L, find the reg from
5037a7e6055SDimitry Andric // BaseRegs containing the recurrent expr related with Loop L. Swap the
5047a7e6055SDimitry Andric // reg with ScaledReg.
5057a7e6055SDimitry Andric const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
5067a7e6055SDimitry Andric if (!SAR || SAR->getLoop() != &L) {
5077a7e6055SDimitry Andric auto I = find_if(make_range(BaseRegs.begin(), BaseRegs.end()),
5087a7e6055SDimitry Andric [&](const SCEV *S) {
5097a7e6055SDimitry Andric return isa<const SCEVAddRecExpr>(S) &&
5107a7e6055SDimitry Andric (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
5117a7e6055SDimitry Andric });
5127a7e6055SDimitry Andric if (I != BaseRegs.end())
5137a7e6055SDimitry Andric std::swap(ScaledReg, *I);
5147a7e6055SDimitry Andric }
51591bc56edSDimitry Andric }
51691bc56edSDimitry Andric
5174ba319b5SDimitry Andric /// Get rid of the scale in the formula.
51891bc56edSDimitry Andric /// In other words, this method morphes reg1 + 1*reg2 into reg1 + reg2.
51991bc56edSDimitry Andric /// \return true if it was possible to get rid of the scale, false otherwise.
52091bc56edSDimitry Andric /// \note After this operation the formula may not be in the canonical form.
unscale()5217d523365SDimitry Andric bool Formula::unscale() {
52291bc56edSDimitry Andric if (Scale != 1)
52391bc56edSDimitry Andric return false;
52491bc56edSDimitry Andric Scale = 0;
52591bc56edSDimitry Andric BaseRegs.push_back(ScaledReg);
52691bc56edSDimitry Andric ScaledReg = nullptr;
52791bc56edSDimitry Andric return true;
528f22ef01cSRoman Divacky }
529f22ef01cSRoman Divacky
hasZeroEnd() const5307a7e6055SDimitry Andric bool Formula::hasZeroEnd() const {
5317a7e6055SDimitry Andric if (UnfoldedOffset || BaseOffset)
5327a7e6055SDimitry Andric return false;
5337a7e6055SDimitry Andric if (BaseRegs.size() != 1 || ScaledReg)
5347a7e6055SDimitry Andric return false;
5357a7e6055SDimitry Andric return true;
5367a7e6055SDimitry Andric }
5377a7e6055SDimitry Andric
5387d523365SDimitry Andric /// Return the total number of register operands used by this formula. This does
5397d523365SDimitry Andric /// not include register uses implied by non-constant addrec strides.
getNumRegs() const54091bc56edSDimitry Andric size_t Formula::getNumRegs() const {
541f22ef01cSRoman Divacky return !!ScaledReg + BaseRegs.size();
542f22ef01cSRoman Divacky }
543f22ef01cSRoman Divacky
5447d523365SDimitry Andric /// Return the type of this formula, if it has one, or null otherwise. This type
5457d523365SDimitry Andric /// is meaningless except for the bit size.
getType() const5466122f3e6SDimitry Andric Type *Formula::getType() const {
547f22ef01cSRoman Divacky return !BaseRegs.empty() ? BaseRegs.front()->getType() :
548f22ef01cSRoman Divacky ScaledReg ? ScaledReg->getType() :
549139f7f9bSDimitry Andric BaseGV ? BaseGV->getType() :
55091bc56edSDimitry Andric nullptr;
551f22ef01cSRoman Divacky }
552f22ef01cSRoman Divacky
5537d523365SDimitry Andric /// Delete the given base reg from the BaseRegs list.
deleteBaseReg(const SCEV * & S)5547d523365SDimitry Andric void Formula::deleteBaseReg(const SCEV *&S) {
555f22ef01cSRoman Divacky if (&S != &BaseRegs.back())
556f22ef01cSRoman Divacky std::swap(S, BaseRegs.back());
557f22ef01cSRoman Divacky BaseRegs.pop_back();
558f22ef01cSRoman Divacky }
559f22ef01cSRoman Divacky
5607d523365SDimitry Andric /// Test if this formula references the given register.
referencesReg(const SCEV * S) const561f22ef01cSRoman Divacky bool Formula::referencesReg(const SCEV *S) const {
562d88c1a5aSDimitry Andric return S == ScaledReg || is_contained(BaseRegs, S);
563f22ef01cSRoman Divacky }
564f22ef01cSRoman Divacky
5657d523365SDimitry Andric /// Test whether this formula uses registers which are used by uses other than
5667d523365SDimitry Andric /// the use with the given index.
hasRegsUsedByUsesOtherThan(size_t LUIdx,const RegUseTracker & RegUses) const567f22ef01cSRoman Divacky bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
568f22ef01cSRoman Divacky const RegUseTracker &RegUses) const {
569f22ef01cSRoman Divacky if (ScaledReg)
570f22ef01cSRoman Divacky if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
571f22ef01cSRoman Divacky return true;
572ff0cc061SDimitry Andric for (const SCEV *BaseReg : BaseRegs)
573ff0cc061SDimitry Andric if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
574f22ef01cSRoman Divacky return true;
575f22ef01cSRoman Divacky return false;
576f22ef01cSRoman Divacky }
577f22ef01cSRoman Divacky
5782cab237bSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & OS) const579f22ef01cSRoman Divacky void Formula::print(raw_ostream &OS) const {
580f22ef01cSRoman Divacky bool First = true;
581139f7f9bSDimitry Andric if (BaseGV) {
582f22ef01cSRoman Divacky if (!First) OS << " + "; else First = false;
58391bc56edSDimitry Andric BaseGV->printAsOperand(OS, /*PrintType=*/false);
584f22ef01cSRoman Divacky }
585139f7f9bSDimitry Andric if (BaseOffset != 0) {
586f22ef01cSRoman Divacky if (!First) OS << " + "; else First = false;
587139f7f9bSDimitry Andric OS << BaseOffset;
588f22ef01cSRoman Divacky }
589ff0cc061SDimitry Andric for (const SCEV *BaseReg : BaseRegs) {
590f22ef01cSRoman Divacky if (!First) OS << " + "; else First = false;
591ff0cc061SDimitry Andric OS << "reg(" << *BaseReg << ')';
592f22ef01cSRoman Divacky }
593139f7f9bSDimitry Andric if (HasBaseReg && BaseRegs.empty()) {
594f22ef01cSRoman Divacky if (!First) OS << " + "; else First = false;
595f22ef01cSRoman Divacky OS << "**error: HasBaseReg**";
596139f7f9bSDimitry Andric } else if (!HasBaseReg && !BaseRegs.empty()) {
597f22ef01cSRoman Divacky if (!First) OS << " + "; else First = false;
598f22ef01cSRoman Divacky OS << "**error: !HasBaseReg**";
599f22ef01cSRoman Divacky }
600139f7f9bSDimitry Andric if (Scale != 0) {
601f22ef01cSRoman Divacky if (!First) OS << " + "; else First = false;
602139f7f9bSDimitry Andric OS << Scale << "*reg(";
603f22ef01cSRoman Divacky if (ScaledReg)
604f22ef01cSRoman Divacky OS << *ScaledReg;
605f22ef01cSRoman Divacky else
606f22ef01cSRoman Divacky OS << "<unknown>";
607f22ef01cSRoman Divacky OS << ')';
608f22ef01cSRoman Divacky }
609bd5abe19SDimitry Andric if (UnfoldedOffset != 0) {
61091bc56edSDimitry Andric if (!First) OS << " + ";
611bd5abe19SDimitry Andric OS << "imm(" << UnfoldedOffset << ')';
612bd5abe19SDimitry Andric }
613f22ef01cSRoman Divacky }
614f22ef01cSRoman Divacky
dump() const6157a7e6055SDimitry Andric LLVM_DUMP_METHOD void Formula::dump() const {
616f22ef01cSRoman Divacky print(errs()); errs() << '\n';
617f22ef01cSRoman Divacky }
6187a7e6055SDimitry Andric #endif
619f22ef01cSRoman Divacky
6207d523365SDimitry Andric /// Return true if the given addrec can be sign-extended without changing its
6217d523365SDimitry Andric /// value.
isAddRecSExtable(const SCEVAddRecExpr * AR,ScalarEvolution & SE)622f22ef01cSRoman Divacky static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
6236122f3e6SDimitry Andric Type *WideTy =
624f22ef01cSRoman Divacky IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
625f22ef01cSRoman Divacky return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
626f22ef01cSRoman Divacky }
627f22ef01cSRoman Divacky
6287d523365SDimitry Andric /// Return true if the given add can be sign-extended without changing its
6297d523365SDimitry Andric /// value.
isAddSExtable(const SCEVAddExpr * A,ScalarEvolution & SE)630f22ef01cSRoman Divacky static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
6316122f3e6SDimitry Andric Type *WideTy =
632f22ef01cSRoman Divacky IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
633f22ef01cSRoman Divacky return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
634f22ef01cSRoman Divacky }
635f22ef01cSRoman Divacky
6367d523365SDimitry Andric /// Return true if the given mul can be sign-extended without changing its
6377d523365SDimitry Andric /// value.
isMulSExtable(const SCEVMulExpr * M,ScalarEvolution & SE)638ffd1746dSEd Schouten static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
6396122f3e6SDimitry Andric Type *WideTy =
640ffd1746dSEd Schouten IntegerType::get(SE.getContext(),
641ffd1746dSEd Schouten SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
642ffd1746dSEd Schouten return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
643f22ef01cSRoman Divacky }
644f22ef01cSRoman Divacky
/// Return an expression for LHS /s RHS, if it can be determined and if the
/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
/// is true, expressions like (X * Y) /s Y are simplified to Y, ignoring that
/// the multiplication may overflow, which is useful when the result will be
/// used in a context where the most significant bits are ignored.
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
                                ScalarEvolution &SE,
                                bool IgnoreSignificantBits = false) {
  // Handle the trivial case, which works for any SCEV type.
  if (LHS == RHS)
    return SE.getConstant(LHS->getType(), 1);

  // Handle a few RHS special cases.
  const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
  if (RC) {
    const APInt &RA = RC->getAPInt();
    // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
    // some folding.
    if (RA.isAllOnesValue())
      return SE.getMulExpr(LHS, RC);
    // Handle x /s 1 as x.
    if (RA == 1)
      return LHS;
  }

  // Check for a division of a constant by a constant.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
    // A constant LHS is only divisible by a constant RHS.
    if (!RC)
      return nullptr;
    const APInt &LA = C->getAPInt();
    const APInt &RA = RC->getAPInt();
    // Only exact (remainder-free) divisions may be returned.
    if (LA.srem(RA) != 0)
      return nullptr;
    return SE.getConstant(LA.sdiv(RA));
  }

  // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
      // Divide the step and the start independently; both must divide exactly.
      const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
                                      IgnoreSignificantBits);
      if (!Step) return nullptr;
      const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
                                       IgnoreSignificantBits);
      if (!Start) return nullptr;
      // FlagNW is independent of the start value, step direction, and is
      // preserved with smaller magnitude steps.
      // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
      return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
    }
    return nullptr;
  }

  // Distribute the sdiv over add operands, if the add doesn't overflow.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
    if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
      SmallVector<const SCEV *, 8> Ops;
      // Every operand must divide exactly, or the whole division fails.
      for (const SCEV *S : Add->operands()) {
        const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits);
        if (!Op) return nullptr;
        Ops.push_back(Op);
      }
      return SE.getAddExpr(Ops);
    }
    return nullptr;
  }

  // Check for a multiply operand that we can pull RHS out of.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
    if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
      SmallVector<const SCEV *, 4> Ops;
      bool Found = false;
      // Divide exactly one operand by RHS; the others pass through unchanged.
      for (const SCEV *S : Mul->operands()) {
        if (!Found)
          if (const SCEV *Q = getExactSDiv(S, RHS, SE,
                                           IgnoreSignificantBits)) {
            S = Q;
            Found = true;
          }
        Ops.push_back(S);
      }
      return Found ? SE.getMulExpr(Ops) : nullptr;
    }
    return nullptr;
  }

  // Otherwise we don't know.
  return nullptr;
}
734f22ef01cSRoman Divacky
/// If S involves the addition of a constant integer value, return that integer
/// value, and mutate S to point to a new SCEV with that value excluded.
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
    // Only constants that fit in a signed 64-bit immediate can be extracted.
    if (C->getAPInt().getMinSignedBits() <= 64) {
      S = SE.getConstant(C->getType(), 0);
      return C->getValue()->getSExtValue();
    }
  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    // A constant addend, if present, is the first operand of an add.
    SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
    int64_t Result = ExtractImmediate(NewOps.front(), SE);
    if (Result != 0)
      S = SE.getAddExpr(NewOps);
    return Result;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    // Recurse into the start expression of the recurrence (operand 0).
    SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
    int64_t Result = ExtractImmediate(NewOps.front(), SE);
    if (Result != 0)
      S = SE.getAddRecExpr(NewOps, AR->getLoop(),
                           // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
                           SCEV::FlagAnyWrap);
    return Result;
  }
  // No extractable immediate; S is left unchanged.
  return 0;
}
760f22ef01cSRoman Divacky
/// If S involves the addition of a GlobalValue address, return that symbol, and
/// mutate S to point to a new SCEV with that value excluded.
static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
      S = SE.getConstant(GV->getType(), 0);
      return GV;
    }
  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    // A SCEVUnknown (such as a global) sorts to the back of an add.
    SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
    GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
    if (Result)
      S = SE.getAddExpr(NewOps);
    return Result;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    // Recurse into the start expression of the recurrence (operand 0).
    SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
    GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
    if (Result)
      S = SE.getAddRecExpr(NewOps, AR->getLoop(),
                           // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
                           SCEV::FlagAnyWrap);
    return Result;
  }
  // No symbol found; S is left unchanged.
  return nullptr;
}
786f22ef01cSRoman Divacky
/// Returns true if the specified instruction is using the specified value as an
/// address.
static bool isAddressUse(const TargetTransformInfo &TTI,
                         Instruction *Inst, Value *OperandVal) {
  // A load always uses its (sole) pointer operand as an address.
  bool isAddress = isa<LoadInst>(Inst);
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    // Only the pointer operand of a store is an address; the stored value
    // is not.
    if (SI->getPointerOperand() == OperandVal)
      isAddress = true;
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    // Addressing modes can also be folded into prefetches and a variety
    // of intrinsics.
    switch (II->getIntrinsicID()) {
    case Intrinsic::memset:
    case Intrinsic::prefetch:
      // The pointer is the first argument for both intrinsics.
      if (II->getArgOperand(0) == OperandVal)
        isAddress = true;
      break;
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      // Both the destination (arg 0) and source (arg 1) are addresses.
      if (II->getArgOperand(0) == OperandVal ||
          II->getArgOperand(1) == OperandVal)
        isAddress = true;
      break;
    default: {
      // Ask the target whether this is a memory intrinsic and, if so,
      // which operand is the pointer.
      MemIntrinsicInfo IntrInfo;
      if (TTI.getTgtMemIntrinsic(II, IntrInfo)) {
        if (IntrInfo.PtrVal == OperandVal)
          isAddress = true;
      }
    }
    }
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    if (RMW->getPointerOperand() == OperandVal)
      isAddress = true;
  } else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
    if (CmpX->getPointerOperand() == OperandVal)
      isAddress = true;
  }
  return isAddress;
}
827f22ef01cSRoman Divacky
/// Return the type of the memory being accessed.
static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
                                 Instruction *Inst, Value *OperandVal) {
  // Default to the instruction's own type and an unknown address space.
  MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace);
  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    // For a store, the accessed type is the type of the stored value.
    AccessTy.MemTy = SI->getOperand(0)->getType();
    AccessTy.AddrSpace = SI->getPointerAddressSpace();
  } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    AccessTy.AddrSpace = LI->getPointerAddressSpace();
  } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    AccessTy.AddrSpace = RMW->getPointerAddressSpace();
  } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
    AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::prefetch:
    case Intrinsic::memset:
      // The pointer argument is always argument 0 of these intrinsics.
      AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
      AccessTy.MemTy = OperandVal->getType();
      break;
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      // Either pointer argument may be the operand being rewritten, so take
      // the address space from the operand itself.
      AccessTy.AddrSpace = OperandVal->getType()->getPointerAddressSpace();
      AccessTy.MemTy = OperandVal->getType();
      break;
    default: {
      // Ask the target for the pointer of an unrecognized memory intrinsic.
      MemIntrinsicInfo IntrInfo;
      if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) {
        AccessTy.AddrSpace
          = IntrInfo.PtrVal->getType()->getPointerAddressSpace();
      }

      break;
    }
    }
  }

  // All pointers have the same requirements, so canonicalize them to an
  // arbitrary pointer type to minimize variation.
  if (PointerType *PTy = dyn_cast<PointerType>(AccessTy.MemTy))
    AccessTy.MemTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
                                      PTy->getAddressSpace());

  return AccessTy;
}
873f22ef01cSRoman Divacky
8747d523365SDimitry Andric /// Return true if this AddRec is already a phi in its loop.
isExistingPhi(const SCEVAddRecExpr * AR,ScalarEvolution & SE)875dff0c46cSDimitry Andric static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
87630785c0eSDimitry Andric for (PHINode &PN : AR->getLoop()->getHeader()->phis()) {
87730785c0eSDimitry Andric if (SE.isSCEVable(PN.getType()) &&
87830785c0eSDimitry Andric (SE.getEffectiveSCEVType(PN.getType()) ==
879dff0c46cSDimitry Andric SE.getEffectiveSCEVType(AR->getType())) &&
88030785c0eSDimitry Andric SE.getSCEV(&PN) == AR)
881dff0c46cSDimitry Andric return true;
882dff0c46cSDimitry Andric }
883dff0c46cSDimitry Andric return false;
884dff0c46cSDimitry Andric }
885dff0c46cSDimitry Andric
/// Check if expanding this expression is likely to incur significant cost. This
/// is tricky because SCEV doesn't track which expressions are actually computed
/// by the current IR.
///
/// We currently allow expansion of IV increments that involve adds,
/// multiplication by constants, and AddRecs from existing phis.
///
/// TODO: Allow UDivExpr if we can find an existing IV increment that is an
/// obvious multiple of the UDivExpr.
static bool isHighCostExpansion(const SCEV *S,
                                SmallPtrSetImpl<const SCEV*> &Processed,
                                ScalarEvolution &SE) {
  // Zero/One operand expressions
  switch (S->getSCEVType()) {
  case scUnknown:
  case scConstant:
    // Leaves are free to "expand"; they already exist as values.
    return false;
  case scTruncate:
    return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
                               Processed, SE);
  case scZeroExtend:
    return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
                               Processed, SE);
  case scSignExtend:
    return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
                               Processed, SE);
  }

  // Treat expressions we've already visited as cheap to avoid exponential
  // re-analysis of shared subexpressions.
  if (!Processed.insert(S).second)
    return false;

  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    // An add is cheap only if every operand is cheap.
    for (const SCEV *S : Add->operands()) {
      if (isHighCostExpansion(S, Processed, SE))
        return true;
    }
    return false;
  }

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    if (Mul->getNumOperands() == 2) {
      // Multiplication by a constant is ok
      if (isa<SCEVConstant>(Mul->getOperand(0)))
        return isHighCostExpansion(Mul->getOperand(1), Processed, SE);

      // If we have the value of one operand, check if an existing
      // multiplication already generates this expression.
      if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
        Value *UVal = U->getValue();
        for (User *UR : UVal->users()) {
          // If U is a constant, it may be used by a ConstantExpr.
          Instruction *UI = dyn_cast<Instruction>(UR);
          if (UI && UI->getOpcode() == Instruction::Mul &&
              SE.isSCEVable(UI->getType())) {
            return SE.getSCEV(UI) == Mul;
          }
        }
      }
    }
  }

  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    // A recurrence that already exists as a phi costs nothing to expand.
    if (isExistingPhi(AR, SE))
      return false;
  }

  // For now, consider any other type of expression (div/mul/min/max) high cost.
  return true;
}
955dff0c46cSDimitry Andric
/// If any of the instructions in the specified set are trivially dead, delete
/// them and see if this makes any of their operands subsequently dead.
static bool
DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  bool Changed = false;

  while (!DeadInsts.empty()) {
    // The tracking handle may have been nulled or RAUW'd to a non-instruction
    // since it was queued, so re-check before touching it.
    Value *V = DeadInsts.pop_back_val();
    Instruction *I = dyn_cast_or_null<Instruction>(V);

    if (!I || !isInstructionTriviallyDead(I))
      continue;

    // Drop each operand use; any operand left with no remaining uses becomes
    // a new deletion candidate on the worklist.
    for (Use &O : I->operands())
      if (Instruction *U = dyn_cast<Instruction>(O)) {
        O = nullptr;
        if (U->use_empty())
          DeadInsts.emplace_back(U);
      }

    I->eraseFromParent();
    Changed = true;
  }

  return Changed;
}
982f22ef01cSRoman Divacky
983f22ef01cSRoman Divacky namespace {
984d88c1a5aSDimitry Andric
985f785676fSDimitry Andric class LSRUse;
986d88c1a5aSDimitry Andric
987d88c1a5aSDimitry Andric } // end anonymous namespace
98891bc56edSDimitry Andric
9894ba319b5SDimitry Andric /// Check if the addressing mode defined by \p F is completely
99091bc56edSDimitry Andric /// folded in \p LU at isel time.
99191bc56edSDimitry Andric /// This includes address-mode folding and special icmp tricks.
99291bc56edSDimitry Andric /// This function returns true if \p LU can accommodate what \p F
99391bc56edSDimitry Andric /// defines and up to 1 base + 1 scaled + offset.
99491bc56edSDimitry Andric /// In other words, if \p F has several base registers, this function may
99591bc56edSDimitry Andric /// still return true. Therefore, users still need to account for
99691bc56edSDimitry Andric /// additional base registers and/or unfolded offsets to derive an
99791bc56edSDimitry Andric /// accurate cost model.
99891bc56edSDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
99991bc56edSDimitry Andric const LSRUse &LU, const Formula &F);
10002cab237bSDimitry Andric
1001f785676fSDimitry Andric // Get the cost of the scaling factor used in F for LU.
1002f785676fSDimitry Andric static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
10037a7e6055SDimitry Andric const LSRUse &LU, const Formula &F,
10047a7e6055SDimitry Andric const Loop &L);
1005f785676fSDimitry Andric
1006f785676fSDimitry Andric namespace {
1007f22ef01cSRoman Divacky
/// This class is used to measure and compare candidate formulae.
class Cost {
  // The individual cost metrics, bundled in the TTI-visible struct so the
  // target can weigh them when comparing costs.
  TargetTransformInfo::LSRCost C;

public:
  // Start with all metrics at zero; RateFormula accumulates into them.
  Cost() {
    C.Insns = 0;
    C.NumRegs = 0;
    C.AddRecCost = 0;
    C.NumIVMuls = 0;
    C.NumBaseAdds = 0;
    C.ImmCost = 0;
    C.SetupCost = 0;
    C.ScaleCost = 0;
  }

  /// Compare this cost to Other, deferring to the target's ordering.
  bool isLess(Cost &Other, const TargetTransformInfo &TTI);

  /// Force this cost to be a loser (defined out of line).
  void Lose();

#ifndef NDEBUG
  // Once any of the metrics loses, they must all remain losers.
  // Either no metric is ~0u, or all of them are.
  bool isValid() {
    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
             | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
      || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
           & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);
  }
#endif

  // NumRegs == ~0u is the sentinel that marks a losing cost.
  bool isLoser() {
    assert(isValid() && "invalid cost");
    return C.NumRegs == ~0u;
  }

  /// Accumulate the cost of using formula F for use LU (defined out of line).
  void RateFormula(const TargetTransformInfo &TTI,
                   const Formula &F,
                   SmallPtrSetImpl<const SCEV *> &Regs,
                   const DenseSet<const SCEV *> &VisitedRegs,
                   const Loop *L,
                   ScalarEvolution &SE, DominatorTree &DT,
                   const LSRUse &LU,
                   SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);

  void print(raw_ostream &OS) const;
  void dump() const;

private:
  void RateRegister(const SCEV *Reg,
                    SmallPtrSetImpl<const SCEV *> &Regs,
                    const Loop *L,
                    ScalarEvolution &SE, DominatorTree &DT,
                    const TargetTransformInfo &TTI);
  void RatePrimaryRegister(const SCEV *Reg,
                           SmallPtrSetImpl<const SCEV *> &Regs,
                           const Loop *L,
                           ScalarEvolution &SE, DominatorTree &DT,
                           SmallPtrSetImpl<const SCEV *> *LoserRegs,
                           const TargetTransformInfo &TTI);
};
1068f22ef01cSRoman Divacky
1069d88c1a5aSDimitry Andric /// An operand value in an instruction which is to be replaced with some
1070d88c1a5aSDimitry Andric /// equivalent, possibly strength-reduced, replacement.
struct LSRFixup {
  /// The instruction which will be updated.
  Instruction *UserInst = nullptr;

  /// The operand of the instruction which will be replaced. The operand may be
  /// used more than once; every instance will be replaced.
  Value *OperandValToReplace = nullptr;

  /// If this user is to use the post-incremented value of an induction
  /// variable, this set is non-empty and holds the loops associated with the
  /// induction variable.
  PostIncLoopSet PostIncLoops;

  /// A constant offset to be added to the LSRUse expression. This allows
  /// multiple fixups to share the same LSRUse with different offsets, for
  /// example in an unrolled loop.
  int64_t Offset = 0;

  LSRFixup() = default;

  /// Test whether this fixup's use of the value occurs entirely outside of
  /// loop L (for PHIs, the use is attributed to the incoming block).
  bool isUseFullyOutsideLoop(const Loop *L) const;

  // Debug output (definitions below are guarded by !NDEBUG/LLVM_ENABLE_DUMP).
  void print(raw_ostream &OS) const;
  void dump() const;
};
1096d88c1a5aSDimitry Andric
1097d88c1a5aSDimitry Andric /// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted
1098d88c1a5aSDimitry Andric /// SmallVectors of const SCEV*.
1099d88c1a5aSDimitry Andric struct UniquifierDenseMapInfo {
getEmptyKey__anon244971d90711::UniquifierDenseMapInfo1100d88c1a5aSDimitry Andric static SmallVector<const SCEV *, 4> getEmptyKey() {
1101d88c1a5aSDimitry Andric SmallVector<const SCEV *, 4> V;
1102d88c1a5aSDimitry Andric V.push_back(reinterpret_cast<const SCEV *>(-1));
1103d88c1a5aSDimitry Andric return V;
11043dac3a9bSDimitry Andric }
1105f22ef01cSRoman Divacky
getTombstoneKey__anon244971d90711::UniquifierDenseMapInfo1106d88c1a5aSDimitry Andric static SmallVector<const SCEV *, 4> getTombstoneKey() {
1107d88c1a5aSDimitry Andric SmallVector<const SCEV *, 4> V;
1108d88c1a5aSDimitry Andric V.push_back(reinterpret_cast<const SCEV *>(-2));
1109d88c1a5aSDimitry Andric return V;
1110d88c1a5aSDimitry Andric }
1111d88c1a5aSDimitry Andric
getHashValue__anon244971d90711::UniquifierDenseMapInfo1112d88c1a5aSDimitry Andric static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
1113d88c1a5aSDimitry Andric return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
1114d88c1a5aSDimitry Andric }
1115d88c1a5aSDimitry Andric
isEqual__anon244971d90711::UniquifierDenseMapInfo1116d88c1a5aSDimitry Andric static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
1117d88c1a5aSDimitry Andric const SmallVector<const SCEV *, 4> &RHS) {
1118d88c1a5aSDimitry Andric return LHS == RHS;
1119d88c1a5aSDimitry Andric }
1120d88c1a5aSDimitry Andric };
1121d88c1a5aSDimitry Andric
1122d88c1a5aSDimitry Andric /// This class holds the state that LSR keeps for each use in IVUsers, as well
1123d88c1a5aSDimitry Andric /// as uses invented by LSR itself. It includes information about what kinds of
1124d88c1a5aSDimitry Andric /// things can be folded into the user, information about the user itself, and
1125d88c1a5aSDimitry Andric /// information about how the use may be satisfied. TODO: Represent multiple
1126d88c1a5aSDimitry Andric /// users of the same expression in common?
1127d88c1a5aSDimitry Andric class LSRUse {
1128d88c1a5aSDimitry Andric DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
1129d88c1a5aSDimitry Andric
1130d88c1a5aSDimitry Andric public:
1131d88c1a5aSDimitry Andric /// An enum for a kind of use, indicating what types of scaled and immediate
1132d88c1a5aSDimitry Andric /// operands it might support.
1133d88c1a5aSDimitry Andric enum KindType {
1134d88c1a5aSDimitry Andric Basic, ///< A normal use, with no folding.
1135d88c1a5aSDimitry Andric Special, ///< A special case of basic, allowing -1 scales.
1136d88c1a5aSDimitry Andric Address, ///< An address use; folding according to TargetLowering
1137d88c1a5aSDimitry Andric ICmpZero ///< An equality icmp with both operands folded into one.
1138d88c1a5aSDimitry Andric // TODO: Add a generic icmp too?
1139d88c1a5aSDimitry Andric };
1140d88c1a5aSDimitry Andric
11412cab237bSDimitry Andric using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;
1142d88c1a5aSDimitry Andric
1143d88c1a5aSDimitry Andric KindType Kind;
1144d88c1a5aSDimitry Andric MemAccessTy AccessTy;
1145d88c1a5aSDimitry Andric
1146d88c1a5aSDimitry Andric /// The list of operands which are to be replaced.
1147d88c1a5aSDimitry Andric SmallVector<LSRFixup, 8> Fixups;
1148d88c1a5aSDimitry Andric
1149d88c1a5aSDimitry Andric /// Keep track of the min and max offsets of the fixups.
11502cab237bSDimitry Andric int64_t MinOffset = std::numeric_limits<int64_t>::max();
11512cab237bSDimitry Andric int64_t MaxOffset = std::numeric_limits<int64_t>::min();
1152d88c1a5aSDimitry Andric
1153d88c1a5aSDimitry Andric /// This records whether all of the fixups using this LSRUse are outside of
1154d88c1a5aSDimitry Andric /// the loop, in which case some special-case heuristics may be used.
11552cab237bSDimitry Andric bool AllFixupsOutsideLoop = true;
1156d88c1a5aSDimitry Andric
1157d88c1a5aSDimitry Andric /// RigidFormula is set to true to guarantee that this use will be associated
1158d88c1a5aSDimitry Andric /// with a single formula--the one that initially matched. Some SCEV
1159d88c1a5aSDimitry Andric /// expressions cannot be expanded. This allows LSR to consider the registers
1160d88c1a5aSDimitry Andric /// used by those expressions without the need to expand them later after
1161d88c1a5aSDimitry Andric /// changing the formula.
11622cab237bSDimitry Andric bool RigidFormula = false;
1163d88c1a5aSDimitry Andric
1164d88c1a5aSDimitry Andric /// This records the widest use type for any fixup using this
1165d88c1a5aSDimitry Andric /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
1166d88c1a5aSDimitry Andric /// fixup widths to be equivalent, because the narrower one may be relying on
1167d88c1a5aSDimitry Andric /// the implicit truncation to truncate away bogus bits.
11682cab237bSDimitry Andric Type *WidestFixupType = nullptr;
1169d88c1a5aSDimitry Andric
1170d88c1a5aSDimitry Andric /// A list of ways to build a value that can satisfy this user. After the
1171d88c1a5aSDimitry Andric /// list is populated, one of these is selected heuristically and used to
1172d88c1a5aSDimitry Andric /// formulate a replacement for OperandValToReplace in UserInst.
1173d88c1a5aSDimitry Andric SmallVector<Formula, 12> Formulae;
1174d88c1a5aSDimitry Andric
1175d88c1a5aSDimitry Andric /// The set of register candidates used by all formulae in this LSRUse.
1176d88c1a5aSDimitry Andric SmallPtrSet<const SCEV *, 4> Regs;
1177d88c1a5aSDimitry Andric
LSRUse(KindType K,MemAccessTy AT)11782cab237bSDimitry Andric LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}
1179d88c1a5aSDimitry Andric
getNewFixup()1180d88c1a5aSDimitry Andric LSRFixup &getNewFixup() {
1181d88c1a5aSDimitry Andric Fixups.push_back(LSRFixup());
1182d88c1a5aSDimitry Andric return Fixups.back();
1183d88c1a5aSDimitry Andric }
1184d88c1a5aSDimitry Andric
pushFixup(LSRFixup & f)1185d88c1a5aSDimitry Andric void pushFixup(LSRFixup &f) {
1186d88c1a5aSDimitry Andric Fixups.push_back(f);
1187d88c1a5aSDimitry Andric if (f.Offset > MaxOffset)
1188d88c1a5aSDimitry Andric MaxOffset = f.Offset;
1189d88c1a5aSDimitry Andric if (f.Offset < MinOffset)
1190d88c1a5aSDimitry Andric MinOffset = f.Offset;
1191d88c1a5aSDimitry Andric }
1192d88c1a5aSDimitry Andric
1193d88c1a5aSDimitry Andric bool HasFormulaWithSameRegs(const Formula &F) const;
11947a7e6055SDimitry Andric float getNotSelectedProbability(const SCEV *Reg) const;
11957a7e6055SDimitry Andric bool InsertFormula(const Formula &F, const Loop &L);
1196d88c1a5aSDimitry Andric void DeleteFormula(Formula &F);
1197d88c1a5aSDimitry Andric void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
1198d88c1a5aSDimitry Andric
1199d88c1a5aSDimitry Andric void print(raw_ostream &OS) const;
1200d88c1a5aSDimitry Andric void dump() const;
1201d88c1a5aSDimitry Andric };
1202d88c1a5aSDimitry Andric
1203d88c1a5aSDimitry Andric } // end anonymous namespace
1204d88c1a5aSDimitry Andric
// Forward declaration: the definition appears later in the file, but
// Cost::RateFormula below needs to call it.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 GlobalValue *BaseGV, int64_t BaseOffset,
                                 bool HasBaseReg, int64_t Scale,
                                 Instruction *Fixup = nullptr);
12102cab237bSDimitry Andric
12117d523365SDimitry Andric /// Tally up interesting quantities from the given register.
void Cost::RateRegister(const SCEV *Reg,
                        SmallPtrSetImpl<const SCEV *> &Regs,
                        const Loop *L,
                        ScalarEvolution &SE, DominatorTree &DT,
                        const TargetTransformInfo &TTI) {
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
    // If this is an addrec for another loop, it should be an invariant
    // with respect to L since L is the innermost loop (at least
    // for now LSR only handles innermost loops).
    if (AR->getLoop() != L) {
      // If the AddRec exists, consider it's register free and leave it alone.
      if (isExistingPhi(AR, SE))
        return;

      // It is bad to allow LSR for current loop to add induction variables
      // for its sibling loops.
      if (!AR->getLoop()->contains(L)) {
        Lose();
        return;
      }

      // Otherwise, it will be an invariant with respect to Loop L.
      ++C.NumRegs;
      return;
    }

    // An addrec for this loop normally costs one add per iteration. If the
    // target favors post-increment and a post-indexed load/store is legal
    // for this type, a non-constant loop-invariant start is treated as free
    // (the increment can fold into the memory access).
    unsigned LoopCost = 1;
    if (TTI.shouldFavorPostInc()) {
      const SCEV *LoopStep = AR->getStepRecurrence(SE);
      if (isa<SCEVConstant>(LoopStep)) {
        // Check if a post-indexed load/store can be used.
        if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
            TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
          const SCEV *LoopStart = AR->getStart();
          if (!isa<SCEVConstant>(LoopStart) &&
              SE.isLoopInvariant(LoopStart, L))
            LoopCost = 0;
        }
      }
    }
    C.AddRecCost += LoopCost;

    // Add the step value register, if it needs one.
    // TODO: The non-affine case isn't precisely modeled here.
    if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
      if (!Regs.count(AR->getOperand(1))) {
        // Recurse: the step itself occupies a register. Stop early if that
        // rating already declared this cost a loser.
        RateRegister(AR->getOperand(1), Regs, L, SE, DT, TTI);
        if (isLoser())
          return;
      }
    }
  }
  ++C.NumRegs;

  // Rough heuristic; favor registers which don't require extra setup
  // instructions in the preheader.
  if (!isa<SCEVUnknown>(Reg) &&
      !isa<SCEVConstant>(Reg) &&
      !(isa<SCEVAddRecExpr>(Reg) &&
        (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
         isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
    ++C.SetupCost;

  // The boolean expression contributes 0 or 1: a mul with a computable
  // evolution in L counts as an IV multiply.
  C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
                 SE.hasComputableLoopEvolution(Reg, L);
}
1278f22ef01cSRoman Divacky
12797d523365SDimitry Andric /// Record this register in the set. If we haven't seen it before, rate
12807d523365SDimitry Andric /// it. Optional LoserRegs provides a way to declare any formula that refers to
12817d523365SDimitry Andric /// one of those regs an instant loser.
RatePrimaryRegister(const SCEV * Reg,SmallPtrSetImpl<const SCEV * > & Regs,const Loop * L,ScalarEvolution & SE,DominatorTree & DT,SmallPtrSetImpl<const SCEV * > * LoserRegs,const TargetTransformInfo & TTI)1282f22ef01cSRoman Divacky void Cost::RatePrimaryRegister(const SCEV *Reg,
128339d628a0SDimitry Andric SmallPtrSetImpl<const SCEV *> &Regs,
1284f22ef01cSRoman Divacky const Loop *L,
1285dff0c46cSDimitry Andric ScalarEvolution &SE, DominatorTree &DT,
12864ba319b5SDimitry Andric SmallPtrSetImpl<const SCEV *> *LoserRegs,
12874ba319b5SDimitry Andric const TargetTransformInfo &TTI) {
1288dff0c46cSDimitry Andric if (LoserRegs && LoserRegs->count(Reg)) {
128991bc56edSDimitry Andric Lose();
1290dff0c46cSDimitry Andric return;
1291dff0c46cSDimitry Andric }
129239d628a0SDimitry Andric if (Regs.insert(Reg).second) {
12934ba319b5SDimitry Andric RateRegister(Reg, Regs, L, SE, DT, TTI);
1294139f7f9bSDimitry Andric if (LoserRegs && isLoser())
1295dff0c46cSDimitry Andric LoserRegs->insert(Reg);
1296dff0c46cSDimitry Andric }
1297f22ef01cSRoman Divacky }
1298f22ef01cSRoman Divacky
/// Rate formula F for use LU, accumulating register, immediate, and (when
/// InsnsCost is enabled) instruction counts into this Cost. Bails out with
/// Lose() as soon as the formula is known to be unprofitable.
void Cost::RateFormula(const TargetTransformInfo &TTI,
                       const Formula &F,
                       SmallPtrSetImpl<const SCEV *> &Regs,
                       const DenseSet<const SCEV *> &VisitedRegs,
                       const Loop *L,
                       ScalarEvolution &SE, DominatorTree &DT,
                       const LSRUse &LU,
                       SmallPtrSetImpl<const SCEV *> *LoserRegs) {
  assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
  // Tally up the registers. Snapshot the counters first so the instruction
  // estimation at the bottom can charge only for newly added quantities.
  unsigned PrevAddRecCost = C.AddRecCost;
  unsigned PrevNumRegs = C.NumRegs;
  unsigned PrevNumBaseAdds = C.NumBaseAdds;
  if (const SCEV *ScaledReg = F.ScaledReg) {
    // A register appearing in VisitedRegs would be counted twice; reject.
    if (VisitedRegs.count(ScaledReg)) {
      Lose();
      return;
    }
    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs, TTI);
    if (isLoser())
      return;
  }
  for (const SCEV *BaseReg : F.BaseRegs) {
    if (VisitedRegs.count(BaseReg)) {
      Lose();
      return;
    }
    RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs, TTI);
    if (isLoser())
      return;
  }

  // Determine how many (unfolded) adds we'll need inside the loop.
  size_t NumBaseParts = F.getNumRegs();
  if (NumBaseParts > 1)
    // Do not count the base and a possible second register if the target
    // allows to fold 2 registers.
    C.NumBaseAdds +=
        NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
  C.NumBaseAdds += (F.UnfoldedOffset != 0);

  // Accumulate non-free scaling amounts.
  C.ScaleCost += getScalingFactorCost(TTI, LU, F, *L);

  // Tally up the non-zero immediates.
  for (const LSRFixup &Fixup : LU.Fixups) {
    int64_t O = Fixup.Offset;
    // Unsigned arithmetic to avoid signed-overflow UB on the addition.
    int64_t Offset = (uint64_t)O + F.BaseOffset;
    if (F.BaseGV)
      C.ImmCost += 64; // Handle symbolic values conservatively.
    // TODO: This should probably be the pointer size.
    else if (Offset != 0)
      C.ImmCost += APInt(64, Offset, true).getMinSignedBits();

    // Check with target if this offset with this instruction is
    // specifically not supported.
    if (LU.Kind == LSRUse::Address && Offset != 0 &&
        !isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
                              Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
      C.NumBaseAdds++;
  }

  // If we don't count instruction cost exit here.
  if (!InsnsCost) {
    assert(isValid() && "invalid cost");
    return;
  }

  // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
  // additional instruction (at least fill).
  unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1;
  if (C.NumRegs > TTIRegNum) {
    // Cost already exceeded TTIRegNum, then only newly added register can add
    // new instructions.
    if (PrevNumRegs > TTIRegNum)
      C.Insns += (C.NumRegs - PrevNumRegs);
    else
      C.Insns += (C.NumRegs - TTIRegNum);
  }

  // If ICmpZero formula ends with not 0, it could not be replaced by
  // just add or sub. We'll need to compare final result of AddRec.
  // That means we'll need an additional instruction. But if the target can
  // macro-fuse a compare with a branch, don't count this extra instruction.
  // For -10 + {0, +, 1}:
  // i = i + 1;
  // cmp i, 10
  //
  // For {-10, +, 1}:
  // i = i + 1;
  if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() && !TTI.canMacroFuseCmp())
    C.Insns++;
  // Each new AddRec adds 1 instruction to calculation.
  C.Insns += (C.AddRecCost - PrevAddRecCost);

  // BaseAdds adds instructions for unfolded registers.
  if (LU.Kind != LSRUse::ICmpZero)
    C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
  assert(isValid() && "invalid cost");
}
1399f22ef01cSRoman Divacky
14007d523365SDimitry Andric /// Set this cost to a losing value.
Lose()140191bc56edSDimitry Andric void Cost::Lose() {
14022cab237bSDimitry Andric C.Insns = std::numeric_limits<unsigned>::max();
14032cab237bSDimitry Andric C.NumRegs = std::numeric_limits<unsigned>::max();
14042cab237bSDimitry Andric C.AddRecCost = std::numeric_limits<unsigned>::max();
14052cab237bSDimitry Andric C.NumIVMuls = std::numeric_limits<unsigned>::max();
14062cab237bSDimitry Andric C.NumBaseAdds = std::numeric_limits<unsigned>::max();
14072cab237bSDimitry Andric C.ImmCost = std::numeric_limits<unsigned>::max();
14082cab237bSDimitry Andric C.SetupCost = std::numeric_limits<unsigned>::max();
14092cab237bSDimitry Andric C.ScaleCost = std::numeric_limits<unsigned>::max();
1410f22ef01cSRoman Divacky }
1411f22ef01cSRoman Divacky
14127d523365SDimitry Andric /// Choose the lower cost.
isLess(Cost & Other,const TargetTransformInfo & TTI)1413db17bf38SDimitry Andric bool Cost::isLess(Cost &Other, const TargetTransformInfo &TTI) {
1414db17bf38SDimitry Andric if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
1415db17bf38SDimitry Andric C.Insns != Other.C.Insns)
1416db17bf38SDimitry Andric return C.Insns < Other.C.Insns;
1417db17bf38SDimitry Andric return TTI.isLSRCostLess(C, Other.C);
1418f22ef01cSRoman Divacky }
1419f22ef01cSRoman Divacky
14202cab237bSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & OS) const1421f22ef01cSRoman Divacky void Cost::print(raw_ostream &OS) const {
1422db17bf38SDimitry Andric if (InsnsCost)
1423db17bf38SDimitry Andric OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
1424db17bf38SDimitry Andric OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
1425db17bf38SDimitry Andric if (C.AddRecCost != 0)
1426db17bf38SDimitry Andric OS << ", with addrec cost " << C.AddRecCost;
1427db17bf38SDimitry Andric if (C.NumIVMuls != 0)
1428db17bf38SDimitry Andric OS << ", plus " << C.NumIVMuls << " IV mul"
1429db17bf38SDimitry Andric << (C.NumIVMuls == 1 ? "" : "s");
1430db17bf38SDimitry Andric if (C.NumBaseAdds != 0)
1431db17bf38SDimitry Andric OS << ", plus " << C.NumBaseAdds << " base add"
1432db17bf38SDimitry Andric << (C.NumBaseAdds == 1 ? "" : "s");
1433db17bf38SDimitry Andric if (C.ScaleCost != 0)
1434db17bf38SDimitry Andric OS << ", plus " << C.ScaleCost << " scale cost";
1435db17bf38SDimitry Andric if (C.ImmCost != 0)
1436db17bf38SDimitry Andric OS << ", plus " << C.ImmCost << " imm cost";
1437db17bf38SDimitry Andric if (C.SetupCost != 0)
1438db17bf38SDimitry Andric OS << ", plus " << C.SetupCost << " setup cost";
1439f22ef01cSRoman Divacky }
1440f22ef01cSRoman Divacky
dump() const14417a7e6055SDimitry Andric LLVM_DUMP_METHOD void Cost::dump() const {
1442f22ef01cSRoman Divacky print(errs()); errs() << '\n';
1443f22ef01cSRoman Divacky }
14447a7e6055SDimitry Andric #endif
1445f22ef01cSRoman Divacky
14467d523365SDimitry Andric /// Test whether this fixup always uses its value outside of the given loop.
isUseFullyOutsideLoop(const Loop * L) const1447f22ef01cSRoman Divacky bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
1448f22ef01cSRoman Divacky // PHI nodes use their value in their incoming blocks.
1449f22ef01cSRoman Divacky if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
1450f22ef01cSRoman Divacky for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1451f22ef01cSRoman Divacky if (PN->getIncomingValue(i) == OperandValToReplace &&
1452f22ef01cSRoman Divacky L->contains(PN->getIncomingBlock(i)))
1453f22ef01cSRoman Divacky return false;
1454f22ef01cSRoman Divacky return true;
1455f22ef01cSRoman Divacky }
1456f22ef01cSRoman Divacky
1457f22ef01cSRoman Divacky return !L->contains(UserInst);
1458f22ef01cSRoman Divacky }
1459f22ef01cSRoman Divacky
14602cab237bSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & OS) const1461f22ef01cSRoman Divacky void LSRFixup::print(raw_ostream &OS) const {
1462f22ef01cSRoman Divacky OS << "UserInst=";
1463f22ef01cSRoman Divacky // Store is common and interesting enough to be worth special-casing.
1464f22ef01cSRoman Divacky if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1465f22ef01cSRoman Divacky OS << "store ";
146691bc56edSDimitry Andric Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
1467f22ef01cSRoman Divacky } else if (UserInst->getType()->isVoidTy())
1468f22ef01cSRoman Divacky OS << UserInst->getOpcodeName();
1469f22ef01cSRoman Divacky else
147091bc56edSDimitry Andric UserInst->printAsOperand(OS, /*PrintType=*/false);
1471f22ef01cSRoman Divacky
1472f22ef01cSRoman Divacky OS << ", OperandValToReplace=";
147391bc56edSDimitry Andric OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);
1474f22ef01cSRoman Divacky
1475ff0cc061SDimitry Andric for (const Loop *PIL : PostIncLoops) {
1476f22ef01cSRoman Divacky OS << ", PostIncLoop=";
1477ff0cc061SDimitry Andric PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
1478f22ef01cSRoman Divacky }
1479f22ef01cSRoman Divacky
1480f22ef01cSRoman Divacky if (Offset != 0)
1481f22ef01cSRoman Divacky OS << ", Offset=" << Offset;
1482f22ef01cSRoman Divacky }
1483f22ef01cSRoman Divacky
dump() const14847a7e6055SDimitry Andric LLVM_DUMP_METHOD void LSRFixup::dump() const {
1485f22ef01cSRoman Divacky print(errs()); errs() << '\n';
1486f22ef01cSRoman Divacky }
14877a7e6055SDimitry Andric #endif
1488f22ef01cSRoman Divacky
14897d523365SDimitry Andric /// Test whether this use as a formula which has the same registers as the given
14907d523365SDimitry Andric /// formula.
HasFormulaWithSameRegs(const Formula & F) const1491f22ef01cSRoman Divacky bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
1492139f7f9bSDimitry Andric SmallVector<const SCEV *, 4> Key = F.BaseRegs;
1493f22ef01cSRoman Divacky if (F.ScaledReg) Key.push_back(F.ScaledReg);
1494f22ef01cSRoman Divacky // Unstable sort by host order ok, because this is only used for uniquifying.
1495*b5893f02SDimitry Andric llvm::sort(Key);
1496f22ef01cSRoman Divacky return Uniquifier.count(Key);
1497f22ef01cSRoman Divacky }
1498f22ef01cSRoman Divacky
14997a7e6055SDimitry Andric /// The function returns a probability of selecting formula without Reg.
getNotSelectedProbability(const SCEV * Reg) const15007a7e6055SDimitry Andric float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
15017a7e6055SDimitry Andric unsigned FNum = 0;
15027a7e6055SDimitry Andric for (const Formula &F : Formulae)
15037a7e6055SDimitry Andric if (F.referencesReg(Reg))
15047a7e6055SDimitry Andric FNum++;
15057a7e6055SDimitry Andric return ((float)(Formulae.size() - FNum)) / Formulae.size();
15067a7e6055SDimitry Andric }
15077a7e6055SDimitry Andric
15087d523365SDimitry Andric /// If the given formula has not yet been inserted, add it to the list, and
15097d523365SDimitry Andric /// return true. Return false otherwise. The formula must be in canonical form.
InsertFormula(const Formula & F,const Loop & L)15107a7e6055SDimitry Andric bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
15117a7e6055SDimitry Andric assert(F.isCanonical(L) && "Invalid canonical representation");
151291bc56edSDimitry Andric
1513f785676fSDimitry Andric if (!Formulae.empty() && RigidFormula)
1514f785676fSDimitry Andric return false;
1515f785676fSDimitry Andric
1516139f7f9bSDimitry Andric SmallVector<const SCEV *, 4> Key = F.BaseRegs;
1517f22ef01cSRoman Divacky if (F.ScaledReg) Key.push_back(F.ScaledReg);
1518f22ef01cSRoman Divacky // Unstable sort by host order ok, because this is only used for uniquifying.
1519*b5893f02SDimitry Andric llvm::sort(Key);
1520f22ef01cSRoman Divacky
1521f22ef01cSRoman Divacky if (!Uniquifier.insert(Key).second)
1522f22ef01cSRoman Divacky return false;
1523f22ef01cSRoman Divacky
1524f22ef01cSRoman Divacky // Using a register to hold the value of 0 is not profitable.
1525f22ef01cSRoman Divacky assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
1526f22ef01cSRoman Divacky "Zero allocated in a scaled register!");
1527f22ef01cSRoman Divacky #ifndef NDEBUG
1528ff0cc061SDimitry Andric for (const SCEV *BaseReg : F.BaseRegs)
1529ff0cc061SDimitry Andric assert(!BaseReg->isZero() && "Zero allocated in a base register!");
1530f22ef01cSRoman Divacky #endif
1531f22ef01cSRoman Divacky
1532f22ef01cSRoman Divacky // Add the formula to the list.
1533f22ef01cSRoman Divacky Formulae.push_back(F);
1534f22ef01cSRoman Divacky
1535f22ef01cSRoman Divacky // Record registers now being used by this use.
1536f22ef01cSRoman Divacky Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
153791bc56edSDimitry Andric if (F.ScaledReg)
153891bc56edSDimitry Andric Regs.insert(F.ScaledReg);
1539f22ef01cSRoman Divacky
1540f22ef01cSRoman Divacky return true;
1541f22ef01cSRoman Divacky }
1542f22ef01cSRoman Divacky
15437d523365SDimitry Andric /// Remove the given formula from this use's list.
DeleteFormula(Formula & F)1544f22ef01cSRoman Divacky void LSRUse::DeleteFormula(Formula &F) {
1545f22ef01cSRoman Divacky if (&F != &Formulae.back())
1546f22ef01cSRoman Divacky std::swap(F, Formulae.back());
1547f22ef01cSRoman Divacky Formulae.pop_back();
1548f22ef01cSRoman Divacky }
1549f22ef01cSRoman Divacky
15507d523365SDimitry Andric /// Recompute the Regs field, and update RegUses.
RecomputeRegs(size_t LUIdx,RegUseTracker & RegUses)1551f22ef01cSRoman Divacky void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
1552f22ef01cSRoman Divacky // Now that we've filtered out some formulae, recompute the Regs set.
1553ff0cc061SDimitry Andric SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
1554f22ef01cSRoman Divacky Regs.clear();
1555ff0cc061SDimitry Andric for (const Formula &F : Formulae) {
1556f22ef01cSRoman Divacky if (F.ScaledReg) Regs.insert(F.ScaledReg);
1557f22ef01cSRoman Divacky Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
1558f22ef01cSRoman Divacky }
1559f22ef01cSRoman Divacky
1560f22ef01cSRoman Divacky // Update the RegTracker.
156139d628a0SDimitry Andric for (const SCEV *S : OldRegs)
156239d628a0SDimitry Andric if (!Regs.count(S))
15637d523365SDimitry Andric RegUses.dropRegister(S, LUIdx);
1564f22ef01cSRoman Divacky }
1565f22ef01cSRoman Divacky
15662cab237bSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & OS) const1567f22ef01cSRoman Divacky void LSRUse::print(raw_ostream &OS) const {
1568f22ef01cSRoman Divacky OS << "LSR Use: Kind=";
1569f22ef01cSRoman Divacky switch (Kind) {
1570f22ef01cSRoman Divacky case Basic: OS << "Basic"; break;
1571f22ef01cSRoman Divacky case Special: OS << "Special"; break;
1572f22ef01cSRoman Divacky case ICmpZero: OS << "ICmpZero"; break;
1573f22ef01cSRoman Divacky case Address:
1574f22ef01cSRoman Divacky OS << "Address of ";
15757d523365SDimitry Andric if (AccessTy.MemTy->isPointerTy())
1576f22ef01cSRoman Divacky OS << "pointer"; // the full pointer type could be really verbose
15777d523365SDimitry Andric else {
15787d523365SDimitry Andric OS << *AccessTy.MemTy;
15797d523365SDimitry Andric }
15807d523365SDimitry Andric
15817d523365SDimitry Andric OS << " in addrspace(" << AccessTy.AddrSpace << ')';
1582f22ef01cSRoman Divacky }
1583f22ef01cSRoman Divacky
1584f22ef01cSRoman Divacky OS << ", Offsets={";
1585ff0cc061SDimitry Andric bool NeedComma = false;
1586d88c1a5aSDimitry Andric for (const LSRFixup &Fixup : Fixups) {
1587ff0cc061SDimitry Andric if (NeedComma) OS << ',';
1588d88c1a5aSDimitry Andric OS << Fixup.Offset;
1589ff0cc061SDimitry Andric NeedComma = true;
1590f22ef01cSRoman Divacky }
1591f22ef01cSRoman Divacky OS << '}';
1592f22ef01cSRoman Divacky
1593f22ef01cSRoman Divacky if (AllFixupsOutsideLoop)
1594f22ef01cSRoman Divacky OS << ", all-fixups-outside-loop";
1595e580952dSDimitry Andric
1596e580952dSDimitry Andric if (WidestFixupType)
1597e580952dSDimitry Andric OS << ", widest fixup type: " << *WidestFixupType;
1598f22ef01cSRoman Divacky }
1599f22ef01cSRoman Divacky
dump() const16007a7e6055SDimitry Andric LLVM_DUMP_METHOD void LSRUse::dump() const {
1601f22ef01cSRoman Divacky print(errs()); errs() << '\n';
1602f22ef01cSRoman Divacky }
16037a7e6055SDimitry Andric #endif
1604f22ef01cSRoman Divacky
/// Test whether a use of kind \p Kind can completely fold an addressing-mode
/// style expression (BaseGV + BaseOffset + BaseReg + Scale*ScaleReg) into a
/// single instruction, i.e. whether the formula costs nothing extra to
/// materialize. For Address uses, \p Fixup is the user instruction the target
/// may inspect (may be null).
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 GlobalValue *BaseGV, int64_t BaseOffset,
                                 bool HasBaseReg, int64_t Scale,
                                 Instruction *Fixup/*= nullptr*/) {
  switch (Kind) {
  case LSRUse::Address:
    // Defer entirely to the target's addressing-mode legality hook.
    return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
                                     HasBaseReg, Scale, AccessTy.AddrSpace, Fixup);

  case LSRUse::ICmpZero:
    // There's not even a target hook for querying whether it would be legal to
    // fold a GV into an ICmp.
    if (BaseGV)
      return false;

    // ICmp only has two operands; don't allow more than two non-trivial parts.
    if (Scale != 0 && HasBaseReg && BaseOffset != 0)
      return false;

    // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
    // putting the scaled register in the other operand of the icmp.
    if (Scale != 0 && Scale != -1)
      return false;

    // If we have low-level target information, ask the target if it can fold an
    // integer immediate on an icmp.
    if (BaseOffset != 0) {
      // We have one of:
      // ICmpZero     BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
      // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
      // Offs is the ICmp immediate.
      if (Scale == 0)
        // The cast does the right thing with
        // std::numeric_limits<int64_t>::min(): negating through uint64_t
        // avoids signed-overflow UB.
        BaseOffset = -(uint64_t)BaseOffset;
      return TTI.isLegalICmpImmediate(BaseOffset);
    }

    // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
    return true;

  case LSRUse::Basic:
    // Only handle single-register values.
    return !BaseGV && Scale == 0 && BaseOffset == 0;

  case LSRUse::Special:
    // Special case Basic to handle -1 scales.
    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
  }

  llvm_unreachable("Invalid LSRUse Kind!");
}
1658f22ef01cSRoman Divacky
/// Like the single-offset isAMCompletelyFolded, but require the addressing
/// mode to be legal across the whole [BaseOffset + MinOffset,
/// BaseOffset + MaxOffset] range of fixup offsets.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 int64_t MinOffset, int64_t MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 GlobalValue *BaseGV, int64_t BaseOffset,
                                 bool HasBaseReg, int64_t Scale) {
  // Check for overflow.
  // The sums are computed in uint64_t to avoid signed-overflow UB; the
  // comparison detects whether the signed result wrapped (the sum must move
  // in the same direction as the added offset's sign).
  if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
      (MinOffset > 0))
    return false;
  MinOffset = (uint64_t)BaseOffset + MinOffset;
  if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
      (MaxOffset > 0))
    return false;
  MaxOffset = (uint64_t)BaseOffset + MaxOffset;

  // The mode is foldable over the range iff it is foldable at both extremes.
  return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
                              HasBaseReg, Scale) &&
         isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
                              HasBaseReg, Scale);
}
167991bc56edSDimitry Andric
/// Convenience overload: test whether formula \p F is completely folded for a
/// use of the given kind over the offset range [MinOffset, MaxOffset].
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 int64_t MinOffset, int64_t MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 const Formula &F, const Loop &L) {
  // For the purpose of isAMCompletelyFolded either having a canonical formula
  // or a scale not equal to zero is correct.
  // Problems may arise from non canonical formulae having a scale == 0.
  // Strictly speaking it would best to just rely on canonical formulae.
  // However, when we generate the scaled formulae, we first check that the
  // scaling factor is profitable before computing the actual ScaledReg for
  // compile time sake.
  assert((F.isCanonical(L) || F.Scale != 0));
  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
                              F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
}
169591bc56edSDimitry Andric
16967d523365SDimitry Andric /// Test whether we know how to expand the current formula.
isLegalUse(const TargetTransformInfo & TTI,int64_t MinOffset,int64_t MaxOffset,LSRUse::KindType Kind,MemAccessTy AccessTy,GlobalValue * BaseGV,int64_t BaseOffset,bool HasBaseReg,int64_t Scale)169791bc56edSDimitry Andric static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
16987d523365SDimitry Andric int64_t MaxOffset, LSRUse::KindType Kind,
16997d523365SDimitry Andric MemAccessTy AccessTy, GlobalValue *BaseGV,
17007d523365SDimitry Andric int64_t BaseOffset, bool HasBaseReg, int64_t Scale) {
170191bc56edSDimitry Andric // We know how to expand completely foldable formulae.
170291bc56edSDimitry Andric return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
170391bc56edSDimitry Andric BaseOffset, HasBaseReg, Scale) ||
170491bc56edSDimitry Andric // Or formulae that use a base register produced by a sum of base
170591bc56edSDimitry Andric // registers.
170691bc56edSDimitry Andric (Scale == 1 &&
170791bc56edSDimitry Andric isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
170891bc56edSDimitry Andric BaseGV, BaseOffset, true, 0));
1709f22ef01cSRoman Divacky }
1710f22ef01cSRoman Divacky
/// Convenience overload: test whether we know how to expand formula \p F for
/// the given use kind over the offset range [MinOffset, MaxOffset].
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
                       int64_t MaxOffset, LSRUse::KindType Kind,
                       MemAccessTy AccessTy, const Formula &F) {
  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
                    F.BaseOffset, F.HasBaseReg, F.Scale);
}
1717139f7f9bSDimitry Andric
isAMCompletelyFolded(const TargetTransformInfo & TTI,const LSRUse & LU,const Formula & F)171891bc56edSDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
171991bc56edSDimitry Andric const LSRUse &LU, const Formula &F) {
17202cab237bSDimitry Andric // Target may want to look at the user instructions.
17212cab237bSDimitry Andric if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
17222cab237bSDimitry Andric for (const LSRFixup &Fixup : LU.Fixups)
17232cab237bSDimitry Andric if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
17242cab237bSDimitry Andric (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
17252cab237bSDimitry Andric F.Scale, Fixup.UserInst))
17262cab237bSDimitry Andric return false;
17272cab237bSDimitry Andric return true;
17282cab237bSDimitry Andric }
17292cab237bSDimitry Andric
173091bc56edSDimitry Andric return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
173191bc56edSDimitry Andric LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
173291bc56edSDimitry Andric F.Scale);
1733f785676fSDimitry Andric }
1734f785676fSDimitry Andric
getScalingFactorCost(const TargetTransformInfo & TTI,const LSRUse & LU,const Formula & F,const Loop & L)1735f785676fSDimitry Andric static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
17367a7e6055SDimitry Andric const LSRUse &LU, const Formula &F,
17377a7e6055SDimitry Andric const Loop &L) {
1738f785676fSDimitry Andric if (!F.Scale)
1739f785676fSDimitry Andric return 0;
174091bc56edSDimitry Andric
174191bc56edSDimitry Andric // If the use is not completely folded in that instruction, we will have to
174291bc56edSDimitry Andric // pay an extra cost only for scale != 1.
174391bc56edSDimitry Andric if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
17447a7e6055SDimitry Andric LU.AccessTy, F, L))
174591bc56edSDimitry Andric return F.Scale != 1;
1746f785676fSDimitry Andric
1747f785676fSDimitry Andric switch (LU.Kind) {
1748f785676fSDimitry Andric case LSRUse::Address: {
1749f785676fSDimitry Andric // Check the scaling factor cost with both the min and max offsets.
17507d523365SDimitry Andric int ScaleCostMinOffset = TTI.getScalingFactorCost(
17517d523365SDimitry Andric LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MinOffset, F.HasBaseReg,
17527d523365SDimitry Andric F.Scale, LU.AccessTy.AddrSpace);
17537d523365SDimitry Andric int ScaleCostMaxOffset = TTI.getScalingFactorCost(
17547d523365SDimitry Andric LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, F.HasBaseReg,
17557d523365SDimitry Andric F.Scale, LU.AccessTy.AddrSpace);
1756f785676fSDimitry Andric
1757f785676fSDimitry Andric assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
1758f785676fSDimitry Andric "Legal addressing mode has an illegal cost!");
1759f785676fSDimitry Andric return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1760f785676fSDimitry Andric }
1761f785676fSDimitry Andric case LSRUse::ICmpZero:
1762f785676fSDimitry Andric case LSRUse::Basic:
1763f785676fSDimitry Andric case LSRUse::Special:
176491bc56edSDimitry Andric // The use is completely folded, i.e., everything is folded into the
176591bc56edSDimitry Andric // instruction.
1766f785676fSDimitry Andric return 0;
1767f785676fSDimitry Andric }
1768f785676fSDimitry Andric
1769f785676fSDimitry Andric llvm_unreachable("Invalid LSRUse Kind!");
1770f785676fSDimitry Andric }
1771f785676fSDimitry Andric
isAlwaysFoldable(const TargetTransformInfo & TTI,LSRUse::KindType Kind,MemAccessTy AccessTy,GlobalValue * BaseGV,int64_t BaseOffset,bool HasBaseReg)1772139f7f9bSDimitry Andric static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
17737d523365SDimitry Andric LSRUse::KindType Kind, MemAccessTy AccessTy,
1774139f7f9bSDimitry Andric GlobalValue *BaseGV, int64_t BaseOffset,
1775139f7f9bSDimitry Andric bool HasBaseReg) {
1776f22ef01cSRoman Divacky // Fast-path: zero is always foldable.
1777139f7f9bSDimitry Andric if (BaseOffset == 0 && !BaseGV) return true;
1778f22ef01cSRoman Divacky
1779f22ef01cSRoman Divacky // Conservatively, create an address with an immediate and a
1780f22ef01cSRoman Divacky // base and a scale.
1781139f7f9bSDimitry Andric int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1782f22ef01cSRoman Divacky
1783f22ef01cSRoman Divacky // Canonicalize a scale of 1 to a base register if the formula doesn't
1784f22ef01cSRoman Divacky // already have a base register.
1785139f7f9bSDimitry Andric if (!HasBaseReg && Scale == 1) {
1786139f7f9bSDimitry Andric Scale = 0;
1787139f7f9bSDimitry Andric HasBaseReg = true;
1788f22ef01cSRoman Divacky }
1789f22ef01cSRoman Divacky
179091bc56edSDimitry Andric return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
179191bc56edSDimitry Andric HasBaseReg, Scale);
1792f22ef01cSRoman Divacky }
1793f22ef01cSRoman Divacky
isAlwaysFoldable(const TargetTransformInfo & TTI,ScalarEvolution & SE,int64_t MinOffset,int64_t MaxOffset,LSRUse::KindType Kind,MemAccessTy AccessTy,const SCEV * S,bool HasBaseReg)1794139f7f9bSDimitry Andric static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1795139f7f9bSDimitry Andric ScalarEvolution &SE, int64_t MinOffset,
1796139f7f9bSDimitry Andric int64_t MaxOffset, LSRUse::KindType Kind,
17977d523365SDimitry Andric MemAccessTy AccessTy, const SCEV *S,
17987d523365SDimitry Andric bool HasBaseReg) {
1799f22ef01cSRoman Divacky // Fast-path: zero is always foldable.
1800f22ef01cSRoman Divacky if (S->isZero()) return true;
1801f22ef01cSRoman Divacky
1802f22ef01cSRoman Divacky // Conservatively, create an address with an immediate and a
1803f22ef01cSRoman Divacky // base and a scale.
1804139f7f9bSDimitry Andric int64_t BaseOffset = ExtractImmediate(S, SE);
1805f22ef01cSRoman Divacky GlobalValue *BaseGV = ExtractSymbol(S, SE);
1806f22ef01cSRoman Divacky
1807f22ef01cSRoman Divacky // If there's anything else involved, it's not foldable.
1808f22ef01cSRoman Divacky if (!S->isZero()) return false;
1809f22ef01cSRoman Divacky
1810f22ef01cSRoman Divacky // Fast-path: zero is always foldable.
1811139f7f9bSDimitry Andric if (BaseOffset == 0 && !BaseGV) return true;
1812f22ef01cSRoman Divacky
1813f22ef01cSRoman Divacky // Conservatively, create an address with an immediate and a
1814f22ef01cSRoman Divacky // base and a scale.
1815139f7f9bSDimitry Andric int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1816f22ef01cSRoman Divacky
181791bc56edSDimitry Andric return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1818139f7f9bSDimitry Andric BaseOffset, HasBaseReg, Scale);
1819f22ef01cSRoman Divacky }
1820f22ef01cSRoman Divacky
1821ffd1746dSEd Schouten namespace {
1822ffd1746dSEd Schouten
/// An individual increment in a Chain of IV increments. Relate an IV user to
/// an expression that computes the IV it uses from the IV used by the previous
/// link in the Chain.
///
/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
/// original IVOperand. The head of the chain's IVOperand is only valid during
/// chain collection, before LSR replaces IV users. During chain generation,
/// IncExpr can be used to find the new IVOperand that computes the same
/// expression.
struct IVInc {
  // The instruction that uses the induction variable value.
  Instruction *UserInst;
  // The IV-derived operand of UserInst (see the class comment for validity).
  Value* IVOperand;
  // SCEV of this increment relative to the previous link, or the absolute
  // expression for a chain head.
  const SCEV *IncExpr;

  IVInc(Instruction *U, Value *O, const SCEV *E)
      : UserInst(U), IVOperand(O), IncExpr(E) {}
};
1840dff0c46cSDimitry Andric
// The list of IV increments in program order. We typically add the head of a
// chain without finding subsequent links.
struct IVChain {
  // All links of the chain, in program order; Incs.front() is the head.
  SmallVector<IVInc, 1> Incs;
  // The SCEV expression the chain is based on; null until initialized with a
  // head.
  const SCEV *ExprBase = nullptr;

  IVChain() = default;
  IVChain(const IVInc &Head, const SCEV *Base)
      : Incs(1, Head), ExprBase(Base) {}

  using const_iterator = SmallVectorImpl<IVInc>::const_iterator;

  // Return the first increment in the chain.
  // Note: iteration deliberately skips the head entry in Incs.
  const_iterator begin() const {
    assert(!Incs.empty());
    return std::next(Incs.begin());
  }
  const_iterator end() const {
    return Incs.end();
  }

  // Returns true if this chain contains any increments.
  // (The head alone doesn't count, hence >= 2.)
  bool hasIncs() const { return Incs.size() >= 2; }

  // Add an IVInc to the end of this chain.
  void add(const IVInc &X) { Incs.push_back(X); }

  // Returns the last UserInst in the chain.
  Instruction *tailUserInst() const { return Incs.back().UserInst; }

  // Returns true if IncExpr can be profitably added to this chain.
  bool isProfitableIncrement(const SCEV *OperExpr,
                             const SCEV *IncExpr,
                             ScalarEvolution&);
};
1876dff0c46cSDimitry Andric
/// Helper for CollectChains to track multiple IV increment uses. Distinguish
/// between FarUsers that definitely cross IV increments and NearUsers that may
/// be used between IV increments.
struct ChainUsers {
  // Users that definitely cross an IV increment.
  SmallPtrSet<Instruction*, 4> FarUsers;
  // Users that may execute between IV increments.
  SmallPtrSet<Instruction*, 4> NearUsers;
};
1884dff0c46cSDimitry Andric
/// This class holds state for the main loop strength reduction logic.
class LSRInstance {
  // Analyses and context this pass instance operates over.
  IVUsers &IU;
  ScalarEvolution &SE;
  DominatorTree &DT;
  LoopInfo &LI;
  const TargetTransformInfo &TTI;
  Loop *const L;
  // Whether any IR change has been made; reported via getChanged().
  bool Changed = false;

  /// This is the insert position that the current loop's induction variable
  /// increment should be placed. In simple loops, this is the latch block's
  /// terminator. But in more complicated cases, this is a position which will
  /// dominate all the in-loop post-increment users.
  Instruction *IVIncInsertPos = nullptr;

  /// Interesting factors between use strides.
  ///
  /// We explicitly use a SetVector which contains a SmallSet, instead of the
  /// default, a SmallDenseSet, because we need to use the full range of
  /// int64_ts, and there's currently no good way of doing that with
  /// SmallDenseSet.
  SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;

  /// Interesting use types, to facilitate truncation reuse.
  SmallSetVector<Type *, 4> Types;

  /// The list of interesting uses.
  SmallVector<LSRUse, 16> Uses;

  /// Track which uses use which register candidates.
  RegUseTracker RegUses;

  // Limit the number of chains to avoid quadratic behavior. We don't expect to
  // have more than a few IV increment chains in a loop. Missing a Chain falls
  // back to normal LSR behavior for those uses.
  static const unsigned MaxChains = 8;

  /// IV users can form a chain of IV increments.
  SmallVector<IVChain, MaxChains> IVChainVec;

  /// IV users that belong to profitable IVChains.
  SmallPtrSet<Use*, MaxChains> IVIncSet;

  // Early IV/loop-condition rewrites that run before the main analysis.
  void OptimizeShadowIV();
  bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
  ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
  void OptimizeLoopTermCond();

  // IV-increment chain collection and materialization.
  void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                        SmallVectorImpl<ChainUsers> &ChainUsersVec);
  void FinalizeChain(IVChain &Chain);
  void CollectChains();
  void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
                       SmallVectorImpl<WeakTrackingVH> &DeadInsts);

  void CollectInterestingTypesAndFactors();
  void CollectFixupsAndInitialFormulae();

  // Support for sharing of LSRUses between LSRFixups.
  using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
  UseMapTy UseMap;

  bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
                          LSRUse::KindType Kind, MemAccessTy AccessTy);

  // Find or create the LSRUse for Expr; returns (use index, offset).
  std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
                                    MemAccessTy AccessTy);

  void DeleteUse(LSRUse &LU, size_t LUIdx);

  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);

  // Formula insertion and bookkeeping for a given use.
  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);
  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);

  void CollectLoopInvariantFixupsAndFormulae();

  // Formula generation: each Generate* family derives alternative formulae
  // for a use from an existing Base formula.
  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
                              unsigned Depth = 0);

  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                  const Formula &Base, unsigned Depth,
                                  size_t Idx, bool IsScaledReg = false);
  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base, size_t Idx,
                                   bool IsScaledReg = false);
  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base,
                                   const SmallVectorImpl<int64_t> &Worklist,
                                   size_t Idx, bool IsScaledReg = false);
  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateCrossUseConstantOffsets();
  void GenerateAllReuseFormulae();

  void FilterOutUndesirableDedicatedRegisters();

  // Search-space pruning heuristics, applied before solving.
  size_t EstimateSearchSpaceComplexity() const;
  void NarrowSearchSpaceByDetectingSupersets();
  void NarrowSearchSpaceByCollapsingUnrolledCode();
  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  void NarrowSearchSpaceByDeletingCostlyFormulas();
  void NarrowSearchSpaceByPickingWinnerRegs();
  void NarrowSearchSpaceUsingHeuristics();

  // Solver: pick one formula per use minimizing the overall Cost.
  void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                    Cost &SolutionCost,
                    SmallVectorImpl<const Formula *> &Workspace,
                    const Cost &CurCost,
                    const SmallPtrSet<const SCEV *, 16> &CurRegs,
                    DenseSet<const SCEV *> &VisitedRegs) const;
  void Solve(SmallVectorImpl<const Formula *> &Solution) const;

  // Insert-position selection for expanded expressions.
  BasicBlock::iterator
  HoistInsertPosition(BasicBlock::iterator IP,
                      const SmallVectorImpl<Instruction *> &Inputs) const;
  BasicBlock::iterator
  AdjustInsertPositionForExpand(BasicBlock::iterator IP,
                                const LSRFixup &LF,
                                const LSRUse &LU,
                                SCEVExpander &Rewriter) const;

  // Rewriting: expand the chosen formulae and replace the original IV users.
  Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
                BasicBlock::iterator IP, SCEVExpander &Rewriter,
                SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
  void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
                     const Formula &F, SCEVExpander &Rewriter,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
  void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
               SCEVExpander &Rewriter,
               SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
  void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);

public:
  // Constructing an LSRInstance runs the whole transformation on L.
  LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
              LoopInfo &LI, const TargetTransformInfo &TTI);

  // Whether this instance changed any IR.
  bool getChanged() const { return Changed; }

  // Debug printing helpers.
  void print_factors_and_types(raw_ostream &OS) const;
  void print_fixups(raw_ostream &OS) const;
  void print_uses(raw_ostream &OS) const;
  void print(raw_ostream &OS) const;
  void dump() const;
};
2038f22ef01cSRoman Divacky
2039d88c1a5aSDimitry Andric } // end anonymous namespace
2040f22ef01cSRoman Divacky
20417d523365SDimitry Andric /// If IV is used in a int-to-float cast inside the loop then try to eliminate
20427d523365SDimitry Andric /// the cast operation.
OptimizeShadowIV()2043f22ef01cSRoman Divacky void LSRInstance::OptimizeShadowIV() {
2044f22ef01cSRoman Divacky const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
2045f22ef01cSRoman Divacky if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2046f22ef01cSRoman Divacky return;
2047f22ef01cSRoman Divacky
2048f22ef01cSRoman Divacky for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
2049f22ef01cSRoman Divacky UI != E; /* empty */) {
2050f22ef01cSRoman Divacky IVUsers::const_iterator CandidateUI = UI;
2051f22ef01cSRoman Divacky ++UI;
2052f22ef01cSRoman Divacky Instruction *ShadowUse = CandidateUI->getUser();
205391bc56edSDimitry Andric Type *DestTy = nullptr;
20546122f3e6SDimitry Andric bool IsSigned = false;
2055f22ef01cSRoman Divacky
2056f22ef01cSRoman Divacky /* If shadow use is a int->float cast then insert a second IV
2057f22ef01cSRoman Divacky to eliminate this cast.
2058f22ef01cSRoman Divacky
2059f22ef01cSRoman Divacky for (unsigned i = 0; i < n; ++i)
2060f22ef01cSRoman Divacky foo((double)i);
2061f22ef01cSRoman Divacky
2062f22ef01cSRoman Divacky is transformed into
2063f22ef01cSRoman Divacky
2064f22ef01cSRoman Divacky double d = 0.0;
2065f22ef01cSRoman Divacky for (unsigned i = 0; i < n; ++i, ++d)
2066f22ef01cSRoman Divacky foo(d);
2067f22ef01cSRoman Divacky */
20686122f3e6SDimitry Andric if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
20696122f3e6SDimitry Andric IsSigned = false;
2070f22ef01cSRoman Divacky DestTy = UCast->getDestTy();
20716122f3e6SDimitry Andric }
20726122f3e6SDimitry Andric else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
20736122f3e6SDimitry Andric IsSigned = true;
2074f22ef01cSRoman Divacky DestTy = SCast->getDestTy();
20756122f3e6SDimitry Andric }
2076f22ef01cSRoman Divacky if (!DestTy) continue;
2077f22ef01cSRoman Divacky
2078f22ef01cSRoman Divacky // If target does not support DestTy natively then do not apply
2079f22ef01cSRoman Divacky // this transformation.
2080139f7f9bSDimitry Andric if (!TTI.isTypeLegal(DestTy)) continue;
2081f22ef01cSRoman Divacky
2082f22ef01cSRoman Divacky PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
2083f22ef01cSRoman Divacky if (!PH) continue;
2084f22ef01cSRoman Divacky if (PH->getNumIncomingValues() != 2) continue;
2085f22ef01cSRoman Divacky
20862cab237bSDimitry Andric // If the calculation in integers overflows, the result in FP type will
20872cab237bSDimitry Andric // differ. So we only can do this transformation if we are guaranteed to not
20882cab237bSDimitry Andric // deal with overflowing values
20892cab237bSDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PH));
20902cab237bSDimitry Andric if (!AR) continue;
20912cab237bSDimitry Andric if (IsSigned && !AR->hasNoSignedWrap()) continue;
20922cab237bSDimitry Andric if (!IsSigned && !AR->hasNoUnsignedWrap()) continue;
20932cab237bSDimitry Andric
20946122f3e6SDimitry Andric Type *SrcTy = PH->getType();
2095f22ef01cSRoman Divacky int Mantissa = DestTy->getFPMantissaWidth();
2096f22ef01cSRoman Divacky if (Mantissa == -1) continue;
2097f22ef01cSRoman Divacky if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
2098f22ef01cSRoman Divacky continue;
2099f22ef01cSRoman Divacky
2100f22ef01cSRoman Divacky unsigned Entry, Latch;
2101f22ef01cSRoman Divacky if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
2102f22ef01cSRoman Divacky Entry = 0;
2103f22ef01cSRoman Divacky Latch = 1;
2104f22ef01cSRoman Divacky } else {
2105f22ef01cSRoman Divacky Entry = 1;
2106f22ef01cSRoman Divacky Latch = 0;
2107f22ef01cSRoman Divacky }
2108f22ef01cSRoman Divacky
2109f22ef01cSRoman Divacky ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
2110f22ef01cSRoman Divacky if (!Init) continue;
21116122f3e6SDimitry Andric Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
21126122f3e6SDimitry Andric (double)Init->getSExtValue() :
21136122f3e6SDimitry Andric (double)Init->getZExtValue());
2114f22ef01cSRoman Divacky
2115f22ef01cSRoman Divacky BinaryOperator *Incr =
2116f22ef01cSRoman Divacky dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
2117f22ef01cSRoman Divacky if (!Incr) continue;
2118f22ef01cSRoman Divacky if (Incr->getOpcode() != Instruction::Add
2119f22ef01cSRoman Divacky && Incr->getOpcode() != Instruction::Sub)
2120f22ef01cSRoman Divacky continue;
2121f22ef01cSRoman Divacky
2122f22ef01cSRoman Divacky /* Initialize new IV, double d = 0.0 in above example. */
212391bc56edSDimitry Andric ConstantInt *C = nullptr;
2124f22ef01cSRoman Divacky if (Incr->getOperand(0) == PH)
2125f22ef01cSRoman Divacky C = dyn_cast<ConstantInt>(Incr->getOperand(1));
2126f22ef01cSRoman Divacky else if (Incr->getOperand(1) == PH)
2127f22ef01cSRoman Divacky C = dyn_cast<ConstantInt>(Incr->getOperand(0));
2128f22ef01cSRoman Divacky else
2129f22ef01cSRoman Divacky continue;
2130f22ef01cSRoman Divacky
2131f22ef01cSRoman Divacky if (!C) continue;
2132f22ef01cSRoman Divacky
2133f22ef01cSRoman Divacky // Ignore negative constants, as the code below doesn't handle them
2134f22ef01cSRoman Divacky // correctly. TODO: Remove this restriction.
2135f22ef01cSRoman Divacky if (!C->getValue().isStrictlyPositive()) continue;
2136f22ef01cSRoman Divacky
2137f22ef01cSRoman Divacky /* Add new PHINode. */
21383b0f4066SDimitry Andric PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
2139f22ef01cSRoman Divacky
2140f22ef01cSRoman Divacky /* create new increment. '++d' in above example. */
2141f22ef01cSRoman Divacky Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
2142f22ef01cSRoman Divacky BinaryOperator *NewIncr =
2143f22ef01cSRoman Divacky BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
2144f22ef01cSRoman Divacky Instruction::FAdd : Instruction::FSub,
2145f22ef01cSRoman Divacky NewPH, CFP, "IV.S.next.", Incr);
2146f22ef01cSRoman Divacky
2147f22ef01cSRoman Divacky NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
2148f22ef01cSRoman Divacky NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
2149f22ef01cSRoman Divacky
2150f22ef01cSRoman Divacky /* Remove cast operation */
2151f22ef01cSRoman Divacky ShadowUse->replaceAllUsesWith(NewPH);
2152f22ef01cSRoman Divacky ShadowUse->eraseFromParent();
2153f22ef01cSRoman Divacky Changed = true;
2154f22ef01cSRoman Divacky break;
2155f22ef01cSRoman Divacky }
2156f22ef01cSRoman Divacky }
2157f22ef01cSRoman Divacky
21587d523365SDimitry Andric /// If Cond has an operand that is an expression of an IV, set the IV user and
21597d523365SDimitry Andric /// stride information and return true, otherwise return false.
FindIVUserForCond(ICmpInst * Cond,IVStrideUse * & CondUse)2160f22ef01cSRoman Divacky bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
2161ff0cc061SDimitry Andric for (IVStrideUse &U : IU)
2162ff0cc061SDimitry Andric if (U.getUser() == Cond) {
2163f22ef01cSRoman Divacky // NOTE: we could handle setcc instructions with multiple uses here, but
2164f22ef01cSRoman Divacky // InstCombine does it as well for simple uses, it's not clear that it
2165f22ef01cSRoman Divacky // occurs enough in real life to handle.
2166ff0cc061SDimitry Andric CondUse = &U;
2167f22ef01cSRoman Divacky return true;
2168f22ef01cSRoman Divacky }
2169f22ef01cSRoman Divacky return false;
2170f22ef01cSRoman Divacky }
2171f22ef01cSRoman Divacky
21727d523365SDimitry Andric /// Rewrite the loop's terminating condition if it uses a max computation.
2173f22ef01cSRoman Divacky ///
2174f22ef01cSRoman Divacky /// This is a narrow solution to a specific, but acute, problem. For loops
2175f22ef01cSRoman Divacky /// like this:
2176f22ef01cSRoman Divacky ///
2177f22ef01cSRoman Divacky /// i = 0;
2178f22ef01cSRoman Divacky /// do {
2179f22ef01cSRoman Divacky /// p[i] = 0.0;
2180f22ef01cSRoman Divacky /// } while (++i < n);
2181f22ef01cSRoman Divacky ///
2182f22ef01cSRoman Divacky /// the trip count isn't just 'n', because 'n' might not be positive. And
2183f22ef01cSRoman Divacky /// unfortunately this can come up even for loops where the user didn't use
2184f22ef01cSRoman Divacky /// a C do-while loop. For example, seemingly well-behaved top-test loops
2185f22ef01cSRoman Divacky /// will commonly be lowered like this:
21862cab237bSDimitry Andric ///
2187f22ef01cSRoman Divacky /// if (n > 0) {
2188f22ef01cSRoman Divacky /// i = 0;
2189f22ef01cSRoman Divacky /// do {
2190f22ef01cSRoman Divacky /// p[i] = 0.0;
2191f22ef01cSRoman Divacky /// } while (++i < n);
2192f22ef01cSRoman Divacky /// }
2193f22ef01cSRoman Divacky ///
2194f22ef01cSRoman Divacky /// and then it's possible for subsequent optimization to obscure the if
2195f22ef01cSRoman Divacky /// test in such a way that indvars can't find it.
2196f22ef01cSRoman Divacky ///
2197f22ef01cSRoman Divacky /// When indvars can't find the if test in loops like this, it creates a
2198f22ef01cSRoman Divacky /// max expression, which allows it to give the loop a canonical
2199f22ef01cSRoman Divacky /// induction variable:
2200f22ef01cSRoman Divacky ///
2201f22ef01cSRoman Divacky /// i = 0;
2202f22ef01cSRoman Divacky /// max = n < 1 ? 1 : n;
2203f22ef01cSRoman Divacky /// do {
2204f22ef01cSRoman Divacky /// p[i] = 0.0;
2205f22ef01cSRoman Divacky /// } while (++i != max);
2206f22ef01cSRoman Divacky ///
2207f22ef01cSRoman Divacky /// Canonical induction variables are necessary because the loop passes
2208f22ef01cSRoman Divacky /// are designed around them. The most obvious example of this is the
2209f22ef01cSRoman Divacky /// LoopInfo analysis, which doesn't remember trip count values. It
2210f22ef01cSRoman Divacky /// expects to be able to rediscover the trip count each time it is
2211f22ef01cSRoman Divacky /// needed, and it does this using a simple analysis that only succeeds if
2212f22ef01cSRoman Divacky /// the loop has a canonical induction variable.
2213f22ef01cSRoman Divacky ///
2214f22ef01cSRoman Divacky /// However, when it comes time to generate code, the maximum operation
2215f22ef01cSRoman Divacky /// can be quite costly, especially if it's inside of an outer loop.
2216f22ef01cSRoman Divacky ///
2217f22ef01cSRoman Divacky /// This function solves this problem by detecting this type of loop and
2218f22ef01cSRoman Divacky /// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
2219f22ef01cSRoman Divacky /// the instructions for the maximum computation.
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
  // Check that the loop matches the pattern we're looking for. Only eq/ne
  // terminating comparisons are candidates for rewriting to slt/ult.
  if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
      Cond->getPredicate() != CmpInst::ICMP_NE)
    return Cond;

  // The comparison's RHS must be the select that computes the max, and this
  // comparison must be its sole user, since the select is deleted below.
  SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
  if (!Sel || !Sel->hasOneUse()) return Cond;

  // The trip count must be computable for the pattern match below.
  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    return Cond;
  const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);

  // Add one to the backedge-taken count to get the trip count.
  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
  // The select must compute exactly the trip count, i.e. the max expression.
  if (IterationCount != SE.getSCEV(Sel)) return Cond;

  // Check for a max calculation that matches the pattern. There's no check
  // for ICMP_ULE here because the comparison would be with zero, which
  // isn't interesting.
  CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
  const SCEVNAryExpr *Max = nullptr;
  if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
    // smax appears directly in the backedge-taken count: <= form.
    Pred = ICmpInst::ICMP_SLE;
    Max = S;
  } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
    // smax appears in the trip count: < form, signed.
    Pred = ICmpInst::ICMP_SLT;
    Max = S;
  } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
    // umax appears in the trip count: < form, unsigned.
    Pred = ICmpInst::ICMP_ULT;
    Max = U;
  } else {
    // No match; bail.
    return Cond;
  }

  // To handle a max with more than two operands, this optimization would
  // require additional checking and setup.
  if (Max->getNumOperands() != 2)
    return Cond;

  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);

  // ScalarEvolution canonicalizes constants to the left. For < and >, look
  // for a comparison with 1. For <= and >=, a comparison with zero.
  if (!MaxLHS ||
      (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
    return Cond;

  // Check the relevant induction variable for conformance to
  // the pattern: an affine addrec starting at 1 with step 1.
  const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
  if (!AR || !AR->isAffine() ||
      AR->getStart() != One ||
      AR->getStepRecurrence(SE) != One)
    return Cond;

  assert(AR->getLoop() == L &&
         "Loop condition operand is an addrec in a different loop!");

  // Check the right operand of the select, and remember it, as it will
  // be used in the new comparison instruction.
  Value *NewRHS = nullptr;
  if (ICmpInst::isTrueWhenEqual(Pred)) {
    // Look for n+1, and grab n.
    if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
    if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
    if (!NewRHS)
      return Cond;
  } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
    NewRHS = Sel->getOperand(1);
  else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
    NewRHS = Sel->getOperand(2);
  else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
    NewRHS = SU->getValue();
  else
    // Max doesn't match expected pattern.
    return Cond;

  // Determine the new comparison opcode. It may be signed or unsigned,
  // and the original comparison may be either equality or inequality.
  if (Cond->getPredicate() == CmpInst::ICMP_EQ)
    Pred = CmpInst::getInversePredicate(Pred);

  // Ok, everything looks ok to change the condition into an SLT or SGE and
  // delete the max calculation.
  ICmpInst *NewCond =
    new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");

  // Delete the max calculation instructions.
  Cond->replaceAllUsesWith(NewCond);
  // Keep the IVUsers entry pointing at the new comparison.
  CondUse->setUser(NewCond);
  Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
  Cond->eraseFromParent();
  Sel->eraseFromParent();
  // The select's condition may now be dead; clean it up if so.
  if (Cmp->use_empty())
    Cmp->eraseFromParent();
  return NewCond;
}
2328f22ef01cSRoman Divacky
23297d523365SDimitry Andric /// Change loop terminating condition to use the postinc iv when possible.
void
LSRInstance::OptimizeLoopTermCond() {
  // Comparisons we successfully switch to post-inc form; the IV increment
  // insertion point chosen at the end must dominate all of them.
  SmallPtrSet<Instruction *, 4> PostIncs;

  // We need a different set of heuristics for rotated and non-rotated loops.
  // If a loop is rotated then the latch is also the backedge, so inserting
  // post-inc expressions just before the latch is ideal. To reduce live ranges
  // it also makes sense to rewrite terminating conditions to use post-inc
  // expressions.
  //
  // If the loop is not rotated then the latch is not a backedge; the latch
  // check is done in the loop head. Adding post-inc expressions before the
  // latch will cause overlapping live-ranges of pre-inc and post-inc expressions
  // in the loop body. In this case we do *not* want to use post-inc expressions
  // in the latch check, and we want to insert post-inc expressions before
  // the backedge.
  BasicBlock *LatchBlock = L->getLoopLatch();
  SmallVector<BasicBlock*, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  if (llvm::all_of(ExitingBlocks, [&LatchBlock](const BasicBlock *BB) {
        return LatchBlock != BB;
      })) {
    // The backedge doesn't exit the loop; treat this as a head-tested loop.
    IVIncInsertPos = LatchBlock->getTerminator();
    return;
  }

  // Otherwise treat this as a rotated loop.
  for (BasicBlock *ExitingBlock : ExitingBlocks) {
    // Get the terminating condition for the loop if possible. If we
    // can, we want to change it to use a post-incremented version of its
    // induction variable, to allow coalescing the live ranges for the IV into
    // one register value.

    BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
    if (!TermBr)
      continue;
    // FIXME: Overly conservative, termination condition could be an 'or' etc..
    if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
      continue;

    // Search IVUsesByStride to find Cond's IVUse if there is one.
    IVStrideUse *CondUse = nullptr;
    ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
    if (!FindIVUserForCond(Cond, CondUse))
      continue;

    // If the trip count is computed in terms of a max (due to ScalarEvolution
    // being unable to find a sufficient guard, for example), change the loop
    // comparison to use SLT or ULT instead of NE.
    // One consequence of doing this now is that it disrupts the count-down
    // optimization. That's not always a bad thing though, because in such
    // cases it may still be worthwhile to avoid a max.
    Cond = OptimizeMax(Cond, CondUse);

    // If this exiting block dominates the latch block, it may also use
    // the post-inc value if it won't be shared with other uses.
    // Check for dominance.
    if (!DT.dominates(ExitingBlock, LatchBlock))
      continue;

    // Conservatively avoid trying to use the post-inc value in non-latch
    // exits if there may be pre-inc users in intervening blocks.
    if (LatchBlock != ExitingBlock)
      for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
        // Test if the use is reachable from the exiting block. This dominator
        // query is a conservative approximation of reachability.
        if (&*UI != CondUse &&
            !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
          // Conservatively assume there may be reuse if the quotient of their
          // strides could be a legal scale.
          const SCEV *A = IU.getStride(*CondUse, L);
          const SCEV *B = IU.getStride(*UI, L);
          if (!A || !B) continue;
          // Widen the narrower stride so the division below is well-typed.
          if (SE.getTypeSizeInBits(A->getType()) !=
              SE.getTypeSizeInBits(B->getType())) {
            if (SE.getTypeSizeInBits(A->getType()) >
                SE.getTypeSizeInBits(B->getType()))
              B = SE.getSignExtendExpr(B, A->getType());
            else
              A = SE.getSignExtendExpr(A, B->getType());
          }
          if (const SCEVConstant *D =
                dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
            const ConstantInt *C = D->getValue();
            // Stride of one or negative one can have reuse with non-addresses.
            if (C->isOne() || C->isMinusOne())
              goto decline_post_inc;
            // Avoid weird situations.
            if (C->getValue().getMinSignedBits() >= 64 ||
                C->getValue().isMinSignedValue())
              goto decline_post_inc;
            // Check for possible scaled-address reuse.
            if (isAddressUse(TTI, UI->getUser(), UI->getOperandValToReplace())) {
              MemAccessTy AccessTy = getAccessType(
                  TTI, UI->getUser(), UI->getOperandValToReplace());
              int64_t Scale = C->getSExtValue();
              // If the stride quotient is usable as an address scale (in
              // either sign), reuse with this address use is plausible, so
              // decline the post-inc transformation.
              if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
                                            /*BaseOffset=*/0,
                                            /*HasBaseReg=*/false, Scale,
                                            AccessTy.AddrSpace))
                goto decline_post_inc;
              Scale = -Scale;
              if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
                                            /*BaseOffset=*/0,
                                            /*HasBaseReg=*/false, Scale,
                                            AccessTy.AddrSpace))
                goto decline_post_inc;
            }
          }
        }

    LLVM_DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "
                      << *Cond << '\n');

    // It's possible for the setcc instruction to be anywhere in the loop, and
    // possible for it to have multiple users. If it is not immediately before
    // the exiting block branch, move it.
    if (&*++BasicBlock::iterator(Cond) != TermBr) {
      if (Cond->hasOneUse()) {
        Cond->moveBefore(TermBr);
      } else {
        // Clone the terminating condition and insert into the loopend.
        ICmpInst *OldCond = Cond;
        Cond = cast<ICmpInst>(Cond->clone());
        Cond->setName(L->getHeader()->getName() + ".termcond");
        ExitingBlock->getInstList().insert(TermBr->getIterator(), Cond);

        // Clone the IVUse, as the old use still exists!
        CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
        TermBr->replaceUsesOfWith(OldCond, Cond);
      }
    }

    // If we get to here, we know that we can transform the setcc instruction to
    // use the post-incremented version of the IV, allowing us to coalesce the
    // live ranges for the IV correctly.
    CondUse->transformToPostInc(L);
    Changed = true;

    PostIncs.insert(Cond);
  decline_post_inc:;
  }

  // Determine an insertion point for the loop induction variable increment. It
  // must dominate all the post-inc comparisons we just set up, and it must
  // dominate the loop latch edge.
  IVIncInsertPos = L->getLoopLatch()->getTerminator();
  for (Instruction *Inst : PostIncs) {
    BasicBlock *BB =
      DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
                                    Inst->getParent());
    if (BB == Inst->getParent())
      IVIncInsertPos = Inst;
    else if (BB != IVIncInsertPos->getParent())
      IVIncInsertPos = BB->getTerminator();
  }
}
2488f22ef01cSRoman Divacky
24897d523365SDimitry Andric /// Determine if the given use can accommodate a fixup at the given offset and
24907d523365SDimitry Andric /// other details. If so, update the use and return true.
reconcileNewOffset(LSRUse & LU,int64_t NewOffset,bool HasBaseReg,LSRUse::KindType Kind,MemAccessTy AccessTy)24917d523365SDimitry Andric bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
24927d523365SDimitry Andric bool HasBaseReg, LSRUse::KindType Kind,
24937d523365SDimitry Andric MemAccessTy AccessTy) {
2494f22ef01cSRoman Divacky int64_t NewMinOffset = LU.MinOffset;
2495f22ef01cSRoman Divacky int64_t NewMaxOffset = LU.MaxOffset;
24967d523365SDimitry Andric MemAccessTy NewAccessTy = AccessTy;
2497f22ef01cSRoman Divacky
2498f22ef01cSRoman Divacky // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
2499f22ef01cSRoman Divacky // something conservative, however this can pessimize in the case that one of
2500f22ef01cSRoman Divacky // the uses will have all its uses outside the loop, for example.
2501f22ef01cSRoman Divacky if (LU.Kind != Kind)
2502f22ef01cSRoman Divacky return false;
250391bc56edSDimitry Andric
2504f22ef01cSRoman Divacky // Check for a mismatched access type, and fall back conservatively as needed.
2505ffd1746dSEd Schouten // TODO: Be less conservative when the type is similar and can use the same
2506ffd1746dSEd Schouten // addressing modes.
25077d523365SDimitry Andric if (Kind == LSRUse::Address) {
2508899ca3d6SDimitry Andric if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2509899ca3d6SDimitry Andric NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
2510899ca3d6SDimitry Andric AccessTy.AddrSpace);
2511899ca3d6SDimitry Andric }
25127d523365SDimitry Andric }
2513f22ef01cSRoman Divacky
251491bc56edSDimitry Andric // Conservatively assume HasBaseReg is true for now.
251591bc56edSDimitry Andric if (NewOffset < LU.MinOffset) {
251691bc56edSDimitry Andric if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
251791bc56edSDimitry Andric LU.MaxOffset - NewOffset, HasBaseReg))
251891bc56edSDimitry Andric return false;
251991bc56edSDimitry Andric NewMinOffset = NewOffset;
252091bc56edSDimitry Andric } else if (NewOffset > LU.MaxOffset) {
252191bc56edSDimitry Andric if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
252291bc56edSDimitry Andric NewOffset - LU.MinOffset, HasBaseReg))
252391bc56edSDimitry Andric return false;
252491bc56edSDimitry Andric NewMaxOffset = NewOffset;
252591bc56edSDimitry Andric }
252691bc56edSDimitry Andric
2527f22ef01cSRoman Divacky // Update the use.
2528f22ef01cSRoman Divacky LU.MinOffset = NewMinOffset;
2529f22ef01cSRoman Divacky LU.MaxOffset = NewMaxOffset;
2530f22ef01cSRoman Divacky LU.AccessTy = NewAccessTy;
2531f22ef01cSRoman Divacky return true;
2532f22ef01cSRoman Divacky }
2533f22ef01cSRoman Divacky
25347d523365SDimitry Andric /// Return an LSRUse index and an offset value for a fixup which needs the given
25357d523365SDimitry Andric /// expression, with the given kind and optional access type. Either reuse an
25367d523365SDimitry Andric /// existing use or create a new one, as needed.
getUse(const SCEV * & Expr,LSRUse::KindType Kind,MemAccessTy AccessTy)25377d523365SDimitry Andric std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
25387d523365SDimitry Andric LSRUse::KindType Kind,
25397d523365SDimitry Andric MemAccessTy AccessTy) {
2540f22ef01cSRoman Divacky const SCEV *Copy = Expr;
2541f22ef01cSRoman Divacky int64_t Offset = ExtractImmediate(Expr, SE);
2542f22ef01cSRoman Divacky
2543f22ef01cSRoman Divacky // Basic uses can't accept any offset, for example.
254491bc56edSDimitry Andric if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
2545139f7f9bSDimitry Andric Offset, /*HasBaseReg=*/ true)) {
2546f22ef01cSRoman Divacky Expr = Copy;
2547f22ef01cSRoman Divacky Offset = 0;
2548f22ef01cSRoman Divacky }
2549f22ef01cSRoman Divacky
2550f22ef01cSRoman Divacky std::pair<UseMapTy::iterator, bool> P =
255191bc56edSDimitry Andric UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
2552f22ef01cSRoman Divacky if (!P.second) {
2553f22ef01cSRoman Divacky // A use already existed with this base.
2554f22ef01cSRoman Divacky size_t LUIdx = P.first->second;
2555f22ef01cSRoman Divacky LSRUse &LU = Uses[LUIdx];
2556f22ef01cSRoman Divacky if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
2557f22ef01cSRoman Divacky // Reuse this use.
2558f22ef01cSRoman Divacky return std::make_pair(LUIdx, Offset);
2559f22ef01cSRoman Divacky }
2560f22ef01cSRoman Divacky
2561f22ef01cSRoman Divacky // Create a new use.
2562f22ef01cSRoman Divacky size_t LUIdx = Uses.size();
2563f22ef01cSRoman Divacky P.first->second = LUIdx;
2564f22ef01cSRoman Divacky Uses.push_back(LSRUse(Kind, AccessTy));
2565f22ef01cSRoman Divacky LSRUse &LU = Uses[LUIdx];
2566f22ef01cSRoman Divacky
2567f22ef01cSRoman Divacky LU.MinOffset = Offset;
2568f22ef01cSRoman Divacky LU.MaxOffset = Offset;
2569f22ef01cSRoman Divacky return std::make_pair(LUIdx, Offset);
2570f22ef01cSRoman Divacky }
2571f22ef01cSRoman Divacky
25727d523365SDimitry Andric /// Delete the given use from the Uses list.
DeleteUse(LSRUse & LU,size_t LUIdx)25732754fe60SDimitry Andric void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
2574f22ef01cSRoman Divacky if (&LU != &Uses.back())
2575f22ef01cSRoman Divacky std::swap(LU, Uses.back());
2576f22ef01cSRoman Divacky Uses.pop_back();
25772754fe60SDimitry Andric
25782754fe60SDimitry Andric // Update RegUses.
25797d523365SDimitry Andric RegUses.swapAndDropUse(LUIdx, Uses.size());
2580f22ef01cSRoman Divacky }
2581f22ef01cSRoman Divacky
/// Look for a use distinct from OrigLU which has a formula that has the same
/// registers as the given formula.
LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                       const LSRUse &OrigLU) {
  // Search all uses for the formula. This could be more clever.
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    // Check whether this use is close enough to OrigLU, to see whether it's
    // worthwhile looking through its formulae.
    // Ignore ICmpZero uses because they may contain formulae generated by
    // GenerateICmpZeroScales, in which case adding fixup offsets may
    // be invalid.
    if (&LU != &OrigLU &&
        LU.Kind != LSRUse::ICmpZero &&
        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
        LU.WidestFixupType == OrigLU.WidestFixupType &&
        LU.HasFormulaWithSameRegs(OrigF)) {
      // Scan through this use's formulae.
      for (const Formula &F : LU.Formulae) {
        // Check to see if this formula has the same registers and symbols
        // as OrigF.
        if (F.BaseRegs == OrigF.BaseRegs &&
            F.ScaledReg == OrigF.ScaledReg &&
            F.BaseGV == OrigF.BaseGV &&
            F.Scale == OrigF.Scale &&
            F.UnfoldedOffset == OrigF.UnfoldedOffset) {
          // Only a zero base offset makes the formulae truly interchangeable.
          if (F.BaseOffset == 0)
            return &LU;
          // This is the formula where all the registers and symbols matched;
          // there aren't going to be any others. Since we declined it, we
          // can skip the rest of the formulae and proceed to the next LSRUse.
          break;
        }
      }
    }
  }

  // Nothing looked good.
  return nullptr;
}
2623f22ef01cSRoman Divacky
/// Collect the effective SCEV types of all IV expressions, and the integer
/// factors relating pairs of interesting strides, for later formula
/// generation.
void LSRInstance::CollectInterestingTypesAndFactors() {
  SmallSetVector<const SCEV *, 4> Strides;

  // Collect interesting types and strides.
  SmallVector<const SCEV *, 4> Worklist;
  for (const IVStrideUse &U : IU) {
    const SCEV *Expr = IU.getExpr(U);

    // Collect interesting types.
    Types.insert(SE.getEffectiveSCEVType(Expr->getType()));

    // Add strides for mentioned loops. Walk the expression tree, descending
    // into AddRec starts and Add operands, picking up step recurrences of
    // AddRecs on this loop.
    Worklist.push_back(Expr);
    do {
      const SCEV *S = Worklist.pop_back_val();
      if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
        if (AR->getLoop() == L)
          Strides.insert(AR->getStepRecurrence(SE));
        Worklist.push_back(AR->getStart());
      } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
        Worklist.append(Add->op_begin(), Add->op_end());
      }
    } while (!Worklist.empty());
  }

  // Compute interesting factors from the set of interesting strides: for each
  // unordered pair, record an exact constant quotient in either direction.
  for (SmallSetVector<const SCEV *, 4>::const_iterator
       I = Strides.begin(), E = Strides.end(); I != E; ++I)
    for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
         std::next(I); NewStrideIter != E; ++NewStrideIter) {
      const SCEV *OldStride = *I;
      const SCEV *NewStride = *NewStrideIter;

      // Sign-extend the narrower stride so the two can be divided.
      if (SE.getTypeSizeInBits(OldStride->getType()) !=
          SE.getTypeSizeInBits(NewStride->getType())) {
        if (SE.getTypeSizeInBits(OldStride->getType()) >
            SE.getTypeSizeInBits(NewStride->getType()))
          NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
        else
          OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
      }
      // Only factors that fit in a signed 64-bit value are usable.
      if (const SCEVConstant *Factor =
              dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
                                                          SE, true))) {
        if (Factor->getAPInt().getMinSignedBits() <= 64)
          Factors.insert(Factor->getAPInt().getSExtValue());
      } else if (const SCEVConstant *Factor =
                     dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
                                                                 NewStride,
                                                                 SE, true))) {
        if (Factor->getAPInt().getMinSignedBits() <= 64)
          Factors.insert(Factor->getAPInt().getSExtValue());
      }
    }

  // If all uses use the same type, don't bother looking for truncation-based
  // reuse.
  if (Types.size() == 1)
    Types.clear();

  LLVM_DEBUG(print_factors_and_types(dbgs()));
}
2686f22ef01cSRoman Divacky
26877d523365SDimitry Andric /// Helper for CollectChains that finds an IV operand (computed by an AddRec in
26887d523365SDimitry Andric /// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
26897d523365SDimitry Andric /// IVStrideUses, we could partially skip this.
2690dff0c46cSDimitry Andric static User::op_iterator
findIVOperand(User::op_iterator OI,User::op_iterator OE,Loop * L,ScalarEvolution & SE)2691dff0c46cSDimitry Andric findIVOperand(User::op_iterator OI, User::op_iterator OE,
2692dff0c46cSDimitry Andric Loop *L, ScalarEvolution &SE) {
2693dff0c46cSDimitry Andric for(; OI != OE; ++OI) {
2694dff0c46cSDimitry Andric if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
2695dff0c46cSDimitry Andric if (!SE.isSCEVable(Oper->getType()))
2696dff0c46cSDimitry Andric continue;
2697dff0c46cSDimitry Andric
2698dff0c46cSDimitry Andric if (const SCEVAddRecExpr *AR =
2699dff0c46cSDimitry Andric dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
2700dff0c46cSDimitry Andric if (AR->getLoop() == L)
2701dff0c46cSDimitry Andric break;
2702dff0c46cSDimitry Andric }
2703dff0c46cSDimitry Andric }
2704dff0c46cSDimitry Andric }
2705dff0c46cSDimitry Andric return OI;
2706dff0c46cSDimitry Andric }
2707dff0c46cSDimitry Andric
27084ba319b5SDimitry Andric /// IVChain logic must consistently peek base TruncInst operands, so wrap it in
27097d523365SDimitry Andric /// a convenient helper.
getWideOperand(Value * Oper)2710dff0c46cSDimitry Andric static Value *getWideOperand(Value *Oper) {
2711dff0c46cSDimitry Andric if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
2712dff0c46cSDimitry Andric return Trunc->getOperand(0);
2713dff0c46cSDimitry Andric return Oper;
2714dff0c46cSDimitry Andric }
2715dff0c46cSDimitry Andric
27167d523365SDimitry Andric /// Return true if we allow an IV chain to include both types.
isCompatibleIVType(Value * LVal,Value * RVal)2717dff0c46cSDimitry Andric static bool isCompatibleIVType(Value *LVal, Value *RVal) {
2718dff0c46cSDimitry Andric Type *LType = LVal->getType();
2719dff0c46cSDimitry Andric Type *RType = RVal->getType();
27207a7e6055SDimitry Andric return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy() &&
27217a7e6055SDimitry Andric // Different address spaces means (possibly)
27227a7e6055SDimitry Andric // different types of the pointer implementation,
27237a7e6055SDimitry Andric // e.g. i16 vs i32 so disallow that.
27247a7e6055SDimitry Andric (LType->getPointerAddressSpace() ==
27257a7e6055SDimitry Andric RType->getPointerAddressSpace()));
2726dff0c46cSDimitry Andric }
2727dff0c46cSDimitry Andric
/// Return an approximation of this SCEV expression's "base", or NULL for any
/// constant. Returning the expression itself is conservative. Returning a
/// deeper subexpression is more precise and valid as long as it isn't less
/// complex than another subexpression. For expressions involving multiple
/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
/// IVInc==b-a.
///
/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
/// SCEVUnknown, we simply return the rightmost SCEV operand.
static const SCEV *getExprBase(const SCEV *S) {
  switch (S->getSCEVType()) {
  default: // including scUnknown.
    return S;
  case scConstant:
    return nullptr;
  case scTruncate:
    // Casts don't affect the base; look through them.
    return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
  case scZeroExtend:
    return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
  case scSignExtend:
    return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
  case scAddExpr: {
    // Skip over scaled operands (scMulExpr) to follow add operands as long as
    // there's nothing more complex.
    // FIXME: not sure if we want to recognize negation.
    const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
    // Walk the operands right-to-left (most complex operands sort last in
    // SCEV's canonical ordering).
    for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
         E(Add->op_begin()); I != E; ++I) {
      const SCEV *SubExpr = *I;
      if (SubExpr->getSCEVType() == scAddExpr)
        return getExprBase(SubExpr);

      if (SubExpr->getSCEVType() != scMulExpr)
        return SubExpr;
    }
    return S; // all operands are scaled, be conservative.
  }
  case scAddRecExpr:
    // The base of a recurrence is the base of its start value.
    return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
  }
}
2770dff0c46cSDimitry Andric
/// Return true if the chain increment is profitable to expand into a loop
/// invariant value, which may require its own register. A profitable chain
/// increment will be an offset relative to the same base. We allow such offsets
/// to potentially be used as chain increment as long as it's not obviously
/// expensive to expand using real instructions.
bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
                                    const SCEV *IncExpr,
                                    ScalarEvolution &SE) {
  // Aggressively form chains when -stress-ivchain.
  if (StressIVChain)
    return true;

  // Do not replace a constant offset from IV head with a nonconstant IV
  // increment: the constant-offset form is already cheap.
  if (!isa<SCEVConstant>(IncExpr)) {
    const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
    if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
      return false;
  }

  // Otherwise, profitable as long as expanding the increment isn't obviously
  // expensive.
  SmallPtrSet<const SCEV*, 8> Processed;
  return !isHighCostExpansion(IncExpr, Processed, SE);
}
2794dff0c46cSDimitry Andric
/// Return true if the number of registers needed for the chain is estimated to
/// be less than the number required for the individual IV users. First prohibit
/// any IV users that keep the IV live across increments (the Users set should
/// be empty). Next count the number and type of increments in the chain.
///
/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
/// effectively use postinc addressing modes. Only consider it profitable if the
/// increments can be computed in fewer registers when chained.
///
/// TODO: Consider IVInc free if it's already used in another chains.
static bool
isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
                  ScalarEvolution &SE, const TargetTransformInfo &TTI) {
  // Aggressively form chains when -stress-ivchain.
  if (StressIVChain)
    return true;

  if (!Chain.hasIncs())
    return false;

  // Leftover users keep the IV live across increments, defeating the point of
  // chaining.
  if (!Users.empty()) {
    LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
               for (Instruction *Inst
                    : Users) { dbgs() << " " << *Inst << "\n"; });
    return false;
  }
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

  // The chain itself may require a register, so initialize cost to 1.
  int cost = 1;

  // A complete chain likely eliminates the need for keeping the original IV in
  // a register. LSR does not currently know how to form a complete chain unless
  // the header phi already exists.
  if (isa<PHINode>(Chain.tailUserInst())
      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
    --cost;
  }
  const SCEV *LastIncExpr = nullptr;
  unsigned NumConstIncrements = 0;
  unsigned NumVarIncrements = 0;
  unsigned NumReusedIncrements = 0;
  for (const IVInc &Inc : Chain) {
    if (Inc.IncExpr->isZero())
      continue;

    // Incrementing by zero or some constant is neutral. We assume constants can
    // be folded into an addressing mode or an add's immediate operand.
    if (isa<SCEVConstant>(Inc.IncExpr)) {
      ++NumConstIncrements;
      continue;
    }

    if (Inc.IncExpr == LastIncExpr)
      ++NumReusedIncrements;
    else
      ++NumVarIncrements;

    LastIncExpr = Inc.IncExpr;
  }
  // An IV chain with a single increment is handled by LSR's postinc
  // uses. However, a chain with multiple increments requires keeping the IV's
  // value live longer than it needs to be if chained.
  if (NumConstIncrements > 1)
    --cost;

  // Materializing increment expressions in the preheader that didn't exist in
  // the original code may cost a register. For example, sign-extended array
  // indices can produce ridiculous increments like this:
  // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
  cost += NumVarIncrements;

  // Reusing variable increments likely saves a register to hold the multiple of
  // the stride.
  cost -= NumReusedIncrements;

  LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
                    << "\n");

  // Profitable only if the net register estimate is a saving.
  return cost < 0;
}
2875dff0c46cSDimitry Andric
/// Add this IV user to an existing chain or make it the head of a new chain.
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                                   SmallVectorImpl<ChainUsers> &ChainUsersVec) {
  // When IVs are used as types of varying widths, they are generally converted
  // to a wider type with some uses remaining narrow under a (free) trunc.
  Value *const NextIV = getWideOperand(IVOper);
  const SCEV *const OperExpr = SE.getSCEV(NextIV);
  const SCEV *const OperExprBase = getExprBase(OperExpr);

  // Visit all existing chains. Check if its IVOper can be computed as a
  // profitable loop invariant increment from the last link in the Chain.
  unsigned ChainIdx = 0, NChains = IVChainVec.size();
  const SCEV *LastIncExpr = nullptr;
  for (; ChainIdx < NChains; ++ChainIdx) {
    IVChain &Chain = IVChainVec[ChainIdx];

    // Prune the solution space aggressively by checking that both IV operands
    // are expressions that operate on the same unscaled SCEVUnknown. This
    // "base" will be canceled by the subsequent getMinusSCEV call. Checking
    // first avoids creating extra SCEV expressions.
    if (!StressIVChain && Chain.ExprBase != OperExprBase)
      continue;

    Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
    if (!isCompatibleIVType(PrevIV, NextIV))
      continue;

    // A phi node terminates a chain.
    if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
      continue;

    // The increment must be loop-invariant so it can be kept in a register.
    const SCEV *PrevExpr = SE.getSCEV(PrevIV);
    const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
    if (!SE.isLoopInvariant(IncExpr, L))
      continue;

    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
      LastIncExpr = IncExpr;
      break;
    }
  }
  // If we haven't found a chain, create a new one, unless we hit the max. Don't
  // bother for phi nodes, because they must be last in the chain.
  if (ChainIdx == NChains) {
    if (isa<PHINode>(UserInst))
      return;
    if (NChains >= MaxChains && !StressIVChain) {
      LLVM_DEBUG(dbgs() << "IV Chain Limit\n");
      return;
    }
    LastIncExpr = OperExpr;
    // IVUsers may have skipped over sign/zero extensions. We don't currently
    // attempt to form chains involving extensions unless they can be hoisted
    // into this loop's AddRec.
    if (!isa<SCEVAddRecExpr>(LastIncExpr))
      return;
    ++NChains;
    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
                                 OperExprBase));
    ChainUsersVec.resize(NChains);
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
                      << ") IV=" << *LastIncExpr << "\n");
  } else {
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
                      << ") IV+" << *LastIncExpr << "\n");
    // Add this IV user to the end of the chain.
    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
  }
  IVChain &Chain = IVChainVec[ChainIdx];

  SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
  // This chain's NearUsers become FarUsers.
  if (!LastIncExpr->isZero()) {
    ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
                                            NearUsers.end());
    NearUsers.clear();
  }

  // All other uses of IVOperand become near uses of the chain.
  // We currently ignore intermediate values within SCEV expressions, assuming
  // they will eventually be used by the current chain, or can be computed
  // from one of the chain increments. To be more precise we could
  // transitively follow its user and only add leaf IV users to the set.
  for (User *U : IVOper->users()) {
    Instruction *OtherUse = dyn_cast<Instruction>(U);
    if (!OtherUse)
      continue;
    // Uses in the chain will no longer be uses if the chain is formed.
    // Include the head of the chain in this iteration (not Chain.begin()).
    IVChain::const_iterator IncIter = Chain.Incs.begin();
    IVChain::const_iterator IncEnd = Chain.Incs.end();
    for( ; IncIter != IncEnd; ++IncIter) {
      if (IncIter->UserInst == OtherUse)
        break;
    }
    if (IncIter != IncEnd)
      continue;

    // Skip users that are themselves interior to a SCEV expression that
    // IVUsers already tracks; only leaf users matter here.
    if (SE.isSCEVable(OtherUse->getType())
        && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
        && IU.isIVUserOrOperand(OtherUse)) {
      continue;
    }
    NearUsers.insert(OtherUse);
  }

  // Since this user is part of the chain, it's no longer considered a use
  // of the chain.
  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
}
2987dff0c46cSDimitry Andric
/// Populate the vector of Chains.
///
/// This decreases ILP at the architecture level. Targets with ample registers,
/// multiple memory ports, and no register renaming probably don't want
/// this. However, such targets should probably disable LSR altogether.
///
/// The job of LSR is to make a reasonable choice of induction variables across
/// the loop. Subsequent passes can easily "unchain" computation exposing more
/// ILP *within the loop* if the target wants it.
///
/// Finding the best IV chain is potentially a scheduling problem. Since LSR
/// will not reorder memory operations, it will recognize this as a chain, but
/// will generate redundant IV increments. Ideally this would be corrected later
/// by a smart scheduler:
///        = A[i]
///        = A[i+x]
/// A[i]   =
/// A[i+x] =
///
/// TODO: Walk the entire domtree within this loop, not just the path to the
/// loop latch. This will discover chains on side paths, but requires
/// maintaining multiple copies of the Chains state.
void LSRInstance::CollectChains() {
  LLVM_DEBUG(dbgs() << "Collecting IV Chains.\n");
  SmallVector<ChainUsers, 8> ChainUsersVec;

  // Build the dominator path from the loop latch up to (and including) the
  // header, so it can be walked below in header-to-latch order.
  SmallVector<BasicBlock *,8> LatchPath;
  BasicBlock *LoopHeader = L->getHeader();
  for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
    LatchPath.push_back(Rung->getBlock());
  }
  LatchPath.push_back(LoopHeader);

  // Walk the instruction stream from the loop header to the loop latch.
  for (BasicBlock *BB : reverse(LatchPath)) {
    for (Instruction &I : *BB) {
      // Skip instructions that weren't seen by IVUsers analysis.
      if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))
        continue;

      // Ignore users that are part of a SCEV expression. This way we only
      // consider leaf IV Users. This effectively rediscovers a portion of
      // IVUsers analysis but in program order this time.
      if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))
        continue;

      // Remove this instruction from any NearUsers set it may be in.
      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
           ChainIdx < NChains; ++ChainIdx) {
        ChainUsersVec[ChainIdx].NearUsers.erase(&I);
      }
      // Search for operands that can be chained. UniqueOperands guards
      // against chaining the same operand twice when it appears repeatedly.
      SmallPtrSet<Instruction*, 4> UniqueOperands;
      User::op_iterator IVOpEnd = I.op_end();
      User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE);
      while (IVOpIter != IVOpEnd) {
        Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
        if (UniqueOperands.insert(IVOpInst).second)
          ChainInstruction(&I, IVOpInst, ChainUsersVec);
        IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
      }
    } // Continue walking down the instructions.
  } // Continue walking down the domtree.
  // Visit phi backedges to determine if the chain can generate the IV postinc.
  for (PHINode &PN : L->getHeader()->phis()) {
    if (!SE.isSCEVable(PN.getType()))
      continue;

    Instruction *IncV =
        dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
    if (IncV)
      ChainInstruction(&PN, IncV, ChainUsersVec);
  }
  // Remove any unprofitable chains, compacting the survivors in place.
  unsigned ChainIdx = 0;
  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
       UsersIdx < NChains; ++UsersIdx) {
    if (!isProfitableChain(IVChainVec[UsersIdx],
                           ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
      continue;
    // Preserve the chain at UsesIdx.
    if (ChainIdx != UsersIdx)
      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
    FinalizeChain(IVChainVec[ChainIdx]);
    ++ChainIdx;
  }
  IVChainVec.resize(ChainIdx);
}
3077dff0c46cSDimitry Andric
/// Mark the operands of each increment in an accepted chain so later rewriting
/// knows those uses are handled by the chain.
void LSRInstance::FinalizeChain(IVChain &Chain) {
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

  for (const IVInc &Inc : Chain) {
    LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
    // Record the (UserInst, IVOperand) operand-use in IVIncSet.
    auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
    assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
    IVIncSet.insert(UseI);
  }
}
3089dff0c46cSDimitry Andric
3090dff0c46cSDimitry Andric /// Return true if the IVInc can be folded into an addressing mode.
canFoldIVIncExpr(const SCEV * IncExpr,Instruction * UserInst,Value * Operand,const TargetTransformInfo & TTI)3091dff0c46cSDimitry Andric static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
3092139f7f9bSDimitry Andric Value *Operand, const TargetTransformInfo &TTI) {
3093dff0c46cSDimitry Andric const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
30942cab237bSDimitry Andric if (!IncConst || !isAddressUse(TTI, UserInst, Operand))
3095dff0c46cSDimitry Andric return false;
3096dff0c46cSDimitry Andric
30977d523365SDimitry Andric if (IncConst->getAPInt().getMinSignedBits() > 64)
3098dff0c46cSDimitry Andric return false;
3099dff0c46cSDimitry Andric
31004ba319b5SDimitry Andric MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
3101dff0c46cSDimitry Andric int64_t IncOffset = IncConst->getValue()->getSExtValue();
31027d523365SDimitry Andric if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
3103139f7f9bSDimitry Andric IncOffset, /*HaseBaseReg=*/false))
3104dff0c46cSDimitry Andric return false;
3105dff0c46cSDimitry Andric
3106dff0c46cSDimitry Andric return true;
3107dff0c46cSDimitry Andric }
3108dff0c46cSDimitry Andric
/// Generate an add or subtract for each IVInc in a chain to materialize the IV
/// user's operand from the previous IV user's operand.
void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
                                  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  // Find the new IVOperand for the head of the chain. It may have been replaced
  // by LSR.
  const IVInc &Head = Chain.Incs[0];
  User::op_iterator IVOpEnd = Head.UserInst->op_end();
  // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
  User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
                                             IVOpEnd, L, SE);
  Value *IVSrc = nullptr;
  while (IVOpIter != IVOpEnd) {
    IVSrc = getWideOperand(*IVOpIter);

    // If this operand computes the expression that the chain needs, we may use
    // it. (Check this after setting IVSrc which is used below.)
    //
    // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
    // narrow for the chain, so we can no longer use it. We do allow using a
    // wider phi, assuming the LSR checked for free truncation. In that case we
    // should already have a truncate on this operand such that
    // getSCEV(IVSrc) == IncExpr.
    if (SE.getSCEV(*IVOpIter) == Head.IncExpr
        || SE.getSCEV(IVSrc) == Head.IncExpr) {
      break;
    }
    IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
  }
  if (IVOpIter == IVOpEnd) {
    // Gracefully give up on this chain.
    LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
    return;
  }

  LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
  Type *IVTy = IVSrc->getType();
  Type *IntTy = SE.getEffectiveSCEVType(IVTy);
  // LeftOverExpr accumulates increments that have not yet been materialized
  // as a new IV value; it is reset whenever an increment can't be folded.
  const SCEV *LeftOverExpr = nullptr;
  for (const IVInc &Inc : Chain) {
    Instruction *InsertPt = Inc.UserInst;
    // For a phi user, expand in the latch block instead of at the phi itself.
    if (isa<PHINode>(InsertPt))
      InsertPt = L->getLoopLatch()->getTerminator();

    // IVOper will replace the current IV User's operand. IVSrc is the IV
    // value currently held in a register.
    Value *IVOper = IVSrc;
    if (!Inc.IncExpr->isZero()) {
      // IncExpr was the result of subtraction of two narrow values, so must
      // be signed.
      const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy);
      LeftOverExpr = LeftOverExpr ?
        SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
    }
    if (LeftOverExpr && !LeftOverExpr->isZero()) {
      // Expand the IV increment.
      Rewriter.clearPostInc();
      Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
      const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
                                             SE.getUnknown(IncV));
      IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

      // If an IV increment can't be folded, use it as the next IV value.
      if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {
        assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
        IVSrc = IVOper;
        LeftOverExpr = nullptr;
      }
    }
    // If the user expects a narrower type, truncate the chained value down.
    Type *OperTy = Inc.IVOperand->getType();
    if (IVTy != OperTy) {
      assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
             "cannot extend a chained IV");
      IRBuilder<> Builder(InsertPt);
      IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
    }
    Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
    DeadInsts.emplace_back(Inc.IVOperand);
  }
  // If LSR created a new, wider phi, we may also replace its postinc. We only
  // do this if we also found a wide value for the head of the chain.
  if (isa<PHINode>(Chain.tailUserInst())) {
    for (PHINode &Phi : L->getHeader()->phis()) {
      if (!isCompatibleIVType(&Phi, IVSrc))
        continue;
      Instruction *PostIncV = dyn_cast<Instruction>(
          Phi.getIncomingValueForBlock(L->getLoopLatch()));
      // Only rewrite the phi's latch value if it computes the same SCEV as
      // the chain's final value.
      if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
        continue;
      Value *IVOper = IVSrc;
      Type *PostIncTy = PostIncV->getType();
      if (IVTy != PostIncTy) {
        assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
        IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
        Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
        IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
      }
      Phi.replaceUsesOfWith(PostIncV, IVOper);
      DeadInsts.emplace_back(PostIncV);
    }
  }
}
3211dff0c46cSDimitry Andric
// Walk every IV user found by IVUsers, recording a fixup for it in the
// matching LSRUse and seeding that use with an initial formula.
void LSRInstance::CollectFixupsAndInitialFormulae() {
  for (const IVStrideUse &U : IU) {
    Instruction *UserInst = U.getUser();
    // Skip IV users that are part of profitable IV Chains.
    User::op_iterator UseI =
        find(UserInst->operands(), U.getOperandValToReplace());
    assert(UseI != UserInst->op_end() && "cannot find IV operand");
    if (IVIncSet.count(UseI)) {
      LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');
      continue;
    }

    // Classify the use: address uses get a memory access type so the target
    // can judge addressing-mode legality; everything else starts as Basic.
    LSRUse::KindType Kind = LSRUse::Basic;
    MemAccessTy AccessTy;
    if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {
      Kind = LSRUse::Address;
      AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace());
    }

    const SCEV *S = IU.getExpr(U);
    PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();

    // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
    // (N - i == 0), and this allows (N - i) to be the expression that we work
    // with rather than just N or i, so we can consider the register
    // requirements for both N and i at the same time. Limiting this code to
    // equality icmps is not a problem because all interesting loops use
    // equality icmps, thanks to IndVarSimplify.
    if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst))
      if (CI->isEquality()) {
        // Swap the operands if needed to put the OperandValToReplace on the
        // left, for consistency.
        Value *NV = CI->getOperand(1);
        if (NV == U.getOperandValToReplace()) {
          CI->setOperand(1, CI->getOperand(0));
          CI->setOperand(0, NV);
          NV = CI->getOperand(1);
          Changed = true;
        }

        // x == y  -->  x - y == 0
        const SCEV *N = SE.getSCEV(NV);
        if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
          // S is normalized, so normalize N before folding it into S
          // to keep the result normalized.
          N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
          Kind = LSRUse::ICmpZero;
          S = SE.getMinusSCEV(N, S);
        }

        // -1 and the negations of all interesting strides (except the negation
        // of -1) are now also interesting.
        for (size_t i = 0, e = Factors.size(); i != e; ++i)
          if (Factors[i] != -1)
            Factors.insert(-(uint64_t)Factors[i]);
        Factors.insert(-1);
      }

    // Get or create an LSRUse.
    std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
    size_t LUIdx = P.first;
    int64_t Offset = P.second;
    LSRUse &LU = Uses[LUIdx];

    // Record the fixup.
    LSRFixup &LF = LU.getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = U.getOperandValToReplace();
    LF.PostIncLoops = TmpPostIncLoops;
    LF.Offset = Offset;
    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);

    // Track the widest operand type seen across this use's fixups.
    if (!LU.WidestFixupType ||
        SE.getTypeSizeInBits(LU.WidestFixupType) <
        SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    // If this is the first use of this LSRUse, give it a formula.
    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LUIdx);
      CountRegisters(LU.Formulae.back(), LUIdx);
    }
  }

  LLVM_DEBUG(print_fixups(dbgs()));
}
3298f22ef01cSRoman Divacky
32997d523365SDimitry Andric /// Insert a formula for the given expression into the given use, separating out
33007d523365SDimitry Andric /// loop-variant portions from loop-invariant and loop-computable portions.
3301f22ef01cSRoman Divacky void
InsertInitialFormula(const SCEV * S,LSRUse & LU,size_t LUIdx)3302f22ef01cSRoman Divacky LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
3303f785676fSDimitry Andric // Mark uses whose expressions cannot be expanded.
3304f785676fSDimitry Andric if (!isSafeToExpand(S, SE))
3305f785676fSDimitry Andric LU.RigidFormula = true;
3306f785676fSDimitry Andric
3307f22ef01cSRoman Divacky Formula F;
33087d523365SDimitry Andric F.initialMatch(S, L, SE);
3309f22ef01cSRoman Divacky bool Inserted = InsertFormula(LU, LUIdx, F);
3310f22ef01cSRoman Divacky assert(Inserted && "Initial formula already exists!"); (void)Inserted;
3311f22ef01cSRoman Divacky }
3312f22ef01cSRoman Divacky
33137d523365SDimitry Andric /// Insert a simple single-register formula for the given expression into the
33147d523365SDimitry Andric /// given use.
3315f22ef01cSRoman Divacky void
InsertSupplementalFormula(const SCEV * S,LSRUse & LU,size_t LUIdx)3316f22ef01cSRoman Divacky LSRInstance::InsertSupplementalFormula(const SCEV *S,
3317f22ef01cSRoman Divacky LSRUse &LU, size_t LUIdx) {
3318f22ef01cSRoman Divacky Formula F;
3319f22ef01cSRoman Divacky F.BaseRegs.push_back(S);
3320139f7f9bSDimitry Andric F.HasBaseReg = true;
3321f22ef01cSRoman Divacky bool Inserted = InsertFormula(LU, LUIdx, F);
3322f22ef01cSRoman Divacky assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
3323f22ef01cSRoman Divacky }
3324f22ef01cSRoman Divacky
33257d523365SDimitry Andric /// Note which registers are used by the given formula, updating RegUses.
CountRegisters(const Formula & F,size_t LUIdx)3326f22ef01cSRoman Divacky void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
3327f22ef01cSRoman Divacky if (F.ScaledReg)
33287d523365SDimitry Andric RegUses.countRegister(F.ScaledReg, LUIdx);
3329ff0cc061SDimitry Andric for (const SCEV *BaseReg : F.BaseRegs)
33307d523365SDimitry Andric RegUses.countRegister(BaseReg, LUIdx);
3331f22ef01cSRoman Divacky }
3332f22ef01cSRoman Divacky
33337d523365SDimitry Andric /// If the given formula has not yet been inserted, add it to the list, and
33347d523365SDimitry Andric /// return true. Return false otherwise.
InsertFormula(LSRUse & LU,unsigned LUIdx,const Formula & F)3335f22ef01cSRoman Divacky bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
333691bc56edSDimitry Andric // Do not insert formula that we will not be able to expand.
333791bc56edSDimitry Andric assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
333891bc56edSDimitry Andric "Formula is illegal");
33397a7e6055SDimitry Andric
33407a7e6055SDimitry Andric if (!LU.InsertFormula(F, *L))
3341f22ef01cSRoman Divacky return false;
3342f22ef01cSRoman Divacky
3343f22ef01cSRoman Divacky CountRegisters(F, LUIdx);
3344f22ef01cSRoman Divacky return true;
3345f22ef01cSRoman Divacky }
3346f22ef01cSRoman Divacky
/// Check for other uses of loop-invariant values which we're tracking. These
/// other uses will pin these values in registers, making them less profitable
/// for elimination.
/// TODO: This currently misses non-constant addrec step registers.
/// TODO: Should this give more weight to users inside the loop?
void
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  // Seed the worklist with every register already tracked by RegUses.
  SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
  SmallPtrSet<const SCEV *, 32> Visited;

  while (!Worklist.empty()) {
    const SCEV *S = Worklist.pop_back_val();

    // Don't process the same SCEV twice
    if (!Visited.insert(S).second)
      continue;

    // Recurse into composite expressions; only the SCEVUnknown leaves below
    // are examined for extra uses.
    if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
      Worklist.append(N->op_begin(), N->op_end());
    else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
      Worklist.push_back(C->getOperand());
    else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
      Worklist.push_back(D->getLHS());
      Worklist.push_back(D->getRHS());
    } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
      const Value *V = US->getValue();
      if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
        // Look for instructions defined outside the loop.
        if (L->contains(Inst)) continue;
      } else if (isa<UndefValue>(V))
        // Undef doesn't have a live range, so it doesn't matter.
        continue;
      for (const Use &U : V->uses()) {
        const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
        // Ignore non-instructions.
        if (!UserInst)
          continue;
        // Ignore instructions in other functions (as can happen with
        // Constants).
        if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
          continue;
        // Ignore instructions not dominated by the loop.
        const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
          UserInst->getParent() :
          cast<PHINode>(UserInst)->getIncomingBlock(
            PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
        if (!DT.dominates(L->getHeader(), UseBB))
          continue;
        // Don't bother if the instruction is in a BB which ends in an EHPad.
        if (UseBB->getTerminator()->isEHPad())
          continue;
        // Don't bother rewriting PHIs in catchswitch blocks.
        if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator()))
          continue;
        // Ignore uses which are part of other SCEV expressions, to avoid
        // analyzing them multiple times.
        if (SE.isSCEVable(UserInst->getType())) {
          const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
          // If the user is a no-op, look through to its uses.
          if (!isa<SCEVUnknown>(UserS))
            continue;
          if (UserS == US) {
            Worklist.push_back(
              SE.getUnknown(const_cast<Instruction *>(UserInst)));
            continue;
          }
        }
        // Ignore icmp instructions which are already being analyzed.
        if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
          unsigned OtherIdx = !U.getOperandNo();
          Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
          if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
            continue;
        }

        // Record this user as a Basic use with its own fixup and a simple
        // single-register supplemental formula.
        std::pair<size_t, int64_t> P = getUse(
            S, LSRUse::Basic, MemAccessTy());
        size_t LUIdx = P.first;
        int64_t Offset = P.second;
        LSRUse &LU = Uses[LUIdx];
        LSRFixup &LF = LU.getNewFixup();
        LF.UserInst = const_cast<Instruction *>(UserInst);
        LF.OperandValToReplace = U;
        LF.Offset = Offset;
        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        if (!LU.WidestFixupType ||
            SE.getTypeSizeInBits(LU.WidestFixupType) <
            SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
          LU.WidestFixupType = LF.OperandValToReplace->getType();
        InsertSupplementalFormula(US, LU, LUIdx);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
        // One qualifying user is enough to pin the value in a register.
        break;
      }
    }
  }
}
3443f22ef01cSRoman Divacky
/// Split S into subexpressions which can be pulled out into separate
/// registers. If C is non-null, multiply each subexpression by C.
///
/// Return remainder expression after factoring the subexpressions captured by
/// Ops. If Ops is complete, return NULL.
static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
                                   SmallVectorImpl<const SCEV *> &Ops,
                                   const Loop *L,
                                   ScalarEvolution &SE,
                                   unsigned Depth = 0) {
  // Arbitrarily cap recursion to protect compile time.
  if (Depth >= 3)
    return S;

  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    // Break out add operands.
    for (const SCEV *S : Add->operands()) {
      const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth+1);
      if (Remainder)
        Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
    }
    // All operands have been pushed to Ops; nothing remains.
    return nullptr;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    // Split a non-zero base out of an addrec.
    if (AR->getStart()->isZero() || !AR->isAffine())
      return S;

    const SCEV *Remainder = CollectSubexprs(AR->getStart(),
                                            C, Ops, L, SE, Depth+1);
    // Split the non-zero AddRec unless it is part of a nested recurrence that
    // does not pertain to this loop.
    if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
      Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
      Remainder = nullptr;
    }
    // If part of the start was factored out, rebuild the addrec over what is
    // left (zero if everything was captured in Ops).
    if (Remainder != AR->getStart()) {
      if (!Remainder)
        Remainder = SE.getConstant(AR->getType(), 0);
      return SE.getAddRecExpr(Remainder,
                              AR->getStepRecurrence(SE),
                              AR->getLoop(),
                              //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
                              SCEV::FlagAnyWrap);
    }
  } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    // Break (C * (a + b + c)) into C*a + C*b + C*c.
    if (Mul->getNumOperands() != 2)
      return S;
    if (const SCEVConstant *Op0 =
        dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
      // Fold this constant into any factor accumulated so far, then recurse
      // into the non-constant operand.
      C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
      const SCEV *Remainder =
        CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
      if (Remainder)
        Ops.push_back(SE.getMulExpr(C, Remainder));
      return nullptr;
    }
  }
  // Not a decomposable form; the caller keeps S as a remainder.
  return S;
}
3504f22ef01cSRoman Divacky
35054ba319b5SDimitry Andric /// Return true if the SCEV represents a value that may end up as a
35064ba319b5SDimitry Andric /// post-increment operation.
mayUsePostIncMode(const TargetTransformInfo & TTI,LSRUse & LU,const SCEV * S,const Loop * L,ScalarEvolution & SE)35074ba319b5SDimitry Andric static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
35084ba319b5SDimitry Andric LSRUse &LU, const SCEV *S, const Loop *L,
35094ba319b5SDimitry Andric ScalarEvolution &SE) {
35104ba319b5SDimitry Andric if (LU.Kind != LSRUse::Address ||
35114ba319b5SDimitry Andric !LU.AccessTy.getType()->isIntOrIntVectorTy())
35124ba319b5SDimitry Andric return false;
35134ba319b5SDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
35144ba319b5SDimitry Andric if (!AR)
35154ba319b5SDimitry Andric return false;
35164ba319b5SDimitry Andric const SCEV *LoopStep = AR->getStepRecurrence(SE);
35174ba319b5SDimitry Andric if (!isa<SCEVConstant>(LoopStep))
35184ba319b5SDimitry Andric return false;
35194ba319b5SDimitry Andric if (LU.AccessTy.getType()->getScalarSizeInBits() !=
35204ba319b5SDimitry Andric LoopStep->getType()->getScalarSizeInBits())
35214ba319b5SDimitry Andric return false;
35224ba319b5SDimitry Andric // Check if a post-indexed load/store can be used.
35234ba319b5SDimitry Andric if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
35244ba319b5SDimitry Andric TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
35254ba319b5SDimitry Andric const SCEV *LoopStart = AR->getStart();
35264ba319b5SDimitry Andric if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
35274ba319b5SDimitry Andric return true;
35284ba319b5SDimitry Andric }
35294ba319b5SDimitry Andric return false;
35304ba319b5SDimitry Andric }
35314ba319b5SDimitry Andric
35324ba319b5SDimitry Andric /// Helper function for LSRInstance::GenerateReassociations.
GenerateReassociationsImpl(LSRUse & LU,unsigned LUIdx,const Formula & Base,unsigned Depth,size_t Idx,bool IsScaledReg)353391bc56edSDimitry Andric void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
353491bc56edSDimitry Andric const Formula &Base,
353591bc56edSDimitry Andric unsigned Depth, size_t Idx,
353691bc56edSDimitry Andric bool IsScaledReg) {
353791bc56edSDimitry Andric const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
35384ba319b5SDimitry Andric // Don't generate reassociations for the base register of a value that
35394ba319b5SDimitry Andric // may generate a post-increment operator. The reason is that the
35404ba319b5SDimitry Andric // reassociations cause extra base+register formula to be created,
35414ba319b5SDimitry Andric // and possibly chosen, but the post-increment is more efficient.
35424ba319b5SDimitry Andric if (TTI.shouldFavorPostInc() && mayUsePostIncMode(TTI, LU, BaseReg, L, SE))
35434ba319b5SDimitry Andric return;
3544e580952dSDimitry Andric SmallVector<const SCEV *, 8> AddOps;
354591bc56edSDimitry Andric const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
35467ae0e2c9SDimitry Andric if (Remainder)
35477ae0e2c9SDimitry Andric AddOps.push_back(Remainder);
3548ffd1746dSEd Schouten
354991bc56edSDimitry Andric if (AddOps.size() == 1)
355091bc56edSDimitry Andric return;
3551f22ef01cSRoman Divacky
3552f22ef01cSRoman Divacky for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
355391bc56edSDimitry Andric JE = AddOps.end();
355491bc56edSDimitry Andric J != JE; ++J) {
3555e580952dSDimitry Andric // Loop-variant "unknown" values are uninteresting; we won't be able to
3556e580952dSDimitry Andric // do anything meaningful with them.
35572754fe60SDimitry Andric if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
3558e580952dSDimitry Andric continue;
3559e580952dSDimitry Andric
3560f22ef01cSRoman Divacky // Don't pull a constant into a register if the constant could be folded
3561f22ef01cSRoman Divacky // into an immediate field.
3562139f7f9bSDimitry Andric if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3563139f7f9bSDimitry Andric LU.AccessTy, *J, Base.getNumRegs() > 1))
3564f22ef01cSRoman Divacky continue;
3565f22ef01cSRoman Divacky
3566f22ef01cSRoman Divacky // Collect all operands except *J.
356791bc56edSDimitry Andric SmallVector<const SCEV *, 8> InnerAddOps(
356891bc56edSDimitry Andric ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
356991bc56edSDimitry Andric InnerAddOps.append(std::next(J),
357091bc56edSDimitry Andric ((const SmallVector<const SCEV *, 8> &)AddOps).end());
3571f22ef01cSRoman Divacky
3572f22ef01cSRoman Divacky // Don't leave just a constant behind in a register if the constant could
3573f22ef01cSRoman Divacky // be folded into an immediate field.
3574f22ef01cSRoman Divacky if (InnerAddOps.size() == 1 &&
3575139f7f9bSDimitry Andric isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3576139f7f9bSDimitry Andric LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
3577f22ef01cSRoman Divacky continue;
3578f22ef01cSRoman Divacky
3579f22ef01cSRoman Divacky const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
3580f22ef01cSRoman Divacky if (InnerSum->isZero())
3581f22ef01cSRoman Divacky continue;
3582f22ef01cSRoman Divacky Formula F = Base;
3583bd5abe19SDimitry Andric
3584bd5abe19SDimitry Andric // Add the remaining pieces of the add back into the new formula.
3585bd5abe19SDimitry Andric const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
358691bc56edSDimitry Andric if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
3587139f7f9bSDimitry Andric TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3588bd5abe19SDimitry Andric InnerSumSC->getValue()->getZExtValue())) {
358991bc56edSDimitry Andric F.UnfoldedOffset =
359091bc56edSDimitry Andric (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
359191bc56edSDimitry Andric if (IsScaledReg)
359291bc56edSDimitry Andric F.ScaledReg = nullptr;
359391bc56edSDimitry Andric else
359491bc56edSDimitry Andric F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
359591bc56edSDimitry Andric } else if (IsScaledReg)
359691bc56edSDimitry Andric F.ScaledReg = InnerSum;
359791bc56edSDimitry Andric else
359891bc56edSDimitry Andric F.BaseRegs[Idx] = InnerSum;
3599bd5abe19SDimitry Andric
3600bd5abe19SDimitry Andric // Add J as its own register, or an unfolded immediate.
3601bd5abe19SDimitry Andric const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
3602139f7f9bSDimitry Andric if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
3603139f7f9bSDimitry Andric TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3604bd5abe19SDimitry Andric SC->getValue()->getZExtValue()))
360591bc56edSDimitry Andric F.UnfoldedOffset =
360691bc56edSDimitry Andric (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
3607bd5abe19SDimitry Andric else
3608f22ef01cSRoman Divacky F.BaseRegs.push_back(*J);
360991bc56edSDimitry Andric // We may have changed the number of register in base regs, adjust the
361091bc56edSDimitry Andric // formula accordingly.
36117a7e6055SDimitry Andric F.canonicalize(*L);
3612bd5abe19SDimitry Andric
3613f22ef01cSRoman Divacky if (InsertFormula(LU, LUIdx, F))
3614f22ef01cSRoman Divacky // If that formula hadn't been seen before, recurse to find more like
3615f22ef01cSRoman Divacky // it.
36164ba319b5SDimitry Andric // Add check on Log16(AddOps.size()) - same as Log2_32(AddOps.size()) >> 2)
36174ba319b5SDimitry Andric // Because just Depth is not enough to bound compile time.
36184ba319b5SDimitry Andric // This means that every time AddOps.size() is greater 16^x we will add
36194ba319b5SDimitry Andric // x to Depth.
36204ba319b5SDimitry Andric GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
36214ba319b5SDimitry Andric Depth + 1 + (Log2_32(AddOps.size()) >> 2));
3622f22ef01cSRoman Divacky }
3623f22ef01cSRoman Divacky }
362491bc56edSDimitry Andric
36257d523365SDimitry Andric /// Split out subexpressions from adds and the bases of addrecs.
GenerateReassociations(LSRUse & LU,unsigned LUIdx,Formula Base,unsigned Depth)362691bc56edSDimitry Andric void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
362791bc56edSDimitry Andric Formula Base, unsigned Depth) {
36287a7e6055SDimitry Andric assert(Base.isCanonical(*L) && "Input must be in the canonical form");
362991bc56edSDimitry Andric // Arbitrarily cap recursion to protect compile time.
363091bc56edSDimitry Andric if (Depth >= 3)
363191bc56edSDimitry Andric return;
363291bc56edSDimitry Andric
363391bc56edSDimitry Andric for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
363491bc56edSDimitry Andric GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
363591bc56edSDimitry Andric
363691bc56edSDimitry Andric if (Base.Scale == 1)
363791bc56edSDimitry Andric GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
363891bc56edSDimitry Andric /* Idx */ -1, /* IsScaledReg */ true);
3639f22ef01cSRoman Divacky }
3640f22ef01cSRoman Divacky
/// Generate a formula consisting of all of the loop-dominating registers added
/// into a single register.
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
                                       Formula Base) {
  // This method is only interesting on a plurality of registers.
  if (Base.BaseRegs.size() + (Base.Scale == 1) +
      (Base.UnfoldedOffset != 0) <= 1)
    return;

  // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
  // processing the formula.
  Base.unscale();
  // Ops collects the registers to be combined; NewBase keeps the ones that
  // must stay as separate base registers.
  SmallVector<const SCEV *, 4> Ops;
  Formula NewBase = Base;
  NewBase.BaseRegs.clear();
  Type *CombinedIntegerType = nullptr;
  for (const SCEV *BaseReg : Base.BaseRegs) {
    // Combine only registers that properly dominate the loop header and have
    // no computable evolution in this loop (i.e. they act as fixed values
    // for the duration of the loop — presumably safe to pre-add; verify
    // against ScalarEvolution semantics).
    if (SE.properlyDominates(BaseReg, L->getHeader()) &&
        !SE.hasComputableLoopEvolution(BaseReg, L)) {
      // Remember a type for materializing the unfolded offset below.
      if (!CombinedIntegerType)
        CombinedIntegerType = SE.getEffectiveSCEVType(BaseReg->getType());
      Ops.push_back(BaseReg);
    }
    else
      NewBase.BaseRegs.push_back(BaseReg);
  }

  // If no register is relevant, we're done.
  if (Ops.size() == 0)
    return;

  // Utility function for generating the required variants of the combined
  // registers.
  auto GenerateFormula = [&](const SCEV *Sum) {
    Formula F = NewBase;

    // TODO: If Sum is zero, it probably means ScalarEvolution missed an
    // opportunity to fold something. For now, just ignore such cases
    // rather than proceed with zero in a register.
    if (Sum->isZero())
      return;

    F.BaseRegs.push_back(Sum);
    F.canonicalize(*L);
    (void)InsertFormula(LU, LUIdx, F);
  };

  // If we collected at least two registers, generate a formula combining them.
  if (Ops.size() > 1) {
    SmallVector<const SCEV *, 4> OpsCopy(Ops); // Don't let SE modify Ops.
    GenerateFormula(SE.getAddExpr(OpsCopy));
  }

  // If we have an unfolded offset, generate a formula combining it with the
  // registers collected.
  if (NewBase.UnfoldedOffset) {
    assert(CombinedIntegerType && "Missing a type for the unfolded offset");
    Ops.push_back(SE.getConstant(CombinedIntegerType, NewBase.UnfoldedOffset,
                                 true));
    NewBase.UnfoldedOffset = 0;
    GenerateFormula(SE.getAddExpr(Ops));
  }
}
3704f22ef01cSRoman Divacky
37054ba319b5SDimitry Andric /// Helper function for LSRInstance::GenerateSymbolicOffsets.
GenerateSymbolicOffsetsImpl(LSRUse & LU,unsigned LUIdx,const Formula & Base,size_t Idx,bool IsScaledReg)370691bc56edSDimitry Andric void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
370791bc56edSDimitry Andric const Formula &Base, size_t Idx,
370891bc56edSDimitry Andric bool IsScaledReg) {
370991bc56edSDimitry Andric const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
371091bc56edSDimitry Andric GlobalValue *GV = ExtractSymbol(G, SE);
371191bc56edSDimitry Andric if (G->isZero() || !GV)
371291bc56edSDimitry Andric return;
371391bc56edSDimitry Andric Formula F = Base;
371491bc56edSDimitry Andric F.BaseGV = GV;
371591bc56edSDimitry Andric if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
371691bc56edSDimitry Andric return;
371791bc56edSDimitry Andric if (IsScaledReg)
371891bc56edSDimitry Andric F.ScaledReg = G;
371991bc56edSDimitry Andric else
372091bc56edSDimitry Andric F.BaseRegs[Idx] = G;
372191bc56edSDimitry Andric (void)InsertFormula(LU, LUIdx, F);
372291bc56edSDimitry Andric }
372391bc56edSDimitry Andric
37247d523365SDimitry Andric /// Generate reuse formulae using symbolic offsets.
GenerateSymbolicOffsets(LSRUse & LU,unsigned LUIdx,Formula Base)3725f22ef01cSRoman Divacky void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
3726f22ef01cSRoman Divacky Formula Base) {
3727f22ef01cSRoman Divacky // We can't add a symbolic offset if the address already contains one.
3728139f7f9bSDimitry Andric if (Base.BaseGV) return;
3729f22ef01cSRoman Divacky
373091bc56edSDimitry Andric for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
373191bc56edSDimitry Andric GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
373291bc56edSDimitry Andric if (Base.Scale == 1)
373391bc56edSDimitry Andric GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
373491bc56edSDimitry Andric /* IsScaledReg */ true);
373591bc56edSDimitry Andric }
373691bc56edSDimitry Andric
/// Helper function for LSRInstance::GenerateConstantOffsets.
void LSRInstance::GenerateConstantOffsetsImpl(
    LSRUse &LU, unsigned LUIdx, const Formula &Base,
    const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
  // The register we fold constant offsets into or out of.
  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
  // Phase 1: for each candidate offset, move it out of the immediate and
  // into the register (BaseOffset shrinks by Offset, the register grows by
  // Offset), keeping the formula's overall value unchanged.
  for (int64_t Offset : Worklist) {
    Formula F = Base;
    F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
    if (isLegalUse(TTI, LU.MinOffset - Offset, LU.MaxOffset - Offset, LU.Kind,
                   LU.AccessTy, F)) {
      // Add the offset to the base register.
      const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), Offset), G);
      // If it cancelled out, drop the base register, otherwise update it.
      if (NewG->isZero()) {
        if (IsScaledReg) {
          F.Scale = 0;
          F.ScaledReg = nullptr;
        } else
          F.deleteBaseReg(F.BaseRegs[Idx]);
        // Dropping a register may leave the formula non-canonical.
        F.canonicalize(*L);
      } else if (IsScaledReg)
        F.ScaledReg = NewG;
      else
        F.BaseRegs[Idx] = NewG;

      (void)InsertFormula(LU, LUIdx, F);
    }
  }

  // Phase 2: the reverse direction — ExtractImmediate strips any constant
  // addend out of G (updating G in place) so it can live in BaseOffset
  // instead of the register.
  int64_t Imm = ExtractImmediate(G, SE);
  if (G->isZero() || Imm == 0)
    return;
  Formula F = Base;
  F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    return;
  // Install the immediate-free register back into its original slot.
  if (IsScaledReg)
    F.ScaledReg = G;
  else
    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);
}
377991bc56edSDimitry Andric
3780f22ef01cSRoman Divacky /// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets.
GenerateConstantOffsets(LSRUse & LU,unsigned LUIdx,Formula Base)3781f22ef01cSRoman Divacky void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
3782f22ef01cSRoman Divacky Formula Base) {
3783f22ef01cSRoman Divacky // TODO: For now, just add the min and max offset, because it usually isn't
3784f22ef01cSRoman Divacky // worthwhile looking at everything inbetween.
3785ffd1746dSEd Schouten SmallVector<int64_t, 2> Worklist;
3786f22ef01cSRoman Divacky Worklist.push_back(LU.MinOffset);
3787f22ef01cSRoman Divacky if (LU.MaxOffset != LU.MinOffset)
3788f22ef01cSRoman Divacky Worklist.push_back(LU.MaxOffset);
3789f22ef01cSRoman Divacky
379091bc56edSDimitry Andric for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
379191bc56edSDimitry Andric GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
379291bc56edSDimitry Andric if (Base.Scale == 1)
379391bc56edSDimitry Andric GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
379491bc56edSDimitry Andric /* IsScaledReg */ true);
3795f22ef01cSRoman Divacky }
3796f22ef01cSRoman Divacky
/// For ICmpZero, check to see if we can scale up the comparison. For example, x
/// == y -> x*c == y*c.
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
                                         Formula Base) {
  if (LU.Kind != LSRUse::ICmpZero) return;

  // Determine the integer type for the base formula.
  Type *IntTy = Base.getType();
  if (!IntTy) return;
  // The overflow checks below are done in 64-bit arithmetic.
  if (SE.getTypeSizeInBits(IntTy) > 64) return;

  // Don't do this if there is more than one offset.
  if (LU.MinOffset != LU.MaxOffset) return;

  // Check if transformation is valid. It is illegal to multiply pointer.
  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
    return;
  for (const SCEV *BaseReg : Base.BaseRegs)
    if (BaseReg->getType()->isPointerTy())
      return;
  assert(!Base.BaseGV && "ICmpZero use is not legal!");

  // Check each interesting stride.
  for (int64_t Factor : Factors) {
    // Check that the multiplication doesn't overflow.
    // INT64_MIN * -1 is undefined in two's complement; reject it up front.
    if (Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1)
      continue;
    int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
    // Dividing back must recover the original value, else the multiply
    // wrapped.
    if (NewBaseOffset / Factor != Base.BaseOffset)
      continue;
    // If the offset will be truncated at this use, check that it is in bounds.
    if (!IntTy->isPointerTy() &&
        !ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
      continue;

    // Check that multiplying with the use offset doesn't overflow.
    int64_t Offset = LU.MinOffset;
    if (Offset == std::numeric_limits<int64_t>::min() && Factor == -1)
      continue;
    Offset = (uint64_t)Offset * Factor;
    if (Offset / Factor != LU.MinOffset)
      continue;
    // If the offset will be truncated at this use, check that it is in bounds.
    if (!IntTy->isPointerTy() &&
        !ConstantInt::isValueValidForType(IntTy, Offset))
      continue;

    Formula F = Base;
    F.BaseOffset = NewBaseOffset;

    // Check that this scale is legal.
    if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
      continue;

    // Compensate for the use having MinOffset built into it.
    F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;

    const SCEV *FactorS = SE.getConstant(IntTy, Factor);

    // Check that multiplying with each base register doesn't overflow.
    // getExactSDiv must invert the multiply exactly; otherwise abandon this
    // factor entirely (goto skips the remaining checks and the insert).
    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
      F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
      if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
        goto next;
    }

    // Check that multiplying with the scaled register doesn't overflow.
    if (F.ScaledReg) {
      F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
      if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
        continue;
    }

    // Check that multiplying with the unfolded offset doesn't overflow.
    if (F.UnfoldedOffset != 0) {
      if (F.UnfoldedOffset == std::numeric_limits<int64_t>::min() &&
          Factor == -1)
        continue;
      F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
      if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
        continue;
      // If the offset will be truncated, check that it is in bounds.
      if (!IntTy->isPointerTy() &&
          !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
        continue;
    }

    // If we make it here and it's legal, add it.
    (void)InsertFormula(LU, LUIdx, F);
  next:;
  }
}
3889f22ef01cSRoman Divacky
/// Generate stride factor reuse formulae by making use of scaled-offset address
/// modes, for example.
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  // Determine the integer type for the base formula.
  Type *IntTy = Base.getType();
  if (!IntTy) return;

  // If this Formula already has a scaled register, we can't add another one.
  // Try to unscale the formula to generate a better scale.
  if (Base.Scale != 0 && !Base.unscale())
    return;

  assert(Base.Scale == 0 && "unscale did not did its job!");

  // Check each interesting stride.
  for (int64_t Factor : Factors) {
    // Note: Base is mutated here and reused on every iteration; Scale and
    // HasBaseReg are overwritten each time around.
    Base.Scale = Factor;
    Base.HasBaseReg = Base.BaseRegs.size() > 1;
    // Check whether this scale is going to be legal.
    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                    Base)) {
      // As a special-case, handle special out-of-loop Basic users specially.
      // TODO: Reconsider this special case.
      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
                     LU.AccessTy, Base) &&
          LU.AllFixupsOutsideLoop)
        LU.Kind = LSRUse::Special;
      else
        continue;
    }
    // For an ICmpZero, negating a solitary base register won't lead to
    // new solutions.
    if (LU.Kind == LSRUse::ICmpZero &&
        !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
      continue;
    // For each addrec base reg, if its loop is current loop, apply the scale.
    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i]);
      if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
        const SCEV *FactorS = SE.getConstant(IntTy, Factor);
        if (FactorS->isZero())
          continue;
        // Divide out the factor, ignoring high bits, since we'll be
        // scaling the value back up in the end.
        if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
          // TODO: This could be optimized to avoid all the copying.
          Formula F = Base;
          F.ScaledReg = Quotient;
          F.deleteBaseReg(F.BaseRegs[i]);
          // The canonical representation of 1*reg is reg, which is already in
          // Base. In that case, do not try to insert the formula, it will be
          // rejected anyway.
          if (F.Scale == 1 && (F.BaseRegs.empty() ||
                               (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
            continue;
          // If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate
          // non canonical Formula with ScaledReg's loop not being L.
          if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
            F.canonicalize(*L);
          (void)InsertFormula(LU, LUIdx, F);
        }
      }
    }
  }
}
3956f22ef01cSRoman Divacky
39577d523365SDimitry Andric /// Generate reuse formulae from different IV types.
GenerateTruncates(LSRUse & LU,unsigned LUIdx,Formula Base)3958f22ef01cSRoman Divacky void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
3959f22ef01cSRoman Divacky // Don't bother truncating symbolic values.
3960139f7f9bSDimitry Andric if (Base.BaseGV) return;
3961f22ef01cSRoman Divacky
3962f22ef01cSRoman Divacky // Determine the integer type for the base formula.
39636122f3e6SDimitry Andric Type *DstTy = Base.getType();
3964f22ef01cSRoman Divacky if (!DstTy) return;
3965f22ef01cSRoman Divacky DstTy = SE.getEffectiveSCEVType(DstTy);
3966f22ef01cSRoman Divacky
3967ff0cc061SDimitry Andric for (Type *SrcTy : Types) {
3968139f7f9bSDimitry Andric if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
3969f22ef01cSRoman Divacky Formula F = Base;
3970f22ef01cSRoman Divacky
3971ff0cc061SDimitry Andric if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
3972ff0cc061SDimitry Andric for (const SCEV *&BaseReg : F.BaseRegs)
3973ff0cc061SDimitry Andric BaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
3974f22ef01cSRoman Divacky
3975f22ef01cSRoman Divacky // TODO: This assumes we've done basic processing on all uses and
3976f22ef01cSRoman Divacky // have an idea what the register usage is.
3977f22ef01cSRoman Divacky if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
3978f22ef01cSRoman Divacky continue;
3979f22ef01cSRoman Divacky
3980d8866befSDimitry Andric F.canonicalize(*L);
3981f22ef01cSRoman Divacky (void)InsertFormula(LU, LUIdx, F);
3982f22ef01cSRoman Divacky }
3983f22ef01cSRoman Divacky }
3984f22ef01cSRoman Divacky }
3985f22ef01cSRoman Divacky
namespace {

/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer
/// modifications so that the search phase doesn't have to worry about the data
/// structures moving underneath it.
struct WorkItem {
  // Index of the LSRUse whose formulae will receive the new offset.
  size_t LUIdx;
  // Constant offset to apply.
  int64_t Imm;
  // The register the offset is measured against.
  const SCEV *OrigReg;

  WorkItem(size_t LI, int64_t I, const SCEV *R)
      : LUIdx(LI), Imm(I), OrigReg(R) {}

  void print(raw_ostream &OS) const;
  void dump() const;
};

} // end anonymous namespace
4004f22ef01cSRoman Divacky
40052cab237bSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & OS) const4006f22ef01cSRoman Divacky void WorkItem::print(raw_ostream &OS) const {
4007f22ef01cSRoman Divacky OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
4008f22ef01cSRoman Divacky << " , add offset " << Imm;
4009f22ef01cSRoman Divacky }
4010f22ef01cSRoman Divacky
dump() const40117a7e6055SDimitry Andric LLVM_DUMP_METHOD void WorkItem::dump() const {
4012f22ef01cSRoman Divacky print(errs()); errs() << '\n';
4013f22ef01cSRoman Divacky }
40147a7e6055SDimitry Andric #endif
4015f22ef01cSRoman Divacky
40167d523365SDimitry Andric /// Look for registers which are a constant distance apart and try to form reuse
40177d523365SDimitry Andric /// opportunities between them.
GenerateCrossUseConstantOffsets()4018f22ef01cSRoman Divacky void LSRInstance::GenerateCrossUseConstantOffsets() {
4019f22ef01cSRoman Divacky // Group the registers by their value without any added constant offset.
40202cab237bSDimitry Andric using ImmMapTy = std::map<int64_t, const SCEV *>;
40212cab237bSDimitry Andric
4022ff0cc061SDimitry Andric DenseMap<const SCEV *, ImmMapTy> Map;
4023f22ef01cSRoman Divacky DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
4024f22ef01cSRoman Divacky SmallVector<const SCEV *, 8> Sequence;
4025ff0cc061SDimitry Andric for (const SCEV *Use : RegUses) {
4026ff0cc061SDimitry Andric const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.
4027f22ef01cSRoman Divacky int64_t Imm = ExtractImmediate(Reg, SE);
4028ff0cc061SDimitry Andric auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
4029f22ef01cSRoman Divacky if (Pair.second)
4030f22ef01cSRoman Divacky Sequence.push_back(Reg);
4031ff0cc061SDimitry Andric Pair.first->second.insert(std::make_pair(Imm, Use));
4032ff0cc061SDimitry Andric UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
4033f22ef01cSRoman Divacky }
4034f22ef01cSRoman Divacky
4035f22ef01cSRoman Divacky // Now examine each set of registers with the same base value. Build up
4036f22ef01cSRoman Divacky // a list of work to do and do the work in a separate step so that we're
4037f22ef01cSRoman Divacky // not adding formulae and register counts while we're searching.
4038f22ef01cSRoman Divacky SmallVector<WorkItem, 32> WorkItems;
4039f22ef01cSRoman Divacky SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
4040ff0cc061SDimitry Andric for (const SCEV *Reg : Sequence) {
4041f22ef01cSRoman Divacky const ImmMapTy &Imms = Map.find(Reg)->second;
4042f22ef01cSRoman Divacky
4043f22ef01cSRoman Divacky // It's not worthwhile looking for reuse if there's only one offset.
4044f22ef01cSRoman Divacky if (Imms.size() == 1)
4045f22ef01cSRoman Divacky continue;
4046f22ef01cSRoman Divacky
40474ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
40484ba319b5SDimitry Andric for (const auto &Entry
40494ba319b5SDimitry Andric : Imms) dbgs()
40504ba319b5SDimitry Andric << ' ' << Entry.first;
4051f22ef01cSRoman Divacky dbgs() << '\n');
4052f22ef01cSRoman Divacky
4053f22ef01cSRoman Divacky // Examine each offset.
4054f22ef01cSRoman Divacky for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4055f22ef01cSRoman Divacky J != JE; ++J) {
4056f22ef01cSRoman Divacky const SCEV *OrigReg = J->second;
4057f22ef01cSRoman Divacky
4058f22ef01cSRoman Divacky int64_t JImm = J->first;
4059f22ef01cSRoman Divacky const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4060f22ef01cSRoman Divacky
4061f22ef01cSRoman Divacky if (!isa<SCEVConstant>(OrigReg) &&
4062f22ef01cSRoman Divacky UsedByIndicesMap[Reg].count() == 1) {
40634ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
40644ba319b5SDimitry Andric << '\n');
4065f22ef01cSRoman Divacky continue;
4066f22ef01cSRoman Divacky }
4067f22ef01cSRoman Divacky
4068f22ef01cSRoman Divacky // Conservatively examine offsets between this orig reg a few selected
4069f22ef01cSRoman Divacky // other orig regs.
4070f22ef01cSRoman Divacky ImmMapTy::const_iterator OtherImms[] = {
407191bc56edSDimitry Andric Imms.begin(), std::prev(Imms.end()),
407291bc56edSDimitry Andric Imms.lower_bound((Imms.begin()->first + std::prev(Imms.end())->first) /
407391bc56edSDimitry Andric 2)
4074f22ef01cSRoman Divacky };
4075f22ef01cSRoman Divacky for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
4076f22ef01cSRoman Divacky ImmMapTy::const_iterator M = OtherImms[i];
4077f22ef01cSRoman Divacky if (M == J || M == JE) continue;
4078f22ef01cSRoman Divacky
4079f22ef01cSRoman Divacky // Compute the difference between the two.
4080f22ef01cSRoman Divacky int64_t Imm = (uint64_t)JImm - M->first;
408160ff8e32SDimitry Andric for (unsigned LUIdx : UsedByIndices.set_bits())
4082f22ef01cSRoman Divacky // Make a memo of this use, offset, and register tuple.
408339d628a0SDimitry Andric if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
4084f22ef01cSRoman Divacky WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
4085f22ef01cSRoman Divacky }
4086f22ef01cSRoman Divacky }
4087f22ef01cSRoman Divacky }
4088f22ef01cSRoman Divacky
4089f22ef01cSRoman Divacky Map.clear();
4090f22ef01cSRoman Divacky Sequence.clear();
4091f22ef01cSRoman Divacky UsedByIndicesMap.clear();
4092f22ef01cSRoman Divacky UniqueItems.clear();
4093f22ef01cSRoman Divacky
4094f22ef01cSRoman Divacky // Now iterate through the worklist and add new formulae.
4095ff0cc061SDimitry Andric for (const WorkItem &WI : WorkItems) {
4096f22ef01cSRoman Divacky size_t LUIdx = WI.LUIdx;
4097f22ef01cSRoman Divacky LSRUse &LU = Uses[LUIdx];
4098f22ef01cSRoman Divacky int64_t Imm = WI.Imm;
4099f22ef01cSRoman Divacky const SCEV *OrigReg = WI.OrigReg;
4100f22ef01cSRoman Divacky
41016122f3e6SDimitry Andric Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
4102f22ef01cSRoman Divacky const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
4103f22ef01cSRoman Divacky unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
4104f22ef01cSRoman Divacky
4105f22ef01cSRoman Divacky // TODO: Use a more targeted data structure.
4106f22ef01cSRoman Divacky for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
410791bc56edSDimitry Andric Formula F = LU.Formulae[L];
410891bc56edSDimitry Andric // FIXME: The code for the scaled and unscaled registers looks
410991bc56edSDimitry Andric // very similar but slightly different. Investigate if they
411091bc56edSDimitry Andric // could be merged. That way, we would not have to unscale the
411191bc56edSDimitry Andric // Formula.
41127d523365SDimitry Andric F.unscale();
4113f22ef01cSRoman Divacky // Use the immediate in the scaled register.
4114f22ef01cSRoman Divacky if (F.ScaledReg == OrigReg) {
4115139f7f9bSDimitry Andric int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
4116f22ef01cSRoman Divacky // Don't create 50 + reg(-50).
4117f22ef01cSRoman Divacky if (F.referencesReg(SE.getSCEV(
4118139f7f9bSDimitry Andric ConstantInt::get(IntTy, -(uint64_t)Offset))))
4119f22ef01cSRoman Divacky continue;
4120f22ef01cSRoman Divacky Formula NewF = F;
4121139f7f9bSDimitry Andric NewF.BaseOffset = Offset;
4122139f7f9bSDimitry Andric if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4123139f7f9bSDimitry Andric NewF))
4124f22ef01cSRoman Divacky continue;
4125f22ef01cSRoman Divacky NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
4126f22ef01cSRoman Divacky
4127f22ef01cSRoman Divacky // If the new scale is a constant in a register, and adding the constant
4128f22ef01cSRoman Divacky // value to the immediate would produce a value closer to zero than the
4129f22ef01cSRoman Divacky // immediate itself, then the formula isn't worthwhile.
4130f22ef01cSRoman Divacky if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
41317d523365SDimitry Andric if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
41327d523365SDimitry Andric (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
4133ff0cc061SDimitry Andric .ule(std::abs(NewF.BaseOffset)))
4134f22ef01cSRoman Divacky continue;
4135f22ef01cSRoman Divacky
4136f22ef01cSRoman Divacky // OK, looks good.
41377a7e6055SDimitry Andric NewF.canonicalize(*this->L);
4138f22ef01cSRoman Divacky (void)InsertFormula(LU, LUIdx, NewF);
4139f22ef01cSRoman Divacky } else {
4140f22ef01cSRoman Divacky // Use the immediate in a base register.
4141f22ef01cSRoman Divacky for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
4142f22ef01cSRoman Divacky const SCEV *BaseReg = F.BaseRegs[N];
4143f22ef01cSRoman Divacky if (BaseReg != OrigReg)
4144f22ef01cSRoman Divacky continue;
4145f22ef01cSRoman Divacky Formula NewF = F;
4146139f7f9bSDimitry Andric NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
4147139f7f9bSDimitry Andric if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
4148139f7f9bSDimitry Andric LU.Kind, LU.AccessTy, NewF)) {
41494ba319b5SDimitry Andric if (TTI.shouldFavorPostInc() &&
41504ba319b5SDimitry Andric mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))
41514ba319b5SDimitry Andric continue;
4152139f7f9bSDimitry Andric if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
4153f22ef01cSRoman Divacky continue;
4154bd5abe19SDimitry Andric NewF = F;
4155bd5abe19SDimitry Andric NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
4156bd5abe19SDimitry Andric }
4157f22ef01cSRoman Divacky NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
4158f22ef01cSRoman Divacky
4159f22ef01cSRoman Divacky // If the new formula has a constant in a register, and adding the
4160f22ef01cSRoman Divacky // constant value to the immediate would produce a value closer to
4161f22ef01cSRoman Divacky // zero than the immediate itself, then the formula isn't worthwhile.
4162ff0cc061SDimitry Andric for (const SCEV *NewReg : NewF.BaseRegs)
4163ff0cc061SDimitry Andric if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg))
41647d523365SDimitry Andric if ((C->getAPInt() + NewF.BaseOffset)
41657d523365SDimitry Andric .abs()
41667d523365SDimitry Andric .slt(std::abs(NewF.BaseOffset)) &&
41677d523365SDimitry Andric (C->getAPInt() + NewF.BaseOffset).countTrailingZeros() >=
4168f785676fSDimitry Andric countTrailingZeros<uint64_t>(NewF.BaseOffset))
4169f22ef01cSRoman Divacky goto skip_formula;
4170f22ef01cSRoman Divacky
4171f22ef01cSRoman Divacky // Ok, looks good.
41727a7e6055SDimitry Andric NewF.canonicalize(*this->L);
4173f22ef01cSRoman Divacky (void)InsertFormula(LU, LUIdx, NewF);
4174f22ef01cSRoman Divacky break;
4175f22ef01cSRoman Divacky skip_formula:;
4176f22ef01cSRoman Divacky }
4177f22ef01cSRoman Divacky }
4178f22ef01cSRoman Divacky }
4179f22ef01cSRoman Divacky }
4180f22ef01cSRoman Divacky }
4181f22ef01cSRoman Divacky
41827d523365SDimitry Andric /// Generate formulae for each use.
4183f22ef01cSRoman Divacky void
GenerateAllReuseFormulae()4184f22ef01cSRoman Divacky LSRInstance::GenerateAllReuseFormulae() {
4185f22ef01cSRoman Divacky // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
4186f22ef01cSRoman Divacky // queries are more precise.
4187f22ef01cSRoman Divacky for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4188f22ef01cSRoman Divacky LSRUse &LU = Uses[LUIdx];
4189f22ef01cSRoman Divacky for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4190f22ef01cSRoman Divacky GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4191f22ef01cSRoman Divacky for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4192f22ef01cSRoman Divacky GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
4193f22ef01cSRoman Divacky }
4194f22ef01cSRoman Divacky for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4195f22ef01cSRoman Divacky LSRUse &LU = Uses[LUIdx];
4196f22ef01cSRoman Divacky for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4197f22ef01cSRoman Divacky GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4198f22ef01cSRoman Divacky for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4199f22ef01cSRoman Divacky GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4200f22ef01cSRoman Divacky for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4201f22ef01cSRoman Divacky GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4202f22ef01cSRoman Divacky for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4203f22ef01cSRoman Divacky GenerateScales(LU, LUIdx, LU.Formulae[i]);
4204f22ef01cSRoman Divacky }
4205f22ef01cSRoman Divacky for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4206f22ef01cSRoman Divacky LSRUse &LU = Uses[LUIdx];
4207f22ef01cSRoman Divacky for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4208f22ef01cSRoman Divacky GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4209f22ef01cSRoman Divacky }
4210f22ef01cSRoman Divacky
4211f22ef01cSRoman Divacky GenerateCrossUseConstantOffsets();
4212e580952dSDimitry Andric
42134ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "\n"
4214e580952dSDimitry Andric "After generating reuse formulae:\n";
4215e580952dSDimitry Andric print_uses(dbgs()));
4216f22ef01cSRoman Divacky }
4217f22ef01cSRoman Divacky
/// If there are multiple formulae with the same set of registers used
/// by other uses, pick the best one and delete the others.
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;
  SmallPtrSet<const SCEV *, 16> LoserRegs;
#ifndef NDEBUG
  bool ChangedFormulae = false;
#endif

  // Collect the best formula for each unique set of shared registers. This
  // is reset for each use.
  using BestFormulaeTy =
      DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>;

  BestFormulaeTy BestFormulae;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
               dbgs() << '\n');

    bool Any = false;
    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];

      // Some formulas are instant losers. For example, they may depend on
      // nonexistent AddRecs from other loops. These need to be filtered
      // immediately, otherwise heuristics could choose them over others leading
      // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
      // avoids the need to recompute this information across formulae using the
      // same bad AddRec. Passing LoserRegs is also essential unless we remove
      // the corresponding bad register from the Regs set.
      Cost CostF;
      Regs.clear();
      CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, SE, DT, LU, &LoserRegs);
      if (CostF.isLoser()) {
        // During initial formula generation, undesirable formulae are generated
        // by uses within other loops that have some non-trivial address mode or
        // use the postinc form of the IV. LSR needs to provide these formulae
        // as the basis of rediscovering the desired formula that uses an AddRec
        // corresponding to the existing phi. Once all formulae have been
        // generated, these initial losers may be pruned.
        LLVM_DEBUG(dbgs() << "  Filtering loser "; F.print(dbgs());
                   dbgs() << "\n");
      }
      else {
        // Build the dedup key: only registers that other uses also reference
        // matter here, since registers dedicated to this one use don't change
        // the cross-use register pressure picture.
        SmallVector<const SCEV *, 4> Key;
        for (const SCEV *Reg : F.BaseRegs) {
          if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
            Key.push_back(Reg);
        }
        if (F.ScaledReg &&
            RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
          Key.push_back(F.ScaledReg);
        // Unstable sort by host order ok, because this is only used for
        // uniquifying.
        llvm::sort(Key);

        std::pair<BestFormulaeTy::const_iterator, bool> P =
          BestFormulae.insert(std::make_pair(Key, FIdx));
        if (P.second)
          // First formula seen with this key; keep it and move on.
          continue;

        Formula &Best = LU.Formulae[P.first->second];

        Cost CostBest;
        Regs.clear();
        CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU);
        if (CostF.isLess(CostBest, TTI))
          // The new formula rates better: swap it into the "best" slot so
          // that F now names the formula to be deleted below.
          std::swap(F, Best);
        LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                   dbgs() << "\n"
                             "    in favor of formula ";
                   Best.print(dbgs()); dbgs() << '\n');
      }
      // Reached for both instant losers and out-competed formulae: delete F
      // and back up the index/bound so the element shifted into FIdx is
      // examined on the next iteration.
#ifndef NDEBUG
      ChangedFormulae = true;
#endif
      LU.DeleteFormula(F);
      --FIdx;
      --NumForms;
      Any = true;
    }

    // Now that we've filtered out some formulae, recompute the Regs set.
    if (Any)
      LU.RecomputeRegs(LUIdx, RegUses);

    // Reset this to prepare for the next use.
    BestFormulae.clear();
  }

  LLVM_DEBUG(if (ChangedFormulae) {
    dbgs() << "\n"
              "After filtering out undesirable candidates:\n";
    print_uses(dbgs());
  });
}
4318f22ef01cSRoman Divacky
43197d523365SDimitry Andric /// Estimate the worst-case number of solutions the solver might have to
43207d523365SDimitry Andric /// consider. It almost never considers this many solutions because it prune the
43217d523365SDimitry Andric /// search space, but the pruning isn't always sufficient.
EstimateSearchSpaceComplexity() const4322f22ef01cSRoman Divacky size_t LSRInstance::EstimateSearchSpaceComplexity() const {
43232754fe60SDimitry Andric size_t Power = 1;
4324ff0cc061SDimitry Andric for (const LSRUse &LU : Uses) {
4325ff0cc061SDimitry Andric size_t FSize = LU.Formulae.size();
4326f22ef01cSRoman Divacky if (FSize >= ComplexityLimit) {
4327f22ef01cSRoman Divacky Power = ComplexityLimit;
4328f22ef01cSRoman Divacky break;
4329f22ef01cSRoman Divacky }
4330f22ef01cSRoman Divacky Power *= FSize;
4331f22ef01cSRoman Divacky if (Power >= ComplexityLimit)
4332f22ef01cSRoman Divacky break;
4333f22ef01cSRoman Divacky }
4334f22ef01cSRoman Divacky return Power;
4335f22ef01cSRoman Divacky }
4336f22ef01cSRoman Divacky
/// When one formula uses a superset of the registers of another formula, it
/// won't help reduce register pressure (though it may not necessarily hurt
/// register pressure); remove it to simplify the system.
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

    LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
                         "which use a superset of registers used by other "
                         "formulae.\n");

    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      bool Any = false;
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        // Look for a formula with a constant or GV in a register. If the use
        // also has a formula with that same value in an immediate field,
        // delete the one that uses a register.
        for (SmallVectorImpl<const SCEV *>::const_iterator
             I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
          if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
            // Speculatively fold the constant register into the base offset;
            // if an equivalent formula already exists, F is redundant.
            Formula NewF = F;
            NewF.BaseOffset += C->getValue()->getSExtValue();
            NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                (I - F.BaseRegs.begin()));
            if (LU.HasFormulaWithSameRegs(NewF)) {
              LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                         dbgs() << '\n');
              LU.DeleteFormula(F);
              // The list shrank by one: back up the index and bound, and
              // break since iterators into F.BaseRegs are no longer valid.
              --i;
              --e;
              Any = true;
              break;
            }
          } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
            // Same idea for a global value held in a register: fold it into
            // the BaseGV immediate field (only if that field is free).
            if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
              if (!F.BaseGV) {
                Formula NewF = F;
                NewF.BaseGV = GV;
                NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                    (I - F.BaseRegs.begin()));
                if (LU.HasFormulaWithSameRegs(NewF)) {
                  LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                             dbgs() << '\n');
                  LU.DeleteFormula(F);
                  --i;
                  --e;
                  Any = true;
                  break;
                }
              }
          }
        }
      }
      if (Any)
        LU.RecomputeRegs(LUIdx, RegUses);
    }

    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
  }
}
4399f22ef01cSRoman Divacky
/// When there are many registers for expressions like A, A+1, A+2, etc.,
/// allocate a single register for them.
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  LLVM_DEBUG(
      dbgs() << "The search space is too complex.\n"
                "Narrowing the search space by assuming that uses separated "
                "by a constant offset will use the same registers.\n");

  // This is especially useful for unrolled loops.

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (const Formula &F : LU.Formulae) {
      // Only consider formulae that are a plain constant offset (no scaling
      // beyond 1) from some other use's formula.
      if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
        continue;

      LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
      if (!LUThatHas)
        continue;

      // The merge is only valid if the offset fits the other use's
      // kind/access-type constraints.
      if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
                              LU.Kind, LU.AccessTy))
        continue;

      LLVM_DEBUG(dbgs() << "  Deleting use "; LU.print(dbgs()); dbgs() << '\n');

      LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;

      // Transfer the fixups of LU to LUThatHas, rebasing each fixup's offset
      // by the constant distance between the two uses.
      for (LSRFixup &Fixup : LU.Fixups) {
        Fixup.Offset += F.BaseOffset;
        LUThatHas->pushFixup(Fixup);
        LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
      }

      // Delete formulae from the new use which are no longer legal.
      bool Any = false;
      for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
        Formula &F = LUThatHas->Formulae[i];
        if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
                        LUThatHas->Kind, LUThatHas->AccessTy, F)) {
          LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
          LUThatHas->DeleteFormula(F);
          // Compensate the index and bound for the removed element.
          --i;
          --e;
          Any = true;
        }
      }

      if (Any)
        LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);

      // Delete the old use. LU is now dead, so back up LUIdx/NumUses and
      // break out of the formula loop rather than touching LU again.
      DeleteUse(LU, LUIdx);
      --LUIdx;
      --NumUses;
      break;
    }
  }

  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
4465f22ef01cSRoman Divacky
44667d523365SDimitry Andric /// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
4467e580952dSDimitry Andric /// we've done more filtering, as it may be able to find more formulae to
4468e580952dSDimitry Andric /// eliminate.
NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters()4469e580952dSDimitry Andric void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
4470e580952dSDimitry Andric if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
44714ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
4472e580952dSDimitry Andric
44734ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
4474e580952dSDimitry Andric "undesirable dedicated registers.\n");
4475e580952dSDimitry Andric
4476e580952dSDimitry Andric FilterOutUndesirableDedicatedRegisters();
4477e580952dSDimitry Andric
44784ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4479e580952dSDimitry Andric }
4480e580952dSDimitry Andric }
4481e580952dSDimitry Andric
/// If a LSRUse has multiple formulae with the same ScaledReg and Scale.
/// Pick the best one and delete the others.
/// This narrowing heuristic is to keep as many formulae with different
/// Scale and ScaledReg pair as possible while narrowing the search space.
/// The benefit is that it is more likely to find out a better solution
/// from a formulae set with more Scale and ScaledReg variations than
/// a formulae set with the same Scale and ScaledReg. The picking winner
/// reg heuristic will often keep the formulae with the same Scale and
/// ScaledReg and filter others, and we want to avoid that if possible.
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  LLVM_DEBUG(
      dbgs() << "The search space is too complex.\n"
                "Narrowing the search space by choosing the best Formula "
                "from the Formulae with the same Scale and ScaledReg.\n");

  // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse.
  using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;

  BestFormulaeTy BestFormulae;
#ifndef NDEBUG
  bool ChangedFormulae = false;
#endif
  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
               dbgs() << '\n');

    // Return true if Formula FA is better than Formula FB.
    auto IsBetterThan = [&](Formula &FA, Formula &FB) {
      // First we will try to choose the Formula with fewer new registers.
      // For a register used by current Formula, the more the register is
      // shared among LSRUses, the less we increase the register number
      // counter of the formula.
      size_t FARegNum = 0;
      for (const SCEV *Reg : FA.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FARegNum += (NumUses - UsedByIndices.count() + 1);
      }
      size_t FBRegNum = 0;
      for (const SCEV *Reg : FB.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FBRegNum += (NumUses - UsedByIndices.count() + 1);
      }
      if (FARegNum != FBRegNum)
        return FARegNum < FBRegNum;

      // If the new register numbers are the same, choose the Formula with
      // less Cost.
      Cost CostFA, CostFB;
      Regs.clear();
      CostFA.RateFormula(TTI, FA, Regs, VisitedRegs, L, SE, DT, LU);
      Regs.clear();
      CostFB.RateFormula(TTI, FB, Regs, VisitedRegs, L, SE, DT, LU);
      return CostFA.isLess(CostFB, TTI);
    };

    bool Any = false;
    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
         ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      // Formulae without a scaled register are outside this heuristic.
      if (!F.ScaledReg)
        continue;
      auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
      if (P.second)
        // First formula seen with this (ScaledReg, Scale) pair; keep it.
        continue;

      Formula &Best = LU.Formulae[P.first->second];
      if (IsBetterThan(F, Best))
        // Swap so that F names the loser about to be deleted.
        std::swap(F, Best);
      LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                 dbgs() << "\n"
                           "    in favor of formula ";
                 Best.print(dbgs()); dbgs() << '\n');
#ifndef NDEBUG
      ChangedFormulae = true;
#endif
      // Delete the loser and back up the index/bound to re-examine the
      // element shifted into FIdx.
      LU.DeleteFormula(F);
      --FIdx;
      --NumForms;
      Any = true;
    }
    if (Any)
      LU.RecomputeRegs(LUIdx, RegUses);

    // Reset this to prepare for the next use.
    BestFormulae.clear();
  }

  LLVM_DEBUG(if (ChangedFormulae) {
    dbgs() << "\n"
              "After filtering out undesirable candidates:\n";
    print_uses(dbgs());
  });
}
4582c4394386SDimitry Andric
45837a7e6055SDimitry Andric /// This function deletes formulas with a high expected register count.
45847a7e6055SDimitry Andric /// Assuming we don't know the value of each formula (already delete
45857a7e6055SDimitry Andric /// all inefficient), generate probability of not selecting for each
45867a7e6055SDimitry Andric /// register.
45877a7e6055SDimitry Andric /// For example,
45887a7e6055SDimitry Andric /// Use1:
45897a7e6055SDimitry Andric /// reg(a) + reg({0,+,1})
45907a7e6055SDimitry Andric /// reg(a) + reg({-1,+,1}) + 1
45917a7e6055SDimitry Andric /// reg({a,+,1})
45927a7e6055SDimitry Andric /// Use2:
45937a7e6055SDimitry Andric /// reg(b) + reg({0,+,1})
45947a7e6055SDimitry Andric /// reg(b) + reg({-1,+,1}) + 1
45957a7e6055SDimitry Andric /// reg({b,+,1})
45967a7e6055SDimitry Andric /// Use3:
45977a7e6055SDimitry Andric /// reg(c) + reg(b) + reg({0,+,1})
45987a7e6055SDimitry Andric /// reg(c) + reg({b,+,1})
45997a7e6055SDimitry Andric ///
46007a7e6055SDimitry Andric /// Probability of not selecting
46017a7e6055SDimitry Andric /// Use1 Use2 Use3
46027a7e6055SDimitry Andric /// reg(a) (1/3) * 1 * 1
46037a7e6055SDimitry Andric /// reg(b) 1 * (1/3) * (1/2)
46047a7e6055SDimitry Andric /// reg({0,+,1}) (2/3) * (2/3) * (1/2)
46057a7e6055SDimitry Andric /// reg({-1,+,1}) (2/3) * (2/3) * 1
46067a7e6055SDimitry Andric /// reg({a,+,1}) (2/3) * 1 * 1
46077a7e6055SDimitry Andric /// reg({b,+,1}) 1 * (2/3) * (2/3)
46087a7e6055SDimitry Andric /// reg(c) 1 * 1 * 0
46097a7e6055SDimitry Andric ///
46107a7e6055SDimitry Andric /// Now count registers number mathematical expectation for each formula:
46117a7e6055SDimitry Andric /// Note that for each use we exclude probability if not selecting for the use.
46127a7e6055SDimitry Andric /// For example for Use1 probability for reg(a) would be just 1 * 1 (excluding
46137a7e6055SDimitry Andric /// probability 1/3 of not selecting for Use1).
46147a7e6055SDimitry Andric /// Use1:
46157a7e6055SDimitry Andric /// reg(a) + reg({0,+,1}) 1 + 1/3 -- to be deleted
46167a7e6055SDimitry Andric /// reg(a) + reg({-1,+,1}) + 1 1 + 4/9 -- to be deleted
46177a7e6055SDimitry Andric /// reg({a,+,1}) 1
46187a7e6055SDimitry Andric /// Use2:
46197a7e6055SDimitry Andric /// reg(b) + reg({0,+,1}) 1/2 + 1/3 -- to be deleted
46207a7e6055SDimitry Andric /// reg(b) + reg({-1,+,1}) + 1 1/2 + 2/3 -- to be deleted
46217a7e6055SDimitry Andric /// reg({b,+,1}) 2/3
46227a7e6055SDimitry Andric /// Use3:
46237a7e6055SDimitry Andric /// reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
46247a7e6055SDimitry Andric /// reg(c) + reg({b,+,1}) 1 + 2/3
NarrowSearchSpaceByDeletingCostlyFormulas()46257a7e6055SDimitry Andric void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
46267a7e6055SDimitry Andric if (EstimateSearchSpaceComplexity() < ComplexityLimit)
46277a7e6055SDimitry Andric return;
46287a7e6055SDimitry Andric // Ok, we have too many of formulae on our hands to conveniently handle.
46297a7e6055SDimitry Andric // Use a rough heuristic to thin out the list.
46307a7e6055SDimitry Andric
46317a7e6055SDimitry Andric // Set of Regs which will be 100% used in final solution.
46327a7e6055SDimitry Andric // Used in each formula of a solution (in example above this is reg(c)).
46337a7e6055SDimitry Andric // We can skip them in calculations.
46347a7e6055SDimitry Andric SmallPtrSet<const SCEV *, 4> UniqRegs;
46354ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
46367a7e6055SDimitry Andric
46377a7e6055SDimitry Andric // Map each register to probability of not selecting
46387a7e6055SDimitry Andric DenseMap <const SCEV *, float> RegNumMap;
46397a7e6055SDimitry Andric for (const SCEV *Reg : RegUses) {
46407a7e6055SDimitry Andric if (UniqRegs.count(Reg))
46417a7e6055SDimitry Andric continue;
46427a7e6055SDimitry Andric float PNotSel = 1;
46437a7e6055SDimitry Andric for (const LSRUse &LU : Uses) {
46447a7e6055SDimitry Andric if (!LU.Regs.count(Reg))
46457a7e6055SDimitry Andric continue;
46467a7e6055SDimitry Andric float P = LU.getNotSelectedProbability(Reg);
46477a7e6055SDimitry Andric if (P != 0.0)
46487a7e6055SDimitry Andric PNotSel *= P;
46497a7e6055SDimitry Andric else
46507a7e6055SDimitry Andric UniqRegs.insert(Reg);
46517a7e6055SDimitry Andric }
46527a7e6055SDimitry Andric RegNumMap.insert(std::make_pair(Reg, PNotSel));
46537a7e6055SDimitry Andric }
46547a7e6055SDimitry Andric
46554ba319b5SDimitry Andric LLVM_DEBUG(
46564ba319b5SDimitry Andric dbgs() << "Narrowing the search space by deleting costly formulas\n");
46577a7e6055SDimitry Andric
46587a7e6055SDimitry Andric // Delete formulas where registers number expectation is high.
46597a7e6055SDimitry Andric for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
46607a7e6055SDimitry Andric LSRUse &LU = Uses[LUIdx];
46617a7e6055SDimitry Andric // If nothing to delete - continue.
46627a7e6055SDimitry Andric if (LU.Formulae.size() < 2)
46637a7e6055SDimitry Andric continue;
46647a7e6055SDimitry Andric // This is temporary solution to test performance. Float should be
46657a7e6055SDimitry Andric // replaced with round independent type (based on integers) to avoid
46667a7e6055SDimitry Andric // different results for different target builds.
46677a7e6055SDimitry Andric float FMinRegNum = LU.Formulae[0].getNumRegs();
46687a7e6055SDimitry Andric float FMinARegNum = LU.Formulae[0].getNumRegs();
46697a7e6055SDimitry Andric size_t MinIdx = 0;
46707a7e6055SDimitry Andric for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
46717a7e6055SDimitry Andric Formula &F = LU.Formulae[i];
46727a7e6055SDimitry Andric float FRegNum = 0;
46737a7e6055SDimitry Andric float FARegNum = 0;
46747a7e6055SDimitry Andric for (const SCEV *BaseReg : F.BaseRegs) {
46757a7e6055SDimitry Andric if (UniqRegs.count(BaseReg))
46767a7e6055SDimitry Andric continue;
46777a7e6055SDimitry Andric FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
46787a7e6055SDimitry Andric if (isa<SCEVAddRecExpr>(BaseReg))
46797a7e6055SDimitry Andric FARegNum +=
46807a7e6055SDimitry Andric RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
46817a7e6055SDimitry Andric }
46827a7e6055SDimitry Andric if (const SCEV *ScaledReg = F.ScaledReg) {
46837a7e6055SDimitry Andric if (!UniqRegs.count(ScaledReg)) {
46847a7e6055SDimitry Andric FRegNum +=
46857a7e6055SDimitry Andric RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
46867a7e6055SDimitry Andric if (isa<SCEVAddRecExpr>(ScaledReg))
46877a7e6055SDimitry Andric FARegNum +=
46887a7e6055SDimitry Andric RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
46897a7e6055SDimitry Andric }
46907a7e6055SDimitry Andric }
46917a7e6055SDimitry Andric if (FMinRegNum > FRegNum ||
46927a7e6055SDimitry Andric (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
46937a7e6055SDimitry Andric FMinRegNum = FRegNum;
46947a7e6055SDimitry Andric FMinARegNum = FARegNum;
46957a7e6055SDimitry Andric MinIdx = i;
46967a7e6055SDimitry Andric }
46977a7e6055SDimitry Andric }
46984ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs());
46997a7e6055SDimitry Andric dbgs() << " with min reg num " << FMinRegNum << '\n');
47007a7e6055SDimitry Andric if (MinIdx != 0)
47017a7e6055SDimitry Andric std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
47027a7e6055SDimitry Andric while (LU.Formulae.size() != 1) {
47034ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs());
47047a7e6055SDimitry Andric dbgs() << '\n');
47057a7e6055SDimitry Andric LU.Formulae.pop_back();
47067a7e6055SDimitry Andric }
47077a7e6055SDimitry Andric LU.RecomputeRegs(LUIdx, RegUses);
47087a7e6055SDimitry Andric assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
47097a7e6055SDimitry Andric Formula &F = LU.Formulae[0];
47104ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << '\n');
47117a7e6055SDimitry Andric // When we choose the formula, the regs become unique.
47127a7e6055SDimitry Andric UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
47137a7e6055SDimitry Andric if (F.ScaledReg)
47147a7e6055SDimitry Andric UniqRegs.insert(F.ScaledReg);
47157a7e6055SDimitry Andric }
47164ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
47177a7e6055SDimitry Andric }
47187a7e6055SDimitry Andric
47197d523365SDimitry Andric /// Pick a register which seems likely to be profitable, and then in any use
47207d523365SDimitry Andric /// which has any reference to that register, delete all formulae which do not
47217d523365SDimitry Andric /// reference that register.
NarrowSearchSpaceByPickingWinnerRegs()4722e580952dSDimitry Andric void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
4723f22ef01cSRoman Divacky // With all other options exhausted, loop until the system is simple
4724f22ef01cSRoman Divacky // enough to handle.
4725f22ef01cSRoman Divacky SmallPtrSet<const SCEV *, 4> Taken;
4726f22ef01cSRoman Divacky while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4727f22ef01cSRoman Divacky // Ok, we have too many of formulae on our hands to conveniently handle.
4728f22ef01cSRoman Divacky // Use a rough heuristic to thin out the list.
47294ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
4730f22ef01cSRoman Divacky
4731f22ef01cSRoman Divacky // Pick the register which is used by the most LSRUses, which is likely
4732f22ef01cSRoman Divacky // to be a good reuse register candidate.
473391bc56edSDimitry Andric const SCEV *Best = nullptr;
4734f22ef01cSRoman Divacky unsigned BestNum = 0;
4735ff0cc061SDimitry Andric for (const SCEV *Reg : RegUses) {
4736f22ef01cSRoman Divacky if (Taken.count(Reg))
4737f22ef01cSRoman Divacky continue;
4738d88c1a5aSDimitry Andric if (!Best) {
4739f22ef01cSRoman Divacky Best = Reg;
4740d88c1a5aSDimitry Andric BestNum = RegUses.getUsedByIndices(Reg).count();
4741d88c1a5aSDimitry Andric } else {
4742f22ef01cSRoman Divacky unsigned Count = RegUses.getUsedByIndices(Reg).count();
4743f22ef01cSRoman Divacky if (Count > BestNum) {
4744f22ef01cSRoman Divacky Best = Reg;
4745f22ef01cSRoman Divacky BestNum = Count;
4746f22ef01cSRoman Divacky }
4747f22ef01cSRoman Divacky }
4748f22ef01cSRoman Divacky }
4749f22ef01cSRoman Divacky
47504ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
4751f22ef01cSRoman Divacky << " will yield profitable reuse.\n");
4752f22ef01cSRoman Divacky Taken.insert(Best);
4753f22ef01cSRoman Divacky
4754f22ef01cSRoman Divacky // In any use with formulae which references this register, delete formulae
4755f22ef01cSRoman Divacky // which don't reference it.
4756f22ef01cSRoman Divacky for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4757f22ef01cSRoman Divacky LSRUse &LU = Uses[LUIdx];
4758f22ef01cSRoman Divacky if (!LU.Regs.count(Best)) continue;
4759f22ef01cSRoman Divacky
4760f22ef01cSRoman Divacky bool Any = false;
4761f22ef01cSRoman Divacky for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4762f22ef01cSRoman Divacky Formula &F = LU.Formulae[i];
4763f22ef01cSRoman Divacky if (!F.referencesReg(Best)) {
47644ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
4765f22ef01cSRoman Divacky LU.DeleteFormula(F);
4766f22ef01cSRoman Divacky --e;
4767f22ef01cSRoman Divacky --i;
4768f22ef01cSRoman Divacky Any = true;
4769f22ef01cSRoman Divacky assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
4770f22ef01cSRoman Divacky continue;
4771f22ef01cSRoman Divacky }
4772f22ef01cSRoman Divacky }
4773f22ef01cSRoman Divacky
4774f22ef01cSRoman Divacky if (Any)
4775f22ef01cSRoman Divacky LU.RecomputeRegs(LUIdx, RegUses);
4776f22ef01cSRoman Divacky }
4777f22ef01cSRoman Divacky
47784ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4779f22ef01cSRoman Divacky }
4780f22ef01cSRoman Divacky }
4781f22ef01cSRoman Divacky
47827d523365SDimitry Andric /// If there are an extraordinary number of formulae to choose from, use some
47837d523365SDimitry Andric /// rough heuristics to prune down the number of formulae. This keeps the main
47847d523365SDimitry Andric /// solver from taking an extraordinary amount of time in some worst-case
47857d523365SDimitry Andric /// scenarios.
NarrowSearchSpaceUsingHeuristics()4786e580952dSDimitry Andric void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
4787e580952dSDimitry Andric NarrowSearchSpaceByDetectingSupersets();
4788e580952dSDimitry Andric NarrowSearchSpaceByCollapsingUnrolledCode();
4789e580952dSDimitry Andric NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
4790c4394386SDimitry Andric if (FilterSameScaledReg)
4791c4394386SDimitry Andric NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
47927a7e6055SDimitry Andric if (LSRExpNarrow)
47937a7e6055SDimitry Andric NarrowSearchSpaceByDeletingCostlyFormulas();
47947a7e6055SDimitry Andric else
4795e580952dSDimitry Andric NarrowSearchSpaceByPickingWinnerRegs();
4796e580952dSDimitry Andric }
4797e580952dSDimitry Andric
47987d523365SDimitry Andric /// This is the recursive solver.
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                               Cost &SolutionCost,
                               SmallVectorImpl<const Formula *> &Workspace,
                               const Cost &CurCost,
                               const SmallPtrSet<const SCEV *, 16> &CurRegs,
                               DenseSet<const SCEV *> &VisitedRegs) const {
  // Some ideas:
  //  - prune more:
  //    - use more aggressive filtering
  //    - sort the formula so that the most profitable solutions are found first
  //    - sort the uses too
  //  - search faster:
  //    - don't compute a cost, and then compare. compare while computing a cost
  //      and bail early.
  //    - track register sets with SmallBitVector

  // Workspace holds the formula chosen for each use solved so far, so its
  // size indexes the next use to solve.
  const LSRUse &LU = Uses[Workspace.size()];

  // If this use references any register that's already a part of the
  // in-progress solution, consider it a requirement that a formula must
  // reference that register in order to be considered. This prunes out
  // unprofitable searching.
  SmallSetVector<const SCEV *, 4> ReqRegs;
  for (const SCEV *S : CurRegs)
    if (LU.Regs.count(S))
      ReqRegs.insert(S);

  SmallPtrSet<const SCEV *, 16> NewRegs;
  Cost NewCost;
  for (const Formula &F : LU.Formulae) {
    // Ignore formulae which may not be ideal in terms of register reuse of
    // ReqRegs. The formula should use all required registers before
    // introducing new ones.
    int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
    for (const SCEV *Reg : ReqRegs) {
      if ((F.ScaledReg && F.ScaledReg == Reg) ||
          is_contained(F.BaseRegs, Reg)) {
        --NumReqRegsToFind;
        if (NumReqRegsToFind == 0)
          break;
      }
    }
    if (NumReqRegsToFind != 0) {
      // If none of the formulae satisfied the required registers, then we could
      // clear ReqRegs and try again. Currently, we simply give up in this case.
      continue;
    }

    // Evaluate the cost of the current formula. If it's already worse than
    // the current best, prune the search at that point.
    NewCost = CurCost;
    NewRegs = CurRegs;
    NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU);
    if (NewCost.isLess(SolutionCost, TTI)) {
      Workspace.push_back(&F);
      if (Workspace.size() != Uses.size()) {
        // More uses remain: recurse to extend this partial solution.
        SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
                     NewRegs, VisitedRegs);
        // NOTE(review): this appears to mark the lone register of a
        // single-register formula at the top recursion level as visited, so
        // sibling candidates are rated with that register already seen --
        // confirm against RateFormula's treatment of VisitedRegs.
        if (F.getNumRegs() == 1 && Workspace.size() == 1)
          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
      } else {
        // Every use now has a formula and the total cost beat the incumbent;
        // record this as the new best complete solution.
        LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
                   dbgs() << ".\n Regs:"; for (const SCEV *S
                                               : NewRegs) dbgs()
                                          << ' ' << *S;
                   dbgs() << '\n');

        SolutionCost = NewCost;
        Solution = Workspace;
      }
      Workspace.pop_back();
    }
  }
}
4873f22ef01cSRoman Divacky
48747d523365SDimitry Andric /// Choose one formula from each use. Return the results in the given Solution
48757d523365SDimitry Andric /// vector.
Solve(SmallVectorImpl<const Formula * > & Solution) const4876f22ef01cSRoman Divacky void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
4877f22ef01cSRoman Divacky SmallVector<const Formula *, 8> Workspace;
4878f22ef01cSRoman Divacky Cost SolutionCost;
487991bc56edSDimitry Andric SolutionCost.Lose();
4880f22ef01cSRoman Divacky Cost CurCost;
4881f22ef01cSRoman Divacky SmallPtrSet<const SCEV *, 16> CurRegs;
4882f22ef01cSRoman Divacky DenseSet<const SCEV *> VisitedRegs;
4883f22ef01cSRoman Divacky Workspace.reserve(Uses.size());
4884f22ef01cSRoman Divacky
4885f22ef01cSRoman Divacky // SolveRecurse does all the work.
4886f22ef01cSRoman Divacky SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
4887f22ef01cSRoman Divacky CurRegs, VisitedRegs);
48886122f3e6SDimitry Andric if (Solution.empty()) {
48894ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
48906122f3e6SDimitry Andric return;
48916122f3e6SDimitry Andric }
4892f22ef01cSRoman Divacky
4893f22ef01cSRoman Divacky // Ok, we've now made all our decisions.
48944ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "\n"
48954ba319b5SDimitry Andric "The chosen solution requires ";
48964ba319b5SDimitry Andric SolutionCost.print(dbgs()); dbgs() << ":\n";
4897f22ef01cSRoman Divacky for (size_t i = 0, e = Uses.size(); i != e; ++i) {
4898f22ef01cSRoman Divacky dbgs() << " ";
4899f22ef01cSRoman Divacky Uses[i].print(dbgs());
4900f22ef01cSRoman Divacky dbgs() << "\n"
4901f22ef01cSRoman Divacky " ";
4902f22ef01cSRoman Divacky Solution[i]->print(dbgs());
4903f22ef01cSRoman Divacky dbgs() << '\n';
4904f22ef01cSRoman Divacky });
4905f22ef01cSRoman Divacky
4906f22ef01cSRoman Divacky assert(Solution.size() == Uses.size() && "Malformed solution!");
4907f22ef01cSRoman Divacky }
4908f22ef01cSRoman Divacky
49097d523365SDimitry Andric /// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree far as
49107d523365SDimitry Andric /// we can go while still being dominated by the input positions. This helps
49117d523365SDimitry Andric /// canonicalize the insert position, which encourages sharing.
4912f22ef01cSRoman Divacky BasicBlock::iterator
HoistInsertPosition(BasicBlock::iterator IP,const SmallVectorImpl<Instruction * > & Inputs) const4913f22ef01cSRoman Divacky LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
4914f22ef01cSRoman Divacky const SmallVectorImpl<Instruction *> &Inputs)
4915f22ef01cSRoman Divacky const {
49163ca95b02SDimitry Andric Instruction *Tentative = &*IP;
4917d88c1a5aSDimitry Andric while (true) {
49183ca95b02SDimitry Andric bool AllDominate = true;
49193ca95b02SDimitry Andric Instruction *BetterPos = nullptr;
49203ca95b02SDimitry Andric // Don't bother attempting to insert before a catchswitch, their basic block
49213ca95b02SDimitry Andric // cannot have other non-PHI instructions.
49223ca95b02SDimitry Andric if (isa<CatchSwitchInst>(Tentative))
49233ca95b02SDimitry Andric return IP;
49243ca95b02SDimitry Andric
49253ca95b02SDimitry Andric for (Instruction *Inst : Inputs) {
49263ca95b02SDimitry Andric if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
49273ca95b02SDimitry Andric AllDominate = false;
49283ca95b02SDimitry Andric break;
49293ca95b02SDimitry Andric }
49303ca95b02SDimitry Andric // Attempt to find an insert position in the middle of the block,
49313ca95b02SDimitry Andric // instead of at the end, so that it can be used for other expansions.
49323ca95b02SDimitry Andric if (Tentative->getParent() == Inst->getParent() &&
49333ca95b02SDimitry Andric (!BetterPos || !DT.dominates(Inst, BetterPos)))
49343ca95b02SDimitry Andric BetterPos = &*std::next(BasicBlock::iterator(Inst));
49353ca95b02SDimitry Andric }
49363ca95b02SDimitry Andric if (!AllDominate)
49373ca95b02SDimitry Andric break;
49383ca95b02SDimitry Andric if (BetterPos)
49393ca95b02SDimitry Andric IP = BetterPos->getIterator();
49403ca95b02SDimitry Andric else
49413ca95b02SDimitry Andric IP = Tentative->getIterator();
49423ca95b02SDimitry Andric
4943f22ef01cSRoman Divacky const Loop *IPLoop = LI.getLoopFor(IP->getParent());
4944f22ef01cSRoman Divacky unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
4945f22ef01cSRoman Divacky
4946f22ef01cSRoman Divacky BasicBlock *IDom;
4947f22ef01cSRoman Divacky for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
4948f22ef01cSRoman Divacky if (!Rung) return IP;
4949f22ef01cSRoman Divacky Rung = Rung->getIDom();
4950f22ef01cSRoman Divacky if (!Rung) return IP;
4951f22ef01cSRoman Divacky IDom = Rung->getBlock();
4952f22ef01cSRoman Divacky
4953f22ef01cSRoman Divacky // Don't climb into a loop though.
4954f22ef01cSRoman Divacky const Loop *IDomLoop = LI.getLoopFor(IDom);
4955f22ef01cSRoman Divacky unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
4956f22ef01cSRoman Divacky if (IDomDepth <= IPLoopDepth &&
4957f22ef01cSRoman Divacky (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
4958f22ef01cSRoman Divacky break;
4959f22ef01cSRoman Divacky }
4960f22ef01cSRoman Divacky
49613ca95b02SDimitry Andric Tentative = IDom->getTerminator();
4962f22ef01cSRoman Divacky }
4963f22ef01cSRoman Divacky
4964f22ef01cSRoman Divacky return IP;
4965f22ef01cSRoman Divacky }
4966f22ef01cSRoman Divacky
49677d523365SDimitry Andric /// Determine an input position which will be dominated by the operands and
49687d523365SDimitry Andric /// which will dominate the result.
4969f22ef01cSRoman Divacky BasicBlock::iterator
AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,const LSRFixup & LF,const LSRUse & LU,SCEVExpander & Rewriter) const4970dff0c46cSDimitry Andric LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
4971f22ef01cSRoman Divacky const LSRFixup &LF,
4972dff0c46cSDimitry Andric const LSRUse &LU,
4973dff0c46cSDimitry Andric SCEVExpander &Rewriter) const {
4974f22ef01cSRoman Divacky // Collect some instructions which must be dominated by the
4975f22ef01cSRoman Divacky // expanding replacement. These must be dominated by any operands that
4976f22ef01cSRoman Divacky // will be required in the expansion.
4977f22ef01cSRoman Divacky SmallVector<Instruction *, 4> Inputs;
4978f22ef01cSRoman Divacky if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
4979f22ef01cSRoman Divacky Inputs.push_back(I);
4980f22ef01cSRoman Divacky if (LU.Kind == LSRUse::ICmpZero)
4981f22ef01cSRoman Divacky if (Instruction *I =
4982f22ef01cSRoman Divacky dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
4983f22ef01cSRoman Divacky Inputs.push_back(I);
4984f22ef01cSRoman Divacky if (LF.PostIncLoops.count(L)) {
4985f22ef01cSRoman Divacky if (LF.isUseFullyOutsideLoop(L))
4986f22ef01cSRoman Divacky Inputs.push_back(L->getLoopLatch()->getTerminator());
4987f22ef01cSRoman Divacky else
4988f22ef01cSRoman Divacky Inputs.push_back(IVIncInsertPos);
4989f22ef01cSRoman Divacky }
4990f22ef01cSRoman Divacky // The expansion must also be dominated by the increment positions of any
4991f22ef01cSRoman Divacky // loops it for which it is using post-inc mode.
4992ff0cc061SDimitry Andric for (const Loop *PIL : LF.PostIncLoops) {
4993f22ef01cSRoman Divacky if (PIL == L) continue;
4994f22ef01cSRoman Divacky
4995f22ef01cSRoman Divacky // Be dominated by the loop exit.
4996f22ef01cSRoman Divacky SmallVector<BasicBlock *, 4> ExitingBlocks;
4997f22ef01cSRoman Divacky PIL->getExitingBlocks(ExitingBlocks);
4998f22ef01cSRoman Divacky if (!ExitingBlocks.empty()) {
4999f22ef01cSRoman Divacky BasicBlock *BB = ExitingBlocks[0];
5000f22ef01cSRoman Divacky for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
5001f22ef01cSRoman Divacky BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
5002f22ef01cSRoman Divacky Inputs.push_back(BB->getTerminator());
5003f22ef01cSRoman Divacky }
5004f22ef01cSRoman Divacky }
5005f22ef01cSRoman Divacky
50067d523365SDimitry Andric assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
5007dff0c46cSDimitry Andric && !isa<DbgInfoIntrinsic>(LowestIP) &&
5008dff0c46cSDimitry Andric "Insertion point must be a normal instruction");
5009dff0c46cSDimitry Andric
5010f22ef01cSRoman Divacky // Then, climb up the immediate dominator tree as far as we can go while
5011f22ef01cSRoman Divacky // still being dominated by the input positions.
5012dff0c46cSDimitry Andric BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
5013f22ef01cSRoman Divacky
5014f22ef01cSRoman Divacky // Don't insert instructions before PHI nodes.
5015f22ef01cSRoman Divacky while (isa<PHINode>(IP)) ++IP;
5016f22ef01cSRoman Divacky
50176122f3e6SDimitry Andric // Ignore landingpad instructions.
50183ca95b02SDimitry Andric while (IP->isEHPad()) ++IP;
50196122f3e6SDimitry Andric
5020f22ef01cSRoman Divacky // Ignore debug intrinsics.
5021f22ef01cSRoman Divacky while (isa<DbgInfoIntrinsic>(IP)) ++IP;
5022f22ef01cSRoman Divacky
5023dff0c46cSDimitry Andric // Set IP below instructions recently inserted by SCEVExpander. This keeps the
5024dff0c46cSDimitry Andric // IP consistent across expansions and allows the previously inserted
5025dff0c46cSDimitry Andric // instructions to be reused by subsequent expansion.
50267d523365SDimitry Andric while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
50277d523365SDimitry Andric ++IP;
5028dff0c46cSDimitry Andric
5029f22ef01cSRoman Divacky return IP;
5030f22ef01cSRoman Divacky }
5031f22ef01cSRoman Divacky
50327d523365SDimitry Andric /// Emit instructions for the leading candidate expression for this LSRUse (this
50337d523365SDimitry Andric /// is called "expanding").
Expand(const LSRUse & LU,const LSRFixup & LF,const Formula & F,BasicBlock::iterator IP,SCEVExpander & Rewriter,SmallVectorImpl<WeakTrackingVH> & DeadInsts) const5034f37b6182SDimitry Andric Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
5035f37b6182SDimitry Andric const Formula &F, BasicBlock::iterator IP,
5036f22ef01cSRoman Divacky SCEVExpander &Rewriter,
5037f37b6182SDimitry Andric SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
5038f785676fSDimitry Andric if (LU.RigidFormula)
5039f785676fSDimitry Andric return LF.OperandValToReplace;
5040f22ef01cSRoman Divacky
5041f22ef01cSRoman Divacky // Determine an input position which will be dominated by the operands and
5042f22ef01cSRoman Divacky // which will dominate the result.
5043dff0c46cSDimitry Andric IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
50446c4bc1bdSDimitry Andric Rewriter.setInsertPoint(&*IP);
5045f22ef01cSRoman Divacky
5046f22ef01cSRoman Divacky // Inform the Rewriter if we have a post-increment use, so that it can
5047f22ef01cSRoman Divacky // perform an advantageous expansion.
5048f22ef01cSRoman Divacky Rewriter.setPostInc(LF.PostIncLoops);
5049f22ef01cSRoman Divacky
5050f22ef01cSRoman Divacky // This is the type that the user actually needs.
50516122f3e6SDimitry Andric Type *OpTy = LF.OperandValToReplace->getType();
5052f22ef01cSRoman Divacky // This will be the type that we'll initially expand to.
50536122f3e6SDimitry Andric Type *Ty = F.getType();
5054f22ef01cSRoman Divacky if (!Ty)
5055f22ef01cSRoman Divacky // No type known; just expand directly to the ultimate type.
5056f22ef01cSRoman Divacky Ty = OpTy;
5057f22ef01cSRoman Divacky else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
5058f22ef01cSRoman Divacky // Expand directly to the ultimate type if it's the right size.
5059f22ef01cSRoman Divacky Ty = OpTy;
5060f22ef01cSRoman Divacky // This is the type to do integer arithmetic in.
50616122f3e6SDimitry Andric Type *IntTy = SE.getEffectiveSCEVType(Ty);
5062f22ef01cSRoman Divacky
5063f22ef01cSRoman Divacky // Build up a list of operands to add together to form the full base.
5064f22ef01cSRoman Divacky SmallVector<const SCEV *, 8> Ops;
5065f22ef01cSRoman Divacky
5066f22ef01cSRoman Divacky // Expand the BaseRegs portion.
5067ff0cc061SDimitry Andric for (const SCEV *Reg : F.BaseRegs) {
5068f22ef01cSRoman Divacky assert(!Reg->isZero() && "Zero allocated in a base register!");
5069f22ef01cSRoman Divacky
5070f22ef01cSRoman Divacky // If we're expanding for a post-inc user, make the post-inc adjustment.
50717a7e6055SDimitry Andric Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE);
50726c4bc1bdSDimitry Andric Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
5073f22ef01cSRoman Divacky }
5074f22ef01cSRoman Divacky
5075f22ef01cSRoman Divacky // Expand the ScaledReg portion.
507691bc56edSDimitry Andric Value *ICmpScaledV = nullptr;
5077139f7f9bSDimitry Andric if (F.Scale != 0) {
5078f22ef01cSRoman Divacky const SCEV *ScaledS = F.ScaledReg;
5079f22ef01cSRoman Divacky
5080f22ef01cSRoman Divacky // If we're expanding for a post-inc user, make the post-inc adjustment.
5081f22ef01cSRoman Divacky PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
50827a7e6055SDimitry Andric ScaledS = denormalizeForPostIncUse(ScaledS, Loops, SE);
5083f22ef01cSRoman Divacky
5084f22ef01cSRoman Divacky if (LU.Kind == LSRUse::ICmpZero) {
508591bc56edSDimitry Andric // Expand ScaleReg as if it was part of the base regs.
508691bc56edSDimitry Andric if (F.Scale == 1)
508791bc56edSDimitry Andric Ops.push_back(
50886c4bc1bdSDimitry Andric SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));
508991bc56edSDimitry Andric else {
5090f22ef01cSRoman Divacky // An interesting way of "folding" with an icmp is to use a negated
5091f22ef01cSRoman Divacky // scale, which we'll implement by inserting it into the other operand
5092f22ef01cSRoman Divacky // of the icmp.
5093139f7f9bSDimitry Andric assert(F.Scale == -1 &&
5094f22ef01cSRoman Divacky "The only scale supported by ICmpZero uses is -1!");
50956c4bc1bdSDimitry Andric ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
509691bc56edSDimitry Andric }
5097f22ef01cSRoman Divacky } else {
5098f22ef01cSRoman Divacky // Otherwise just expand the scaled register and an explicit scale,
5099f22ef01cSRoman Divacky // which is expected to be matched as part of the address.
51007ae0e2c9SDimitry Andric
51017ae0e2c9SDimitry Andric // Flush the operand list to suppress SCEVExpander hoisting address modes.
510291bc56edSDimitry Andric // Unless the addressing mode will not be folded.
510391bc56edSDimitry Andric if (!Ops.empty() && LU.Kind == LSRUse::Address &&
510491bc56edSDimitry Andric isAMCompletelyFolded(TTI, LU, F)) {
51054ba319b5SDimitry Andric Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
51067ae0e2c9SDimitry Andric Ops.clear();
51077ae0e2c9SDimitry Andric Ops.push_back(SE.getUnknown(FullV));
51087ae0e2c9SDimitry Andric }
51096c4bc1bdSDimitry Andric ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
511091bc56edSDimitry Andric if (F.Scale != 1)
511191bc56edSDimitry Andric ScaledS =
511291bc56edSDimitry Andric SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
5113f22ef01cSRoman Divacky Ops.push_back(ScaledS);
5114f22ef01cSRoman Divacky }
5115f22ef01cSRoman Divacky }
5116f22ef01cSRoman Divacky
5117f22ef01cSRoman Divacky // Expand the GV portion.
5118139f7f9bSDimitry Andric if (F.BaseGV) {
5119f22ef01cSRoman Divacky // Flush the operand list to suppress SCEVExpander hoisting.
51207ae0e2c9SDimitry Andric if (!Ops.empty()) {
51216c4bc1bdSDimitry Andric Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
51227ae0e2c9SDimitry Andric Ops.clear();
51237ae0e2c9SDimitry Andric Ops.push_back(SE.getUnknown(FullV));
51247ae0e2c9SDimitry Andric }
5125139f7f9bSDimitry Andric Ops.push_back(SE.getUnknown(F.BaseGV));
51267ae0e2c9SDimitry Andric }
51277ae0e2c9SDimitry Andric
51287ae0e2c9SDimitry Andric // Flush the operand list to suppress SCEVExpander hoisting of both folded and
51297ae0e2c9SDimitry Andric // unfolded offsets. LSR assumes they both live next to their uses.
51307ae0e2c9SDimitry Andric if (!Ops.empty()) {
51316c4bc1bdSDimitry Andric Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
5132f22ef01cSRoman Divacky Ops.clear();
5133f22ef01cSRoman Divacky Ops.push_back(SE.getUnknown(FullV));
5134f22ef01cSRoman Divacky }
5135f22ef01cSRoman Divacky
5136f22ef01cSRoman Divacky // Expand the immediate portion.
5137139f7f9bSDimitry Andric int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
5138f22ef01cSRoman Divacky if (Offset != 0) {
5139f22ef01cSRoman Divacky if (LU.Kind == LSRUse::ICmpZero) {
5140f22ef01cSRoman Divacky // The other interesting way of "folding" with an ICmpZero is to use a
5141f22ef01cSRoman Divacky // negated immediate.
5142f22ef01cSRoman Divacky if (!ICmpScaledV)
51436122f3e6SDimitry Andric ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
5144f22ef01cSRoman Divacky else {
5145f22ef01cSRoman Divacky Ops.push_back(SE.getUnknown(ICmpScaledV));
5146f22ef01cSRoman Divacky ICmpScaledV = ConstantInt::get(IntTy, Offset);
5147f22ef01cSRoman Divacky }
5148f22ef01cSRoman Divacky } else {
5149f22ef01cSRoman Divacky // Just add the immediate values. These again are expected to be matched
5150f22ef01cSRoman Divacky // as part of the address.
5151f22ef01cSRoman Divacky Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
5152f22ef01cSRoman Divacky }
5153f22ef01cSRoman Divacky }
5154f22ef01cSRoman Divacky
5155bd5abe19SDimitry Andric // Expand the unfolded offset portion.
5156bd5abe19SDimitry Andric int64_t UnfoldedOffset = F.UnfoldedOffset;
5157bd5abe19SDimitry Andric if (UnfoldedOffset != 0) {
5158bd5abe19SDimitry Andric // Just add the immediate values.
5159bd5abe19SDimitry Andric Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
5160bd5abe19SDimitry Andric UnfoldedOffset)));
5161bd5abe19SDimitry Andric }
5162bd5abe19SDimitry Andric
5163f22ef01cSRoman Divacky // Emit instructions summing all the operands.
5164f22ef01cSRoman Divacky const SCEV *FullS = Ops.empty() ?
5165f22ef01cSRoman Divacky SE.getConstant(IntTy, 0) :
5166f22ef01cSRoman Divacky SE.getAddExpr(Ops);
51676c4bc1bdSDimitry Andric Value *FullV = Rewriter.expandCodeFor(FullS, Ty);
5168f22ef01cSRoman Divacky
5169f22ef01cSRoman Divacky // We're done expanding now, so reset the rewriter.
5170f22ef01cSRoman Divacky Rewriter.clearPostInc();
5171f22ef01cSRoman Divacky
5172f22ef01cSRoman Divacky // An ICmpZero Formula represents an ICmp which we're handling as a
5173f22ef01cSRoman Divacky // comparison against zero. Now that we've expanded an expression for that
5174f22ef01cSRoman Divacky // form, update the ICmp's other operand.
5175f22ef01cSRoman Divacky if (LU.Kind == LSRUse::ICmpZero) {
5176f22ef01cSRoman Divacky ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
517797bc6c73SDimitry Andric DeadInsts.emplace_back(CI->getOperand(1));
5178139f7f9bSDimitry Andric assert(!F.BaseGV && "ICmp does not support folding a global value and "
5179f22ef01cSRoman Divacky "a scale at the same time!");
5180139f7f9bSDimitry Andric if (F.Scale == -1) {
5181f22ef01cSRoman Divacky if (ICmpScaledV->getType() != OpTy) {
5182f22ef01cSRoman Divacky Instruction *Cast =
5183f22ef01cSRoman Divacky CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
5184f22ef01cSRoman Divacky OpTy, false),
5185f22ef01cSRoman Divacky ICmpScaledV, OpTy, "tmp", CI);
5186f22ef01cSRoman Divacky ICmpScaledV = Cast;
5187f22ef01cSRoman Divacky }
5188f22ef01cSRoman Divacky CI->setOperand(1, ICmpScaledV);
5189f22ef01cSRoman Divacky } else {
519091bc56edSDimitry Andric // A scale of 1 means that the scale has been expanded as part of the
519191bc56edSDimitry Andric // base regs.
519291bc56edSDimitry Andric assert((F.Scale == 0 || F.Scale == 1) &&
5193f22ef01cSRoman Divacky "ICmp does not support folding a global value and "
5194f22ef01cSRoman Divacky "a scale at the same time!");
5195f22ef01cSRoman Divacky Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
5196f22ef01cSRoman Divacky -(uint64_t)Offset);
5197f22ef01cSRoman Divacky if (C->getType() != OpTy)
5198f22ef01cSRoman Divacky C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
5199f22ef01cSRoman Divacky OpTy, false),
5200f22ef01cSRoman Divacky C, OpTy);
5201f22ef01cSRoman Divacky
5202f22ef01cSRoman Divacky CI->setOperand(1, C);
5203f22ef01cSRoman Divacky }
5204f22ef01cSRoman Divacky }
5205f22ef01cSRoman Divacky
5206f22ef01cSRoman Divacky return FullV;
5207f22ef01cSRoman Divacky }
5208f22ef01cSRoman Divacky
/// Helper for Rewrite. PHI nodes are special because the use of their operands
/// effectively happens in their predecessor blocks, so the expression may need
/// to be expanded in multiple places.
void LSRInstance::RewriteForPHI(
    PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
    SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  // Cache of the value already expanded for each predecessor block, so that
  // multiple incoming edges from the same block share one expansion.
  DenseMap<BasicBlock *, Value *> Inserted;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
    if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
      BasicBlock *BB = PN->getIncomingBlock(i);

      // If this is a critical edge, split the edge so that we do not insert
      // the code on all predecessor/successor paths.  We do this unless this
      // is the canonical backedge for this loop, which complicates post-inc
      // users.
      // Terminators that cannot be split (indirectbr, catchswitch) are
      // excluded here rather than asserted on inside SplitCriticalEdge.
      if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
          !isa<IndirectBrInst>(BB->getTerminator()) &&
          !isa<CatchSwitchInst>(BB->getTerminator())) {
        BasicBlock *Parent = PN->getParent();
        Loop *PNLoop = LI.getLoopFor(Parent);
        if (!PNLoop || Parent != PNLoop->getHeader()) {
          // Split the critical edge.  Landing pads need the dedicated
          // predecessor-splitting helper instead of SplitCriticalEdge.
          BasicBlock *NewBB = nullptr;
          if (!Parent->isLandingPad()) {
            NewBB = SplitCriticalEdge(BB, Parent,
                                      CriticalEdgeSplittingOptions(&DT, &LI)
                                          .setMergeIdenticalEdges()
                                          .setDontDeleteUselessPHIs());
          } else {
            SmallVector<BasicBlock*, 2> NewBBs;
            SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI);
            NewBB = NewBBs[0];
          }
          // If NewBB==NULL, then SplitCriticalEdge refused to split because all
          // phi predecessors are identical. The simple thing to do is skip
          // splitting in this case rather than complicate the API.
          if (NewBB) {
            // If PN is outside of the loop and BB is in the loop, we want to
            // move the block to be immediately before the PHI block, not
            // immediately after BB.
            if (L->contains(BB) && !L->contains(PN))
              NewBB->moveBefore(PN->getParent());

            // Splitting the edge can reduce the number of PHI entries we have.
            // Refresh the loop bound and re-derive this entry's index.
            e = PN->getNumIncomingValues();
            BB = NewBB;
            i = PN->getBasicBlockIndex(BB);
          }
        }
      }

      // Reuse a previous expansion for this block if one exists; otherwise
      // expand at the block terminator and remember the result.
      std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
        Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
      if (!Pair.second)
        PN->setIncomingValue(i, Pair.first->second);
      else {
        Value *FullV = Expand(LU, LF, F, BB->getTerminator()->getIterator(),
                              Rewriter, DeadInsts);

        // If this is reuse-by-noop-cast, insert the noop cast.
        Type *OpTy = LF.OperandValToReplace->getType();
        if (FullV->getType() != OpTy)
          FullV =
            CastInst::Create(CastInst::getCastOpcode(FullV, false,
                                                     OpTy, false),
                             FullV, LF.OperandValToReplace->getType(),
                             "tmp", BB->getTerminator());

        PN->setIncomingValue(i, FullV);
        Pair.first->second = FullV;
      }
    }
}
5282f22ef01cSRoman Divacky
/// Emit instructions for the leading candidate expression for this LSRUse (this
/// is called "expanding"), and update the UserInst to reference the newly
/// expanded value.
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
                          const Formula &F, SCEVExpander &Rewriter,
                          SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  // First, find an insertion point that dominates UserInst. For PHI nodes,
  // find the nearest block which dominates all the relevant uses.
  if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
    RewriteForPHI(PN, LU, LF, F, Rewriter, DeadInsts);
  } else {
    Value *FullV =
      Expand(LU, LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts);

    // If this is reuse-by-noop-cast, insert the noop cast.
    Type *OpTy = LF.OperandValToReplace->getType();
    if (FullV->getType() != OpTy) {
      Instruction *Cast =
        CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
                         FullV, OpTy, "tmp", LF.UserInst);
      FullV = Cast;
    }

    // Update the user. ICmpZero is handled specially here (for now) because
    // Expand may have updated one of the operands of the icmp already, and
    // its new value may happen to be equal to LF.OperandValToReplace, in
    // which case doing replaceUsesOfWith leads to replacing both operands
    // with the same value. TODO: Reorganize this.
    if (LU.Kind == LSRUse::ICmpZero)
      LF.UserInst->setOperand(0, FullV);
    else
      LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
  }

  // The old IV computation feeding this fixup is now unused; queue it for
  // deletion once all rewrites are done.
  DeadInsts.emplace_back(LF.OperandValToReplace);
}
5319f22ef01cSRoman Divacky
/// Rewrite all the fixup locations with new values, following the chosen
/// solution.
void LSRInstance::ImplementSolution(
    const SmallVectorImpl<const Formula *> &Solution) {
  // Keep track of instructions we may have made dead, so that
  // we can remove them after we are done working.
  SmallVector<WeakTrackingVH, 16> DeadInsts;

  SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),
                        "lsr");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif
  // Configure the expander for LSR: suppress canonical-form rewriting and
  // place IV increments at the position chosen earlier (IVIncInsertPos).
  Rewriter.disableCanonicalMode();
  Rewriter.enableLSRMode();
  Rewriter.setIVIncInsertPos(L, IVIncInsertPos);

  // Mark phi nodes that terminate chains so the expander tries to reuse them.
  for (const IVChain &Chain : IVChainVec) {
    if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
      Rewriter.setChainedPhi(PN);
  }

  // Expand the new value definitions and update the users.
  // Solution[LUIdx] is the formula chosen by Solve() for Uses[LUIdx].
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
    for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
      Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], Rewriter, DeadInsts);
      Changed = true;
    }

  for (const IVChain &Chain : IVChainVec) {
    GenerateIVChain(Chain, Rewriter, DeadInsts);
    Changed = true;
  }
  // Clean up after ourselves. This must be done before deleting any
  // instructions.
  Rewriter.clear();

  Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
5360f22ef01cSRoman Divacky
/// Run the complete LSR algorithm on loop L: collect IV uses and candidate
/// formulae, solve for the cheapest assignment, and rewrite the loop.
/// The result is queried via getChanged() after construction.
LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                         DominatorTree &DT, LoopInfo &LI,
                         const TargetTransformInfo &TTI)
    : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L) {
  // If LoopSimplify form is not available, stay out of trouble.
  if (!L->isLoopSimplifyForm())
    return;

  // If there's no interesting work to be done, bail early.
  if (IU.empty()) return;

  // If there's too much analysis to be done, bail early. We won't be able to
  // model the problem anyway.
  unsigned NumUsers = 0;
  for (const IVStrideUse &U : IU) {
    if (++NumUsers > MaxIVUsers) {
      // U is only used inside LLVM_DEBUG; silence the unused warning in
      // release builds.
      (void)U;
      LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
                        << "\n");
      return;
    }
    // Bail out if we have a PHI on an EHPad that gets a value from a
    // CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is
    // no good place to stick any instructions.
    if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
      auto *FirstNonPHI = PN->getParent()->getFirstNonPHI();
      if (isa<FuncletPadInst>(FirstNonPHI) ||
          isa<CatchSwitchInst>(FirstNonPHI))
        for (BasicBlock *PredBB : PN->blocks())
          if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
            return;
    }
  }

#ifndef NDEBUG
  // All dominating loops must have preheaders, or SCEVExpander may not be able
  // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
  //
  // IVUsers analysis should only create users that are dominated by simple loop
  // headers. Since this loop should dominate all of its users, its user list
  // should be empty if this loop itself is not within a simple loop nest.
  for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
       Rung; Rung = Rung->getIDom()) {
    BasicBlock *BB = Rung->getBlock();
    const Loop *DomLoop = LI.getLoopFor(BB);
    if (DomLoop && DomLoop->getHeader() == BB) {
      assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
    }
  }
#endif // NDEBUG

  LLVM_DEBUG(dbgs() << "\nLSR on loop ";
             L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
             dbgs() << ":\n");

  // First, perform some low-level loop optimizations.
  OptimizeShadowIV();
  OptimizeLoopTermCond();

  // If loop preparation eliminates all interesting IV users, bail.
  if (IU.empty()) return;

  // Skip nested loops until we can model them better with formulae.
  if (!L->empty()) {
    LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
    return;
  }

  // Start collecting data and preparing for the solver.
  CollectChains();
  CollectInterestingTypesAndFactors();
  CollectFixupsAndInitialFormulae();
  CollectLoopInvariantFixupsAndFormulae();

  if (Uses.empty())
    return;

  LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
             print_uses(dbgs()));

  // Now use the reuse data to generate a bunch of interesting ways
  // to formulate the values needed for the uses.
  GenerateAllReuseFormulae();

  FilterOutUndesirableDedicatedRegisters();
  NarrowSearchSpaceUsingHeuristics();

  SmallVector<const Formula *, 8> Solution;
  Solve(Solution);

  // Release memory that is no longer needed.
  Factors.clear();
  Types.clear();
  RegUses.clear();

  if (Solution.empty())
    return;

#ifndef NDEBUG
  // Formulae should be legal.
  for (const LSRUse &LU : Uses) {
    for (const Formula &F : LU.Formulae)
      assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                        F) && "Illegal formula generated!");
  };
#endif

  // Now that we've decided what we want, make it so.
  ImplementSolution(Solution);
}
5471f22ef01cSRoman Divacky
54722cab237bSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print_factors_and_types(raw_ostream & OS) const5473f22ef01cSRoman Divacky void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
5474f22ef01cSRoman Divacky if (Factors.empty() && Types.empty()) return;
5475f22ef01cSRoman Divacky
5476f22ef01cSRoman Divacky OS << "LSR has identified the following interesting factors and types: ";
5477f22ef01cSRoman Divacky bool First = true;
5478f22ef01cSRoman Divacky
5479ff0cc061SDimitry Andric for (int64_t Factor : Factors) {
5480f22ef01cSRoman Divacky if (!First) OS << ", ";
5481f22ef01cSRoman Divacky First = false;
5482ff0cc061SDimitry Andric OS << '*' << Factor;
5483f22ef01cSRoman Divacky }
5484f22ef01cSRoman Divacky
5485ff0cc061SDimitry Andric for (Type *Ty : Types) {
5486f22ef01cSRoman Divacky if (!First) OS << ", ";
5487f22ef01cSRoman Divacky First = false;
5488ff0cc061SDimitry Andric OS << '(' << *Ty << ')';
5489f22ef01cSRoman Divacky }
5490f22ef01cSRoman Divacky OS << '\n';
5491f22ef01cSRoman Divacky }
5492f22ef01cSRoman Divacky
print_fixups(raw_ostream & OS) const5493f22ef01cSRoman Divacky void LSRInstance::print_fixups(raw_ostream &OS) const {
5494f22ef01cSRoman Divacky OS << "LSR is examining the following fixup sites:\n";
5495d88c1a5aSDimitry Andric for (const LSRUse &LU : Uses)
5496d88c1a5aSDimitry Andric for (const LSRFixup &LF : LU.Fixups) {
5497f22ef01cSRoman Divacky dbgs() << " ";
5498ff0cc061SDimitry Andric LF.print(OS);
5499f22ef01cSRoman Divacky OS << '\n';
5500f22ef01cSRoman Divacky }
5501f22ef01cSRoman Divacky }
5502f22ef01cSRoman Divacky
print_uses(raw_ostream & OS) const5503f22ef01cSRoman Divacky void LSRInstance::print_uses(raw_ostream &OS) const {
5504f22ef01cSRoman Divacky OS << "LSR is examining the following uses:\n";
5505ff0cc061SDimitry Andric for (const LSRUse &LU : Uses) {
5506f22ef01cSRoman Divacky dbgs() << " ";
5507f22ef01cSRoman Divacky LU.print(OS);
5508f22ef01cSRoman Divacky OS << '\n';
5509ff0cc061SDimitry Andric for (const Formula &F : LU.Formulae) {
5510f22ef01cSRoman Divacky OS << " ";
5511ff0cc061SDimitry Andric F.print(OS);
5512f22ef01cSRoman Divacky OS << '\n';
5513f22ef01cSRoman Divacky }
5514f22ef01cSRoman Divacky }
5515f22ef01cSRoman Divacky }
5516f22ef01cSRoman Divacky
/// Print the full state of this LSRInstance to OS: interesting factors and
/// types, the fixup sites, and the uses with their candidate formulae.
void LSRInstance::print(raw_ostream &OS) const {
  print_factors_and_types(OS);
  print_fixups(OS);
  print_uses(OS);
}
5522f22ef01cSRoman Divacky
/// Debugger-callable convenience wrapper: print the instance state to stderr.
LLVM_DUMP_METHOD void LSRInstance::dump() const {
  print(errs()); errs() << '\n';
}
55267a7e6055SDimitry Andric #endif
5527f22ef01cSRoman Divacky
namespace {

/// Legacy pass-manager wrapper for Loop Strength Reduction.  All of the real
/// work happens in LSRInstance; this class only wires the transformation into
/// the legacy LoopPass pipeline.
class LoopStrengthReduce : public LoopPass {
public:
  static char ID; // Pass ID, replacement for typeid

  LoopStrengthReduce();

private:
  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
};

} // end anonymous namespace
5542f22ef01cSRoman Divacky
// Register the pass with the legacy PassRegistry on construction.
LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
  initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}
5546f22ef01cSRoman Divacky
/// Declare the analyses LSR requires and the ones it keeps up to date.
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
  // We split critical edges, so we change the CFG. However, we do update
  // many analyses if they are around.
  AU.addPreservedID(LoopSimplifyID);

  AU.addRequired<LoopInfoWrapperPass>();
  AU.addPreserved<LoopInfoWrapperPass>();
  AU.addRequiredID(LoopSimplifyID);
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addPreserved<ScalarEvolutionWrapperPass>();
  // Requiring LoopSimplify a second time here prevents IVUsers from running
  // twice, since LoopSimplify was invalidated by running ScalarEvolution.
  AU.addRequiredID(LoopSimplifyID);
  AU.addRequired<IVUsersWrapperPass>();
  AU.addPreserved<IVUsersWrapperPass>();
  AU.addRequired<TargetTransformInfoWrapperPass>();
}
5566f22ef01cSRoman Divacky
/// Shared driver for both pass managers: run LSR on L, then clean up phis and
/// congruent IVs the rewrite may have left behind.  Returns true if the IR
/// was modified.
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                               DominatorTree &DT, LoopInfo &LI,
                               const TargetTransformInfo &TTI) {
  bool Changed = false;

  // Run the main LSR transformation.
  Changed |= LSRInstance(L, IU, SE, DT, LI, TTI).getChanged();

  // Remove any extra phis created by processing inner loops.
  Changed |= DeleteDeadPHIs(L->getHeader());
  if (EnablePhiElim && L->isLoopSimplifyForm()) {
    SmallVector<WeakTrackingVH, 16> DeadInsts;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    SCEVExpander Rewriter(SE, DL, "lsr");
#ifndef NDEBUG
    Rewriter.setDebugType(DEBUG_TYPE);
#endif
    // Fold IVs that compute congruent values into a single IV; LSR's rewrites
    // frequently expose such redundancy.
    unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
    if (numFolded) {
      Changed = true;
      DeleteTriviallyDeadInstructions(DeadInsts);
      DeleteDeadPHIs(L->getHeader());
    }
  }
  return Changed;
}
5593d88c1a5aSDimitry Andric
runOnLoop(Loop * L,LPPassManager &)5594d88c1a5aSDimitry Andric bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
5595d88c1a5aSDimitry Andric if (skipLoop(L))
5596d88c1a5aSDimitry Andric return false;
5597d88c1a5aSDimitry Andric
5598d88c1a5aSDimitry Andric auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
5599d88c1a5aSDimitry Andric auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
5600d88c1a5aSDimitry Andric auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5601d88c1a5aSDimitry Andric auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
5602d88c1a5aSDimitry Andric const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
5603d88c1a5aSDimitry Andric *L->getHeader()->getParent());
5604d88c1a5aSDimitry Andric return ReduceLoopStrength(L, IU, SE, DT, LI, TTI);
5605d88c1a5aSDimitry Andric }
5606d88c1a5aSDimitry Andric
run(Loop & L,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater &)5607f1a29dd3SDimitry Andric PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
5608f1a29dd3SDimitry Andric LoopStandardAnalysisResults &AR,
5609f1a29dd3SDimitry Andric LPMUpdater &) {
5610f1a29dd3SDimitry Andric if (!ReduceLoopStrength(&L, AM.getResult<IVUsersAnalysis>(L, AR), AR.SE,
5611f1a29dd3SDimitry Andric AR.DT, AR.LI, AR.TTI))
5612d88c1a5aSDimitry Andric return PreservedAnalyses::all();
5613d88c1a5aSDimitry Andric
5614d88c1a5aSDimitry Andric return getLoopPassPreservedAnalyses();
5615d88c1a5aSDimitry Andric }
5616d88c1a5aSDimitry Andric
char LoopStrengthReduce::ID = 0;

// Register the legacy pass and its analysis dependencies with the
// PassRegistry.  The dependency list must match getAnalysisUsage above.
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
                      "Loop Strength Reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
                    "Loop Strength Reduction", false, false)

// Factory used by the legacy pass-manager pipeline builders.
Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); }
5631