//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into forms suitable for efficient execution
// on the target.
//
// This pass performs strength reduction on array references inside loops that
// use the loop induction variable as one or more of their components. It
// rewrites expressions to take advantage of scaled-index addressing modes
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
//
// Terminology note: this code has a lot of handling for "post-increment" or
// "post-inc" users. This is not talking about post-increment addressing modes;
// it is instead talking about code like this:
//
//   %i = phi [ 0, %entry ], [ %i.next, %latch ]
//   ...
//   %i.next = add %i, 1
//   %c = icmp eq %i.next, %n
//
// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
// it's useful to think about these as the same register, with some uses using
// the value of the register before the add and some using it after. In this
// example, the icmp is a post-increment user, since it uses %i.next, which is
// the value of the induction variable after the increment. The other common
// case of post-increment users is users outside the loop.
//
// TODO: More sophistication in the way Formulae are generated and filtered.
//
// TODO: Handle multiple loops at a time.
//
// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
//       of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
//       smaller encoding (on x86 at least).
//
// TODO: When a negated register is used by an add (such as in a list of
//       multiple base registers, or as the increment expression in an addrec),
//       we may not actually need both reg and (-1 * reg) in registers; the
//       negation can be implemented by using a sub instead of an add. The
//       lack of support for taking this into consideration when making
//       register pressure decisions is partly worked around by the "Special"
//       use kind.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <map>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "loop-reduce"

/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but catches the
/// worst cases before LSR burns too much compile time and stack space.
static const unsigned MaxIVUsers = 200;

// Temporary flag to clean up congruent phis after LSR phi expansion.
// It's currently disabled until we can determine whether it's truly useful or
// not. The flag should be removed after the v3.0 release.
// This is now needed for ivchains.
static cl::opt<bool> EnablePhiElim(
  "enable-lsr-phielim", cl::Hidden, cl::init(true),
  cl::desc("Enable LSR phi elimination"));

// The flag adds instruction count to the solution cost comparison.
static cl::opt<bool> InsnsCost(
  "lsr-insns-cost", cl::Hidden, cl::init(true),
  cl::desc("Add instruction count to a LSR cost model"));

// Flag to choose how to narrow a complex LSR solution.
static cl::opt<bool> LSRExpNarrow(
  "lsr-exp-narrow", cl::Hidden, cl::init(false),
  cl::desc("Narrow LSR complex solution using"
           " expectation of registers number"));

// Flag to narrow the search space by filtering out non-optimal formulae with
// the same ScaledReg and Scale.
static cl::opt<bool> FilterSameScaledReg(
    "lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true),
    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

static cl::opt<unsigned> ComplexityLimit(
  "lsr-complexity-limit", cl::Hidden,
  cl::init(std::numeric_limits<uint16_t>::max()),
  cl::desc("LSR search space complexity limit"));

#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
  "stress-ivchain", cl::Hidden, cl::init(false),
  cl::desc("Stress test LSR IV chains"));
#else
static bool StressIVChain = false;
#endif

namespace {

struct MemAccessTy {
  /// Used in situations where the accessed memory type is unknown.
  static const unsigned UnknownAddressSpace =
      std::numeric_limits<unsigned>::max();

  Type *MemTy = nullptr;
  unsigned AddrSpace = UnknownAddressSpace;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

  bool operator==(MemAccessTy Other) const {
    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
  }

  bool operator!=(MemAccessTy Other) const { return !(*this == Other); }

  static MemAccessTy getUnknown(LLVMContext &Ctx,
                                unsigned AS = UnknownAddressSpace) {
    return MemAccessTy(Type::getVoidTy(Ctx), AS);
  }

  Type *getType() { return MemTy; }
};

/// This class holds data which is used to order reuse candidates.
class RegSortData {
public:
  /// This represents the set of LSRUse indices which reference
  /// a particular register.
  SmallBitVector UsedByIndices;

  void print(raw_ostream &OS) const;
  void dump() const;
};

} // end anonymous namespace

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void RegSortData::print(raw_ostream &OS) const {
  OS << "[NumUses=" << UsedByIndices.count() << ']';
}

LLVM_DUMP_METHOD void RegSortData::dump() const {
  print(errs()); errs() << '\n';
}
#endif

namespace {

/// Map register candidates to information about how they are used.
class RegUseTracker {
  using RegUsesTy = DenseMap<const SCEV *, RegSortData>;

  RegUsesTy RegUsesMap;
  SmallVector<const SCEV *, 16> RegSequence;

public:
  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;

  const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;

  void clear();

  using iterator = SmallVectorImpl<const SCEV *>::iterator;
  using const_iterator = SmallVectorImpl<const SCEV *>::const_iterator;

  iterator begin() { return RegSequence.begin(); }
  iterator end()   { return RegSequence.end(); }
  const_iterator begin() const { return RegSequence.begin(); }
  const_iterator end() const   { return RegSequence.end(); }
};

} // end anonymous namespace

void
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair =
    RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
  RegSortData &RSD = Pair.first->second;
  if (Pair.second)
    RegSequence.push_back(Reg);
  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);
}

void
RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  assert(RSD.UsedByIndices.size() > LUIdx);
  RSD.UsedByIndices.reset(LUIdx);
}

void
RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  // Update RegUses. The data structure is not optimized for this purpose;
  // we must iterate through it and update each of the bit vectors.
  for (auto &Pair : RegUsesMap) {
    SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
        LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
  }
}

bool
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())
    return false;
  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
  int i = UsedByIndices.find_first();
  if (i == -1) return false;
  if ((size_t)i != LUIdx) return true;
  return UsedByIndices.find_next(i) != -1;
}

const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;
}

void RegUseTracker::clear() {
  RegUsesMap.clear();
  RegSequence.clear();
}

namespace {

/// This class holds information that describes a formula for satisfying a
/// use. It may include broken-out immediates and scaled registers.
struct Formula {
  /// Global base address used for complex addressing.
  GlobalValue *BaseGV = nullptr;

  /// Base offset for complex addressing.
  int64_t BaseOffset = 0;

  /// Whether any complex addressing has a base register.
  bool HasBaseReg = false;

  /// The scale of any complex addressing.
  int64_t Scale = 0;

  /// The list of "base" registers for this use. When this is non-empty, the
  /// canonical representation of a formula is
  /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
  /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
  /// 3. The reg containing the recurrent expr related to the current loop in
  /// the formula should be put in the ScaledReg.
  /// #1 enforces that the scaled register is always used when at least two
  /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
  /// #2 enforces that 1 * reg is reg.
  /// #3 ensures invariant regs with respect to the current loop can be
  /// combined together in LSR codegen.
  /// This invariant can be temporarily broken while building a formula.
  /// However, every formula inserted into the LSRInstance must be in canonical
  /// form.
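  /// For example (illustrative): "reg(%a) + reg(%b)" is not in canonical
  /// form; its canonical form is "reg(%a) + 1*reg(%b)", while a lone
  /// "1*reg(%a)" is canonicalized back to "reg(%a)".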
  SmallVector<const SCEV *, 4> BaseRegs;

  /// The 'scaled' register for this use. This should be non-null when Scale is
  /// not zero.
  const SCEV *ScaledReg = nullptr;

  /// An additional constant offset which is added near the use. This requires
  /// a temporary register, but the offset itself can live in an add immediate
  /// field rather than a register.
  int64_t UnfoldedOffset = 0;

  Formula() = default;

  void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);

  bool isCanonical(const Loop &L) const;

  void canonicalize(const Loop &L);

  bool unscale();

  bool hasZeroEnd() const;

  size_t getNumRegs() const;
  Type *getType() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;

  void print(raw_ostream &OS) const;
  void dump() const;
};

} // end anonymous namespace

/// Recursion helper for initialMatch.
static void DoInitialMatch(const SCEV *S, Loop *L,
                           SmallVectorImpl<const SCEV *> &Good,
                           SmallVectorImpl<const SCEV *> &Bad,
                           ScalarEvolution &SE) {
  // Collect expressions which properly dominate the loop header.
  if (SE.properlyDominates(S, L->getHeader())) {
    Good.push_back(S);
    return;
  }

  // Look at add operands.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    for (const SCEV *S : Add->operands())
      DoInitialMatch(S, L, Good, Bad, SE);
    return;
  }

  // Look at addrec operands.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
    if (!AR->getStart()->isZero() && AR->isAffine()) {
      DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
      DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
                                      AR->getStepRecurrence(SE),
                                      // FIXME: AR->getNoWrapFlags()
                                      AR->getLoop(), SCEV::FlagAnyWrap),
                     L, Good, Bad, SE);
      return;
    }

  // Handle a multiplication by -1 (negation) if it didn't fold.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
    if (Mul->getOperand(0)->isAllOnesValue()) {
      SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
      const SCEV *NewMul = SE.getMulExpr(Ops);

      SmallVector<const SCEV *, 4> MyGood;
      SmallVector<const SCEV *, 4> MyBad;
      DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
      const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
        SE.getEffectiveSCEVType(NewMul->getType())));
      for (const SCEV *S : MyGood)
        Good.push_back(SE.getMulExpr(NegOne, S));
      for (const SCEV *S : MyBad)
        Bad.push_back(SE.getMulExpr(NegOne, S));
      return;
    }

  // Ok, we can't do anything interesting. Just stuff the whole thing into a
  // register and hope for the best.
  Bad.push_back(S);
}

/// Incorporate loop-variant parts of S into this Formula, attempting to keep
/// all loop-invariant and loop-computable values in a single base register.
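/// For example (illustrative), if %inv is loop-invariant and S is
/// (%inv + {0,+,4}<%L>), the loop-invariant part typically ends up in one base
/// register and the loop-variant addrec in another, which canonicalize() may
/// then move into ScaledReg.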
void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
  SmallVector<const SCEV *, 4> Good;
  SmallVector<const SCEV *, 4> Bad;
  DoInitialMatch(S, L, Good, Bad, SE);
  if (!Good.empty()) {
    const SCEV *Sum = SE.getAddExpr(Good);
    if (!Sum->isZero())
      BaseRegs.push_back(Sum);
    HasBaseReg = true;
  }
  if (!Bad.empty()) {
    const SCEV *Sum = SE.getAddExpr(Bad);
    if (!Sum->isZero())
      BaseRegs.push_back(Sum);
    HasBaseReg = true;
  }
  canonicalize(*L);
}

/// Check whether or not this formula satisfies the canonical
/// representation.
/// \see Formula::BaseRegs.
bool Formula::isCanonical(const Loop &L) const {
  if (!ScaledReg)
    return BaseRegs.size() <= 1;

  if (Scale != 1)
    return true;

  if (Scale == 1 && BaseRegs.empty())
    return false;

  const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
  if (SAR && SAR->getLoop() == &L)
    return true;

  // If ScaledReg is not a recurrent expr, or it is but its loop is not the
  // current loop, while BaseRegs contains a recurrent expr reg related to the
  // current loop, we want to swap the reg in BaseRegs with ScaledReg.
  auto I =
      find_if(make_range(BaseRegs.begin(), BaseRegs.end()), [&](const SCEV *S) {
        return isa<const SCEVAddRecExpr>(S) &&
               (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
      });
  return I == BaseRegs.end();
}

/// Helper method to morph a formula into its canonical representation.
/// \see Formula::BaseRegs.
/// Every formula having more than one base register must use the ScaledReg
/// field. Otherwise, we would have to do special cases everywhere in LSR
/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
/// On the other hand, 1*reg should be canonicalized into reg.
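/// For example (illustrative), canonicalizing "reg(%a) + reg({0,+,4}<%L>)"
/// for loop %L yields "reg(%a) + 1*reg({0,+,4}<%L>)": the last base register
/// becomes the scaled register, and the loop-recurrent expression is swapped
/// into ScaledReg if it was not already there.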
void Formula::canonicalize(const Loop &L) {
  if (isCanonical(L))
    return;
  // So far we did not need this case. This is easy to implement but it is
  // useless to maintain dead code. Besides, it could hurt compile time.
  assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");

  // Keep the invariant sum in BaseRegs and one of the variant sums in ScaledReg.
  if (!ScaledReg) {
    ScaledReg = BaseRegs.back();
    BaseRegs.pop_back();
    Scale = 1;
  }

  // If ScaledReg is an invariant with respect to L, find the reg from
  // BaseRegs containing the recurrent expr related to Loop L. Swap the
  // reg with ScaledReg.
  const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
  if (!SAR || SAR->getLoop() != &L) {
    auto I = find_if(make_range(BaseRegs.begin(), BaseRegs.end()),
                     [&](const SCEV *S) {
                       return isa<const SCEVAddRecExpr>(S) &&
                              (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
                     });
    if (I != BaseRegs.end())
      std::swap(ScaledReg, *I);
  }
}

/// Get rid of the scale in the formula.
/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
/// \return true if it was possible to get rid of the scale, false otherwise.
/// \note After this operation the formula may not be in the canonical form.
bool Formula::unscale() {
  if (Scale != 1)
    return false;
  Scale = 0;
  BaseRegs.push_back(ScaledReg);
  ScaledReg = nullptr;
  return true;
}

bool Formula::hasZeroEnd() const {
  if (UnfoldedOffset || BaseOffset)
    return false;
  if (BaseRegs.size() != 1 || ScaledReg)
    return false;
  return true;
}

/// Return the total number of register operands used by this formula. This
/// does not include register uses implied by non-constant addrec strides.
size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();
}

/// Return the type of this formula, if it has one, or null otherwise. This
/// type is meaningless except for the bit size.
Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         BaseGV ? BaseGV->getType() :
         nullptr;
}

/// Delete the given base reg from the BaseRegs list.
void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())
    std::swap(S, BaseRegs.back());
  BaseRegs.pop_back();
}

/// Test if this formula references the given register.
bool Formula::referencesReg(const SCEV *S) const {
  return S == ScaledReg || is_contained(BaseRegs, S);
}

/// Test whether this formula uses registers which are used by uses other than
/// the use with the given index.
bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
  if (ScaledReg)
    if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
      return true;
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
      return true;
  return false;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Formula::print(raw_ostream &OS) const {
  bool First = true;
  if (BaseGV) {
    if (!First) OS << " + "; else First = false;
    BaseGV->printAsOperand(OS, /*PrintType=*/false);
  }
  if (BaseOffset != 0) {
    if (!First) OS << " + "; else First = false;
    OS << BaseOffset;
  }
  for (const SCEV *BaseReg : BaseRegs) {
    if (!First) OS << " + "; else First = false;
    OS << "reg(" << *BaseReg << ')';
  }
  if (HasBaseReg && BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: !HasBaseReg**";
  }
  if (Scale != 0) {
    if (!First) OS << " + "; else First = false;
    OS << Scale << "*reg(";
    if (ScaledReg)
      OS << *ScaledReg;
    else
      OS << "<unknown>";
    OS << ')';
  }
  if (UnfoldedOffset != 0) {
    if (!First) OS << " + ";
    OS << "imm(" << UnfoldedOffset << ')';
  }
}

LLVM_DUMP_METHOD void Formula::dump() const {
  print(errs()); errs() << '\n';
}
#endif

/// Return true if the given addrec can be sign-extended without changing its
/// value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
  Type *WideTy =
    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
  return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
}

/// Return true if the given add can be sign-extended without changing its
/// value.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
  Type *WideTy =
    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
  return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}

/// Return true if the given mul can be sign-extended without changing its
/// value.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
  Type *WideTy =
    IntegerType::get(SE.getContext(),
                     SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
  return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
}

/// Return an expression for LHS /s RHS, if it can be determined and if the
/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
/// is true, expressions like (X * Y) /s Y are simplified to Y, ignoring that
/// the multiplication may overflow, which is useful when the result will be
/// used in a context where the most significant bits are ignored.
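/// For example (illustrative): dividing the addrec {0,+,4} by 4 yields
/// {0,+,1} (when sign-extension is known to be safe), and dividing the
/// constant 12 by 4 yields 3; dividing 7 by 2 yields null because the
/// remainder is not known to be zero.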
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
                                ScalarEvolution &SE,
                                bool IgnoreSignificantBits = false) {
  // Handle the trivial case, which works for any SCEV type.
  if (LHS == RHS)
    return SE.getConstant(LHS->getType(), 1);

  // Handle a few RHS special cases.
  const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
  if (RC) {
    const APInt &RA = RC->getAPInt();
    // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
    // some folding.
    if (RA.isAllOnesValue())
      return SE.getMulExpr(LHS, RC);
    // Handle x /s 1 as x.
    if (RA == 1)
      return LHS;
  }

  // Check for a division of a constant by a constant.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
    if (!RC)
      return nullptr;
    const APInt &LA = C->getAPInt();
    const APInt &RA = RC->getAPInt();
    if (LA.srem(RA) != 0)
      return nullptr;
    return SE.getConstant(LA.sdiv(RA));
  }

  // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
      const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
                                      IgnoreSignificantBits);
      if (!Step) return nullptr;
      const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
                                       IgnoreSignificantBits);
      if (!Start) return nullptr;
      // FlagNW is independent of the start value, step direction, and is
      // preserved with smaller magnitude steps.
      // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
      return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
    }
    return nullptr;
  }

  // Distribute the sdiv over add operands, if the add doesn't overflow.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
    if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
      SmallVector<const SCEV *, 8> Ops;
      for (const SCEV *S : Add->operands()) {
        const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits);
        if (!Op) return nullptr;
        Ops.push_back(Op);
      }
      return SE.getAddExpr(Ops);
    }
    return nullptr;
  }

  // Check for a multiply operand that we can pull RHS out of.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
    if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
      SmallVector<const SCEV *, 4> Ops;
      bool Found = false;
      for (const SCEV *S : Mul->operands()) {
        if (!Found)
          if (const SCEV *Q = getExactSDiv(S, RHS, SE,
                                           IgnoreSignificantBits)) {
            S = Q;
            Found = true;
          }
        Ops.push_back(S);
      }
      return Found ? SE.getMulExpr(Ops) : nullptr;
    }
    return nullptr;
  }

  // Otherwise we don't know.
  return nullptr;
}

/// If S involves the addition of a constant integer value, return that integer
/// value, and mutate S to point to a new SCEV with that value excluded.
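/// For example (illustrative), given S = (4 + %a), this returns 4 and leaves
/// S pointing at (%a); if S has no extractable constant addend, it returns 0
/// and S is unchanged.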
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
    if (C->getAPInt().getMinSignedBits() <= 64) {
      S = SE.getConstant(C->getType(), 0);
      return C->getValue()->getSExtValue();
    }
  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
    int64_t Result = ExtractImmediate(NewOps.front(), SE);
    if (Result != 0)
      S = SE.getAddExpr(NewOps);
    return Result;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
    int64_t Result = ExtractImmediate(NewOps.front(), SE);
    if (Result != 0)
      S = SE.getAddRecExpr(NewOps, AR->getLoop(),
                           // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
                           SCEV::FlagAnyWrap);
    return Result;
  }
  return 0;
}

/// If S involves the addition of a GlobalValue address, return that symbol, and
/// mutate S to point to a new SCEV with that value excluded.
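/// For example (illustrative), if S adds the address of a global @gv to some
/// other expression, this returns @gv and leaves S pointing at the remaining
/// expression; it returns null when no such symbol is found.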
ExtractSymbol(const SCEV * & S,ScalarEvolution & SE)763f22ef01cSRoman Divacky static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
764f22ef01cSRoman Divacky   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
765f22ef01cSRoman Divacky     if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
766f22ef01cSRoman Divacky       S = SE.getConstant(GV->getType(), 0);
767f22ef01cSRoman Divacky       return GV;
768f22ef01cSRoman Divacky     }
769f22ef01cSRoman Divacky   } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
770f22ef01cSRoman Divacky     SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
771f22ef01cSRoman Divacky     GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
772e580952dSDimitry Andric     if (Result)
773f22ef01cSRoman Divacky       S = SE.getAddExpr(NewOps);
774f22ef01cSRoman Divacky     return Result;
775f22ef01cSRoman Divacky   } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
776f22ef01cSRoman Divacky     SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
777f22ef01cSRoman Divacky     GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
778e580952dSDimitry Andric     if (Result)
7793b0f4066SDimitry Andric       S = SE.getAddRecExpr(NewOps, AR->getLoop(),
7803b0f4066SDimitry Andric                            // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
7813b0f4066SDimitry Andric                            SCEV::FlagAnyWrap);
782f22ef01cSRoman Divacky     return Result;
783f22ef01cSRoman Divacky   }
78491bc56edSDimitry Andric   return nullptr;
785f22ef01cSRoman Divacky }
786f22ef01cSRoman Divacky 
7877d523365SDimitry Andric /// Returns true if the specified instruction is using the specified value as an
7887d523365SDimitry Andric /// address.
isAddressUse(const TargetTransformInfo & TTI,Instruction * Inst,Value * OperandVal)7892cab237bSDimitry Andric static bool isAddressUse(const TargetTransformInfo &TTI,
7902cab237bSDimitry Andric                          Instruction *Inst, Value *OperandVal) {
791f22ef01cSRoman Divacky   bool isAddress = isa<LoadInst>(Inst);
792f22ef01cSRoman Divacky   if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
7937a7e6055SDimitry Andric     if (SI->getPointerOperand() == OperandVal)
794f22ef01cSRoman Divacky       isAddress = true;
795f22ef01cSRoman Divacky   } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
796f22ef01cSRoman Divacky     // Addressing modes can also be folded into prefetches and a variety
797f22ef01cSRoman Divacky     // of intrinsics.
798f22ef01cSRoman Divacky     switch (II->getIntrinsicID()) {
7992cab237bSDimitry Andric     case Intrinsic::memset:
800f22ef01cSRoman Divacky     case Intrinsic::prefetch:
801ffd1746dSEd Schouten       if (II->getArgOperand(0) == OperandVal)
802f22ef01cSRoman Divacky         isAddress = true;
803f22ef01cSRoman Divacky       break;
8042cab237bSDimitry Andric     case Intrinsic::memmove:
8052cab237bSDimitry Andric     case Intrinsic::memcpy:
8062cab237bSDimitry Andric       if (II->getArgOperand(0) == OperandVal ||
8072cab237bSDimitry Andric           II->getArgOperand(1) == OperandVal)
8082cab237bSDimitry Andric         isAddress = true;
8092cab237bSDimitry Andric       break;
8102cab237bSDimitry Andric     default: {
8112cab237bSDimitry Andric       MemIntrinsicInfo IntrInfo;
8122cab237bSDimitry Andric       if (TTI.getTgtMemIntrinsic(II, IntrInfo)) {
8132cab237bSDimitry Andric         if (IntrInfo.PtrVal == OperandVal)
8142cab237bSDimitry Andric           isAddress = true;
8152cab237bSDimitry Andric       }
8162cab237bSDimitry Andric     }
817f22ef01cSRoman Divacky     }
8187a7e6055SDimitry Andric   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
8197a7e6055SDimitry Andric     if (RMW->getPointerOperand() == OperandVal)
8207a7e6055SDimitry Andric       isAddress = true;
8217a7e6055SDimitry Andric   } else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
8227a7e6055SDimitry Andric     if (CmpX->getPointerOperand() == OperandVal)
8237a7e6055SDimitry Andric       isAddress = true;
824f22ef01cSRoman Divacky   }
825f22ef01cSRoman Divacky   return isAddress;
826f22ef01cSRoman Divacky }
827f22ef01cSRoman Divacky 
8287d523365SDimitry Andric /// Return the type of the memory being accessed.
getAccessType(const TargetTransformInfo & TTI,Instruction * Inst,Value * OperandVal)8292cab237bSDimitry Andric static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
8304ba319b5SDimitry Andric                                  Instruction *Inst, Value *OperandVal) {
8317d523365SDimitry Andric   MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace);
8327d523365SDimitry Andric   if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
8337d523365SDimitry Andric     AccessTy.MemTy = SI->getOperand(0)->getType();
8347d523365SDimitry Andric     AccessTy.AddrSpace = SI->getPointerAddressSpace();
8357d523365SDimitry Andric   } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
8367d523365SDimitry Andric     AccessTy.AddrSpace = LI->getPointerAddressSpace();
8377a7e6055SDimitry Andric   } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
8387a7e6055SDimitry Andric     AccessTy.AddrSpace = RMW->getPointerAddressSpace();
8397a7e6055SDimitry Andric   } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
8407a7e6055SDimitry Andric     AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
8412cab237bSDimitry Andric   } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
8422cab237bSDimitry Andric     switch (II->getIntrinsicID()) {
8432cab237bSDimitry Andric     case Intrinsic::prefetch:
8444ba319b5SDimitry Andric     case Intrinsic::memset:
8452cab237bSDimitry Andric       AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
8464ba319b5SDimitry Andric       AccessTy.MemTy = OperandVal->getType();
8474ba319b5SDimitry Andric       break;
8484ba319b5SDimitry Andric     case Intrinsic::memmove:
8494ba319b5SDimitry Andric     case Intrinsic::memcpy:
8504ba319b5SDimitry Andric       AccessTy.AddrSpace = OperandVal->getType()->getPointerAddressSpace();
8514ba319b5SDimitry Andric       AccessTy.MemTy = OperandVal->getType();
8522cab237bSDimitry Andric       break;
8532cab237bSDimitry Andric     default: {
8542cab237bSDimitry Andric       MemIntrinsicInfo IntrInfo;
8552cab237bSDimitry Andric       if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) {
8562cab237bSDimitry Andric         AccessTy.AddrSpace
8572cab237bSDimitry Andric           = IntrInfo.PtrVal->getType()->getPointerAddressSpace();
8582cab237bSDimitry Andric       }
8592cab237bSDimitry Andric 
8602cab237bSDimitry Andric       break;
8612cab237bSDimitry Andric     }
8622cab237bSDimitry Andric     }
863f22ef01cSRoman Divacky   }
864f22ef01cSRoman Divacky 
865f22ef01cSRoman Divacky   // All pointers have the same requirements, so canonicalize them to an
866f22ef01cSRoman Divacky   // arbitrary pointer type to minimize variation.
8677d523365SDimitry Andric   if (PointerType *PTy = dyn_cast<PointerType>(AccessTy.MemTy))
8687d523365SDimitry Andric     AccessTy.MemTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
869f22ef01cSRoman Divacky                                       PTy->getAddressSpace());
870f22ef01cSRoman Divacky 
871f22ef01cSRoman Divacky   return AccessTy;
872f22ef01cSRoman Divacky }
873f22ef01cSRoman Divacky 
8747d523365SDimitry Andric /// Return true if this AddRec is already a phi in its loop.
isExistingPhi(const SCEVAddRecExpr * AR,ScalarEvolution & SE)875dff0c46cSDimitry Andric static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
87630785c0eSDimitry Andric   for (PHINode &PN : AR->getLoop()->getHeader()->phis()) {
87730785c0eSDimitry Andric     if (SE.isSCEVable(PN.getType()) &&
87830785c0eSDimitry Andric         (SE.getEffectiveSCEVType(PN.getType()) ==
879dff0c46cSDimitry Andric          SE.getEffectiveSCEVType(AR->getType())) &&
88030785c0eSDimitry Andric         SE.getSCEV(&PN) == AR)
881dff0c46cSDimitry Andric       return true;
882dff0c46cSDimitry Andric   }
883dff0c46cSDimitry Andric   return false;
884dff0c46cSDimitry Andric }
885dff0c46cSDimitry Andric 
886dff0c46cSDimitry Andric /// Check if expanding this expression is likely to incur significant cost. This
887dff0c46cSDimitry Andric /// is tricky because SCEV doesn't track which expressions are actually computed
888dff0c46cSDimitry Andric /// by the current IR.
889dff0c46cSDimitry Andric ///
890dff0c46cSDimitry Andric /// We currently allow expansion of IV increments that involve adds,
891dff0c46cSDimitry Andric /// multiplication by constants, and AddRecs from existing phis.
892dff0c46cSDimitry Andric ///
893dff0c46cSDimitry Andric /// TODO: Allow UDivExpr if we can find an existing IV increment that is an
894dff0c46cSDimitry Andric /// obvious multiple of the UDivExpr.
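/// For illustration (names and SCEVs are made up): {0,+,%step}<%loop> is cheap
/// when a matching IV phi already exists, (4 * %n) is cheap because
/// multiplication by a constant is allowed, (%a * %b) is cheap only when the
/// IR already contains a mul instruction computing it, and (%a /u %b) is
/// rejected as a high-cost expansion.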
895dff0c46cSDimitry Andric static bool isHighCostExpansion(const SCEV *S,
89639d628a0SDimitry Andric                                 SmallPtrSetImpl<const SCEV*> &Processed,
897dff0c46cSDimitry Andric                                 ScalarEvolution &SE) {
898dff0c46cSDimitry Andric   // Zero/One operand expressions
899dff0c46cSDimitry Andric   switch (S->getSCEVType()) {
900dff0c46cSDimitry Andric   case scUnknown:
901dff0c46cSDimitry Andric   case scConstant:
902dff0c46cSDimitry Andric     return false;
903dff0c46cSDimitry Andric   case scTruncate:
904dff0c46cSDimitry Andric     return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
905dff0c46cSDimitry Andric                                Processed, SE);
906dff0c46cSDimitry Andric   case scZeroExtend:
907dff0c46cSDimitry Andric     return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
908dff0c46cSDimitry Andric                                Processed, SE);
909dff0c46cSDimitry Andric   case scSignExtend:
910dff0c46cSDimitry Andric     return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
911dff0c46cSDimitry Andric                                Processed, SE);
912dff0c46cSDimitry Andric   }
913dff0c46cSDimitry Andric 
91439d628a0SDimitry Andric   if (!Processed.insert(S).second)
915dff0c46cSDimitry Andric     return false;
916dff0c46cSDimitry Andric 
917dff0c46cSDimitry Andric   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
918ff0cc061SDimitry Andric     for (const SCEV *S : Add->operands()) {
919ff0cc061SDimitry Andric       if (isHighCostExpansion(S, Processed, SE))
920dff0c46cSDimitry Andric         return true;
921dff0c46cSDimitry Andric     }
922dff0c46cSDimitry Andric     return false;
923dff0c46cSDimitry Andric   }
924dff0c46cSDimitry Andric 
925dff0c46cSDimitry Andric   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
926dff0c46cSDimitry Andric     if (Mul->getNumOperands() == 2) {
927dff0c46cSDimitry Andric       // Multiplication by a constant is ok
928dff0c46cSDimitry Andric       if (isa<SCEVConstant>(Mul->getOperand(0)))
929dff0c46cSDimitry Andric         return isHighCostExpansion(Mul->getOperand(1), Processed, SE);
930dff0c46cSDimitry Andric 
931dff0c46cSDimitry Andric       // If we have the value of one operand, check if an existing
932dff0c46cSDimitry Andric       // multiplication already generates this expression.
933dff0c46cSDimitry Andric       if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
934dff0c46cSDimitry Andric         Value *UVal = U->getValue();
93591bc56edSDimitry Andric         for (User *UR : UVal->users()) {
936dff0c46cSDimitry Andric           // If U is a constant, it may be used by a ConstantExpr.
93791bc56edSDimitry Andric           Instruction *UI = dyn_cast<Instruction>(UR);
93891bc56edSDimitry Andric           if (UI && UI->getOpcode() == Instruction::Mul &&
93991bc56edSDimitry Andric               SE.isSCEVable(UI->getType())) {
94091bc56edSDimitry Andric             return SE.getSCEV(UI) == Mul;
941dff0c46cSDimitry Andric           }
942dff0c46cSDimitry Andric         }
943dff0c46cSDimitry Andric       }
944dff0c46cSDimitry Andric     }
945dff0c46cSDimitry Andric   }
946dff0c46cSDimitry Andric 
947dff0c46cSDimitry Andric   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
948dff0c46cSDimitry Andric     if (isExistingPhi(AR, SE))
949dff0c46cSDimitry Andric       return false;
950dff0c46cSDimitry Andric   }
951dff0c46cSDimitry Andric 
952dff0c46cSDimitry Andric   // For now, consider any other type of expression (div/mul/min/max) high cost.
953dff0c46cSDimitry Andric   return true;
954dff0c46cSDimitry Andric }
955dff0c46cSDimitry Andric 
9564ba319b5SDimitry Andric /// If any of the instructions in the specified set are trivially dead, delete
9577d523365SDimitry Andric /// them and see if this makes any of their operands subsequently dead.
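/// For example (illustrative), erasing a dead "add" may leave the "mul" that
/// fed it with no remaining uses; that operand is then pushed onto the
/// worklist and erased on a later iteration of the loop below.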
958f22ef01cSRoman Divacky static bool
959f37b6182SDimitry Andric DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
960f22ef01cSRoman Divacky   bool Changed = false;
961f22ef01cSRoman Divacky 
962f22ef01cSRoman Divacky   while (!DeadInsts.empty()) {
9633861d79fSDimitry Andric     Value *V = DeadInsts.pop_back_val();
9643861d79fSDimitry Andric     Instruction *I = dyn_cast_or_null<Instruction>(V);
965f22ef01cSRoman Divacky 
96691bc56edSDimitry Andric     if (!I || !isInstructionTriviallyDead(I))
967f22ef01cSRoman Divacky       continue;
968f22ef01cSRoman Divacky 
969ff0cc061SDimitry Andric     for (Use &O : I->operands())
970ff0cc061SDimitry Andric       if (Instruction *U = dyn_cast<Instruction>(O)) {
971ff0cc061SDimitry Andric         O = nullptr;
972f22ef01cSRoman Divacky         if (U->use_empty())
97397bc6c73SDimitry Andric           DeadInsts.emplace_back(U);
974f22ef01cSRoman Divacky       }
975f22ef01cSRoman Divacky 
976f22ef01cSRoman Divacky     I->eraseFromParent();
977f22ef01cSRoman Divacky     Changed = true;
978f22ef01cSRoman Divacky   }
979f22ef01cSRoman Divacky 
980f22ef01cSRoman Divacky   return Changed;
981f22ef01cSRoman Divacky }
982f22ef01cSRoman Divacky 
983f22ef01cSRoman Divacky namespace {
984d88c1a5aSDimitry Andric 
985f785676fSDimitry Andric class LSRUse;
986d88c1a5aSDimitry Andric 
987d88c1a5aSDimitry Andric } // end anonymous namespace
98891bc56edSDimitry Andric 
9894ba319b5SDimitry Andric /// Check if the addressing mode defined by \p F is completely
99091bc56edSDimitry Andric /// folded in \p LU at isel time.
99191bc56edSDimitry Andric /// This includes address-mode folding and special icmp tricks.
99291bc56edSDimitry Andric /// This function returns true if \p LU can accommodate what \p F
99391bc56edSDimitry Andric /// defines and up to 1 base + 1 scaled + offset.
99491bc56edSDimitry Andric /// In other words, if \p F has several base registers, this function may
99591bc56edSDimitry Andric /// still return true. Therefore, users still need to account for
99691bc56edSDimitry Andric /// additional base registers and/or unfolded offsets to derive an
99791bc56edSDimitry Andric /// accurate cost model.
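/// For illustration only (what folds is entirely target-dependent): on a
/// target with a [base + scale*index + imm] addressing mode, a formula such as
/// reg1 + 4*reg2 + 16 may be completely folded, while reg1 + reg2 + reg3 can
/// still be accepted here even though the extra base register must be costed
/// separately by the caller.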
99891bc56edSDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
99991bc56edSDimitry Andric                                  const LSRUse &LU, const Formula &F);
10002cab237bSDimitry Andric 
1001f785676fSDimitry Andric // Get the cost of the scaling factor used in F for LU.
1002f785676fSDimitry Andric static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
10037a7e6055SDimitry Andric                                      const LSRUse &LU, const Formula &F,
10047a7e6055SDimitry Andric                                      const Loop &L);
1005f785676fSDimitry Andric 
1006f785676fSDimitry Andric namespace {
1007f22ef01cSRoman Divacky 
10087d523365SDimitry Andric /// This class is used to measure and compare candidate formulae.
1009f22ef01cSRoman Divacky class Cost {
1010db17bf38SDimitry Andric   TargetTransformInfo::LSRCost C;
1011f22ef01cSRoman Divacky 
1012f22ef01cSRoman Divacky public:
1013db17bf38SDimitry Andric   Cost() {
1014db17bf38SDimitry Andric     C.Insns = 0;
1015db17bf38SDimitry Andric     C.NumRegs = 0;
1016db17bf38SDimitry Andric     C.AddRecCost = 0;
1017db17bf38SDimitry Andric     C.NumIVMuls = 0;
1018db17bf38SDimitry Andric     C.NumBaseAdds = 0;
1019db17bf38SDimitry Andric     C.ImmCost = 0;
1020db17bf38SDimitry Andric     C.SetupCost = 0;
1021db17bf38SDimitry Andric     C.ScaleCost = 0;
1022db17bf38SDimitry Andric   }
1023f22ef01cSRoman Divacky 
1024db17bf38SDimitry Andric   bool isLess(Cost &Other, const TargetTransformInfo &TTI);
1025f22ef01cSRoman Divacky 
102691bc56edSDimitry Andric   void Lose();
1027f22ef01cSRoman Divacky 
10286122f3e6SDimitry Andric #ifndef NDEBUG
10296122f3e6SDimitry Andric   // Once any of the metrics loses, they must all remain losers.
10306122f3e6SDimitry Andric   bool isValid() {
1031db17bf38SDimitry Andric     return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
1032db17bf38SDimitry Andric              | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
1033db17bf38SDimitry Andric       || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
1034db17bf38SDimitry Andric            & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);
10356122f3e6SDimitry Andric   }
10366122f3e6SDimitry Andric #endif
10376122f3e6SDimitry Andric 
10386122f3e6SDimitry Andric   bool isLoser() {
10396122f3e6SDimitry Andric     assert(isValid() && "invalid cost");
1040db17bf38SDimitry Andric     return C.NumRegs == ~0u;
10416122f3e6SDimitry Andric   }
10426122f3e6SDimitry Andric 
1043f785676fSDimitry Andric   void RateFormula(const TargetTransformInfo &TTI,
1044f785676fSDimitry Andric                    const Formula &F,
104539d628a0SDimitry Andric                    SmallPtrSetImpl<const SCEV *> &Regs,
1046f22ef01cSRoman Divacky                    const DenseSet<const SCEV *> &VisitedRegs,
1047f22ef01cSRoman Divacky                    const Loop *L,
1048dff0c46cSDimitry Andric                    ScalarEvolution &SE, DominatorTree &DT,
1049f785676fSDimitry Andric                    const LSRUse &LU,
105039d628a0SDimitry Andric                    SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);
1051f22ef01cSRoman Divacky 
1052f22ef01cSRoman Divacky   void print(raw_ostream &OS) const;
1053f22ef01cSRoman Divacky   void dump() const;
1054f22ef01cSRoman Divacky 
1055f22ef01cSRoman Divacky private:
1056f22ef01cSRoman Divacky   void RateRegister(const SCEV *Reg,
105739d628a0SDimitry Andric                     SmallPtrSetImpl<const SCEV *> &Regs,
1058f22ef01cSRoman Divacky                     const Loop *L,
10594ba319b5SDimitry Andric                     ScalarEvolution &SE, DominatorTree &DT,
10604ba319b5SDimitry Andric                     const TargetTransformInfo &TTI);
1061f22ef01cSRoman Divacky   void RatePrimaryRegister(const SCEV *Reg,
106239d628a0SDimitry Andric                            SmallPtrSetImpl<const SCEV *> &Regs,
1063f22ef01cSRoman Divacky                            const Loop *L,
1064dff0c46cSDimitry Andric                            ScalarEvolution &SE, DominatorTree &DT,
10654ba319b5SDimitry Andric                            SmallPtrSetImpl<const SCEV *> *LoserRegs,
10664ba319b5SDimitry Andric                            const TargetTransformInfo &TTI);
1067f22ef01cSRoman Divacky };
1068f22ef01cSRoman Divacky 
1069d88c1a5aSDimitry Andric /// An operand value in an instruction which is to be replaced with some
1070d88c1a5aSDimitry Andric /// equivalent, possibly strength-reduced, replacement.
1071d88c1a5aSDimitry Andric struct LSRFixup {
1072d88c1a5aSDimitry Andric   /// The instruction which will be updated.
10732cab237bSDimitry Andric   Instruction *UserInst = nullptr;
1074d88c1a5aSDimitry Andric 
1075d88c1a5aSDimitry Andric   /// The operand of the instruction which will be replaced. The operand may be
1076d88c1a5aSDimitry Andric   /// used more than once; every instance will be replaced.
10772cab237bSDimitry Andric   Value *OperandValToReplace = nullptr;
1078d88c1a5aSDimitry Andric 
1079d88c1a5aSDimitry Andric   /// If this user is to use the post-incremented value of an induction
10802cab237bSDimitry Andric   /// variable, this set is non-empty and holds the loops associated with the
1081d88c1a5aSDimitry Andric   /// induction variable.
1082d88c1a5aSDimitry Andric   PostIncLoopSet PostIncLoops;
1083d88c1a5aSDimitry Andric 
1084d88c1a5aSDimitry Andric   /// A constant offset to be added to the LSRUse expression.  This allows
1085d88c1a5aSDimitry Andric   /// multiple fixups to share the same LSRUse with different offsets, for
1086d88c1a5aSDimitry Andric   /// example in an unrolled loop.
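  /// For example (illustrative values), in a loop over an i32 array unrolled
  /// by 4, the fixups for A[i], A[i+1], A[i+2], and A[i+3] could share one
  /// LSRUse with Offsets 0, 4, 8, and 12.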
10872cab237bSDimitry Andric   int64_t Offset = 0;
10882cab237bSDimitry Andric 
10892cab237bSDimitry Andric   LSRFixup() = default;
1090d88c1a5aSDimitry Andric 
1091d88c1a5aSDimitry Andric   bool isUseFullyOutsideLoop(const Loop *L) const;
1092d88c1a5aSDimitry Andric 
1093d88c1a5aSDimitry Andric   void print(raw_ostream &OS) const;
1094d88c1a5aSDimitry Andric   void dump() const;
1095d88c1a5aSDimitry Andric };
1096d88c1a5aSDimitry Andric 
1097d88c1a5aSDimitry Andric /// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted
1098d88c1a5aSDimitry Andric /// SmallVectors of const SCEV*.
1099d88c1a5aSDimitry Andric struct UniquifierDenseMapInfo {
1100d88c1a5aSDimitry Andric   static SmallVector<const SCEV *, 4> getEmptyKey() {
1101d88c1a5aSDimitry Andric     SmallVector<const SCEV *, 4>  V;
1102d88c1a5aSDimitry Andric     V.push_back(reinterpret_cast<const SCEV *>(-1));
1103d88c1a5aSDimitry Andric     return V;
11043dac3a9bSDimitry Andric   }
1105f22ef01cSRoman Divacky 
1106d88c1a5aSDimitry Andric   static SmallVector<const SCEV *, 4> getTombstoneKey() {
1107d88c1a5aSDimitry Andric     SmallVector<const SCEV *, 4> V;
1108d88c1a5aSDimitry Andric     V.push_back(reinterpret_cast<const SCEV *>(-2));
1109d88c1a5aSDimitry Andric     return V;
1110d88c1a5aSDimitry Andric   }
1111d88c1a5aSDimitry Andric 
1112d88c1a5aSDimitry Andric   static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
1113d88c1a5aSDimitry Andric     return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
1114d88c1a5aSDimitry Andric   }
1115d88c1a5aSDimitry Andric 
1116d88c1a5aSDimitry Andric   static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
1117d88c1a5aSDimitry Andric                       const SmallVector<const SCEV *, 4> &RHS) {
1118d88c1a5aSDimitry Andric     return LHS == RHS;
1119d88c1a5aSDimitry Andric   }
1120d88c1a5aSDimitry Andric };
1121d88c1a5aSDimitry Andric 
1122d88c1a5aSDimitry Andric /// This class holds the state that LSR keeps for each use in IVUsers, as well
1123d88c1a5aSDimitry Andric /// as uses invented by LSR itself. It includes information about what kinds of
1124d88c1a5aSDimitry Andric /// things can be folded into the user, information about the user itself, and
1125d88c1a5aSDimitry Andric /// information about how the use may be satisfied.  TODO: Represent multiple
1126d88c1a5aSDimitry Andric /// users of the same expression in common?
1127d88c1a5aSDimitry Andric class LSRUse {
1128d88c1a5aSDimitry Andric   DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
1129d88c1a5aSDimitry Andric 
1130d88c1a5aSDimitry Andric public:
1131d88c1a5aSDimitry Andric   /// An enum for a kind of use, indicating what types of scaled and immediate
1132d88c1a5aSDimitry Andric   /// operands it might support.
1133d88c1a5aSDimitry Andric   enum KindType {
1134d88c1a5aSDimitry Andric     Basic,   ///< A normal use, with no folding.
1135d88c1a5aSDimitry Andric     Special, ///< A special case of basic, allowing -1 scales.
1136d88c1a5aSDimitry Andric     Address, ///< An address use; folding according to TargetLowering
1137d88c1a5aSDimitry Andric     ICmpZero ///< An equality icmp with both operands folded into one.
1138d88c1a5aSDimitry Andric     // TODO: Add a generic icmp too?
1139d88c1a5aSDimitry Andric   };
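  // Roughly (illustrative, not exhaustive): the pointer operand of a load or
  // store typically becomes an Address use, a loop-exit comparison rewritten
  // to test against zero becomes an ICmpZero use, and other IV users fall
  // back to Basic or Special.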
1140d88c1a5aSDimitry Andric 
11412cab237bSDimitry Andric   using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;
1142d88c1a5aSDimitry Andric 
1143d88c1a5aSDimitry Andric   KindType Kind;
1144d88c1a5aSDimitry Andric   MemAccessTy AccessTy;
1145d88c1a5aSDimitry Andric 
1146d88c1a5aSDimitry Andric   /// The list of operands which are to be replaced.
1147d88c1a5aSDimitry Andric   SmallVector<LSRFixup, 8> Fixups;
1148d88c1a5aSDimitry Andric 
1149d88c1a5aSDimitry Andric   /// Keep track of the min and max offsets of the fixups.
11502cab237bSDimitry Andric   int64_t MinOffset = std::numeric_limits<int64_t>::max();
11512cab237bSDimitry Andric   int64_t MaxOffset = std::numeric_limits<int64_t>::min();
1152d88c1a5aSDimitry Andric 
1153d88c1a5aSDimitry Andric   /// This records whether all of the fixups using this LSRUse are outside of
1154d88c1a5aSDimitry Andric   /// the loop, in which case some special-case heuristics may be used.
11552cab237bSDimitry Andric   bool AllFixupsOutsideLoop = true;
1156d88c1a5aSDimitry Andric 
1157d88c1a5aSDimitry Andric   /// RigidFormula is set to true to guarantee that this use will be associated
1158d88c1a5aSDimitry Andric   /// with a single formula--the one that initially matched. Some SCEV
1159d88c1a5aSDimitry Andric   /// expressions cannot be expanded. This allows LSR to consider the registers
1160d88c1a5aSDimitry Andric   /// used by those expressions without the need to expand them later after
1161d88c1a5aSDimitry Andric   /// changing the formula.
11622cab237bSDimitry Andric   bool RigidFormula = false;
1163d88c1a5aSDimitry Andric 
1164d88c1a5aSDimitry Andric   /// This records the widest use type for any fixup using this
1165d88c1a5aSDimitry Andric   /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
1166d88c1a5aSDimitry Andric   /// fixup widths to be equivalent, because the narrower one may be relying on
1167d88c1a5aSDimitry Andric   /// the implicit truncation to truncate away bogus bits.
11682cab237bSDimitry Andric   Type *WidestFixupType = nullptr;
1169d88c1a5aSDimitry Andric 
1170d88c1a5aSDimitry Andric   /// A list of ways to build a value that can satisfy this user.  After the
1171d88c1a5aSDimitry Andric   /// list is populated, one of these is selected heuristically and used to
1172d88c1a5aSDimitry Andric   /// formulate a replacement for OperandValToReplace in UserInst.
1173d88c1a5aSDimitry Andric   SmallVector<Formula, 12> Formulae;
1174d88c1a5aSDimitry Andric 
1175d88c1a5aSDimitry Andric   /// The set of register candidates used by all formulae in this LSRUse.
1176d88c1a5aSDimitry Andric   SmallPtrSet<const SCEV *, 4> Regs;
1177d88c1a5aSDimitry Andric 
11782cab237bSDimitry Andric   LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}
1179d88c1a5aSDimitry Andric 
1180d88c1a5aSDimitry Andric   LSRFixup &getNewFixup() {
1181d88c1a5aSDimitry Andric     Fixups.push_back(LSRFixup());
1182d88c1a5aSDimitry Andric     return Fixups.back();
1183d88c1a5aSDimitry Andric   }
1184d88c1a5aSDimitry Andric 
1185d88c1a5aSDimitry Andric   void pushFixup(LSRFixup &f) {
1186d88c1a5aSDimitry Andric     Fixups.push_back(f);
1187d88c1a5aSDimitry Andric     if (f.Offset > MaxOffset)
1188d88c1a5aSDimitry Andric       MaxOffset = f.Offset;
1189d88c1a5aSDimitry Andric     if (f.Offset < MinOffset)
1190d88c1a5aSDimitry Andric       MinOffset = f.Offset;
1191d88c1a5aSDimitry Andric   }
1192d88c1a5aSDimitry Andric 
1193d88c1a5aSDimitry Andric   bool HasFormulaWithSameRegs(const Formula &F) const;
11947a7e6055SDimitry Andric   float getNotSelectedProbability(const SCEV *Reg) const;
11957a7e6055SDimitry Andric   bool InsertFormula(const Formula &F, const Loop &L);
1196d88c1a5aSDimitry Andric   void DeleteFormula(Formula &F);
1197d88c1a5aSDimitry Andric   void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
1198d88c1a5aSDimitry Andric 
1199d88c1a5aSDimitry Andric   void print(raw_ostream &OS) const;
1200d88c1a5aSDimitry Andric   void dump() const;
1201d88c1a5aSDimitry Andric };
1202d88c1a5aSDimitry Andric 
1203d88c1a5aSDimitry Andric } // end anonymous namespace
1204d88c1a5aSDimitry Andric 
12052cab237bSDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
12062cab237bSDimitry Andric                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
12072cab237bSDimitry Andric                                  GlobalValue *BaseGV, int64_t BaseOffset,
12082cab237bSDimitry Andric                                  bool HasBaseReg, int64_t Scale,
12092cab237bSDimitry Andric                                  Instruction *Fixup = nullptr);
12102cab237bSDimitry Andric 
12117d523365SDimitry Andric /// Tally up interesting quantities from the given register.
1212f22ef01cSRoman Divacky void Cost::RateRegister(const SCEV *Reg,
121339d628a0SDimitry Andric                         SmallPtrSetImpl<const SCEV *> &Regs,
1214f22ef01cSRoman Divacky                         const Loop *L,
12154ba319b5SDimitry Andric                         ScalarEvolution &SE, DominatorTree &DT,
12164ba319b5SDimitry Andric                         const TargetTransformInfo &TTI) {
1217f22ef01cSRoman Divacky   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
12187a7e6055SDimitry Andric     // If this is an addrec for another loop, it should be an invariant
12197a7e6055SDimitry Andric     // with respect to L since L is the innermost loop (at least
12207a7e6055SDimitry Andric     // for now LSR only handles innermost loops).
1221dff0c46cSDimitry Andric     if (AR->getLoop() != L) {
1222dff0c46cSDimitry Andric       // If the AddRec exists, consider its register free and leave it alone.
1223dff0c46cSDimitry Andric       if (isExistingPhi(AR, SE))
1224f22ef01cSRoman Divacky         return;
1225dff0c46cSDimitry Andric 
12267a7e6055SDimitry Andric       // It is bad to allow LSR for current loop to add induction variables
12277a7e6055SDimitry Andric       // for its sibling loops.
12287a7e6055SDimitry Andric       if (!AR->getLoop()->contains(L)) {
122991bc56edSDimitry Andric         Lose();
12306122f3e6SDimitry Andric         return;
12316122f3e6SDimitry Andric       }
12327a7e6055SDimitry Andric 
12337a7e6055SDimitry Andric       // Otherwise, it will be an invariant with respect to Loop L.
1234db17bf38SDimitry Andric       ++C.NumRegs;
12357a7e6055SDimitry Andric       return;
12367a7e6055SDimitry Andric     }
12374ba319b5SDimitry Andric 
12384ba319b5SDimitry Andric     unsigned LoopCost = 1;
12394ba319b5SDimitry Andric     if (TTI.shouldFavorPostInc()) {
12404ba319b5SDimitry Andric       const SCEV *LoopStep = AR->getStepRecurrence(SE);
12414ba319b5SDimitry Andric       if (isa<SCEVConstant>(LoopStep)) {
12424ba319b5SDimitry Andric         // Check if a post-indexed load/store can be used.
12434ba319b5SDimitry Andric         if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
12444ba319b5SDimitry Andric             TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
12454ba319b5SDimitry Andric           const SCEV *LoopStart = AR->getStart();
12464ba319b5SDimitry Andric           if (!isa<SCEVConstant>(LoopStart) &&
12474ba319b5SDimitry Andric             SE.isLoopInvariant(LoopStart, L))
12484ba319b5SDimitry Andric               LoopCost = 0;
12494ba319b5SDimitry Andric         }
12504ba319b5SDimitry Andric       }
12514ba319b5SDimitry Andric     }
12524ba319b5SDimitry Andric     C.AddRecCost += LoopCost;
1253f22ef01cSRoman Divacky 
1254f22ef01cSRoman Divacky     // Add the step value register, if it needs one.
1255f22ef01cSRoman Divacky     // TODO: The non-affine case isn't precisely modeled here.
12566122f3e6SDimitry Andric     if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
12576122f3e6SDimitry Andric       if (!Regs.count(AR->getOperand(1))) {
12584ba319b5SDimitry Andric         RateRegister(AR->getOperand(1), Regs, L, SE, DT, TTI);
12596122f3e6SDimitry Andric         if (isLoser())
12606122f3e6SDimitry Andric           return;
12616122f3e6SDimitry Andric       }
12626122f3e6SDimitry Andric     }
1263f22ef01cSRoman Divacky   }
1264db17bf38SDimitry Andric   ++C.NumRegs;
1265f22ef01cSRoman Divacky 
1266f22ef01cSRoman Divacky   // Rough heuristic; favor registers which don't require extra setup
1267f22ef01cSRoman Divacky   // instructions in the preheader.
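  // For example (illustrative values): a loop-invariant %n (SCEVUnknown), a
  // constant, or an addrec such as {%n,+,1} is considered free here, whereas
  // an addrec like {(%a + %b),+,1} would need %a + %b materialized in the
  // preheader and is charged one unit of setup cost.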
1268f22ef01cSRoman Divacky   if (!isa<SCEVUnknown>(Reg) &&
1269f22ef01cSRoman Divacky       !isa<SCEVConstant>(Reg) &&
1270f22ef01cSRoman Divacky       !(isa<SCEVAddRecExpr>(Reg) &&
1271f22ef01cSRoman Divacky         (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
1272f22ef01cSRoman Divacky          isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
1273db17bf38SDimitry Andric     ++C.SetupCost;
12742754fe60SDimitry Andric 
1275db17bf38SDimitry Andric   C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
12762754fe60SDimitry Andric                SE.hasComputableLoopEvolution(Reg, L);
1277f22ef01cSRoman Divacky }
1278f22ef01cSRoman Divacky 
12797d523365SDimitry Andric /// Record this register in the set. If we haven't seen it before, rate
12807d523365SDimitry Andric /// it. Optional LoserRegs provides a way to declare any formula that refers to
12817d523365SDimitry Andric /// one of those regs an instant loser.
1282f22ef01cSRoman Divacky void Cost::RatePrimaryRegister(const SCEV *Reg,
128339d628a0SDimitry Andric                                SmallPtrSetImpl<const SCEV *> &Regs,
1284f22ef01cSRoman Divacky                                const Loop *L,
1285dff0c46cSDimitry Andric                                ScalarEvolution &SE, DominatorTree &DT,
12864ba319b5SDimitry Andric                                SmallPtrSetImpl<const SCEV *> *LoserRegs,
12874ba319b5SDimitry Andric                                const TargetTransformInfo &TTI) {
1288dff0c46cSDimitry Andric   if (LoserRegs && LoserRegs->count(Reg)) {
128991bc56edSDimitry Andric     Lose();
1290dff0c46cSDimitry Andric     return;
1291dff0c46cSDimitry Andric   }
129239d628a0SDimitry Andric   if (Regs.insert(Reg).second) {
12934ba319b5SDimitry Andric     RateRegister(Reg, Regs, L, SE, DT, TTI);
1294139f7f9bSDimitry Andric     if (LoserRegs && isLoser())
1295dff0c46cSDimitry Andric       LoserRegs->insert(Reg);
1296dff0c46cSDimitry Andric   }
1297f22ef01cSRoman Divacky }
1298f22ef01cSRoman Divacky 
1299f785676fSDimitry Andric void Cost::RateFormula(const TargetTransformInfo &TTI,
1300f785676fSDimitry Andric                        const Formula &F,
130139d628a0SDimitry Andric                        SmallPtrSetImpl<const SCEV *> &Regs,
1302f22ef01cSRoman Divacky                        const DenseSet<const SCEV *> &VisitedRegs,
1303f22ef01cSRoman Divacky                        const Loop *L,
1304dff0c46cSDimitry Andric                        ScalarEvolution &SE, DominatorTree &DT,
1305f785676fSDimitry Andric                        const LSRUse &LU,
130639d628a0SDimitry Andric                        SmallPtrSetImpl<const SCEV *> *LoserRegs) {
13077a7e6055SDimitry Andric   assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
1308f22ef01cSRoman Divacky   // Tally up the registers.
1309db17bf38SDimitry Andric   unsigned PrevAddRecCost = C.AddRecCost;
1310db17bf38SDimitry Andric   unsigned PrevNumRegs = C.NumRegs;
1311db17bf38SDimitry Andric   unsigned PrevNumBaseAdds = C.NumBaseAdds;
1312f22ef01cSRoman Divacky   if (const SCEV *ScaledReg = F.ScaledReg) {
1313f22ef01cSRoman Divacky     if (VisitedRegs.count(ScaledReg)) {
131491bc56edSDimitry Andric       Lose();
1315f22ef01cSRoman Divacky       return;
1316f22ef01cSRoman Divacky     }
13174ba319b5SDimitry Andric     RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs, TTI);
13186122f3e6SDimitry Andric     if (isLoser())
13196122f3e6SDimitry Andric       return;
1320f22ef01cSRoman Divacky   }
1321ff0cc061SDimitry Andric   for (const SCEV *BaseReg : F.BaseRegs) {
1322f22ef01cSRoman Divacky     if (VisitedRegs.count(BaseReg)) {
132391bc56edSDimitry Andric       Lose();
1324f22ef01cSRoman Divacky       return;
1325f22ef01cSRoman Divacky     }
13264ba319b5SDimitry Andric     RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs, TTI);
13276122f3e6SDimitry Andric     if (isLoser())
13286122f3e6SDimitry Andric       return;
1329f22ef01cSRoman Divacky   }
1330f22ef01cSRoman Divacky 
1331bd5abe19SDimitry Andric   // Determine how many (unfolded) adds we'll need inside the loop.
133291bc56edSDimitry Andric   size_t NumBaseParts = F.getNumRegs();
1333bd5abe19SDimitry Andric   if (NumBaseParts > 1)
1334f785676fSDimitry Andric     // Do not count the base and a possible second register if the target
1335f785676fSDimitry Andric     // allows to fold 2 registers.
1336db17bf38SDimitry Andric     C.NumBaseAdds +=
133791bc56edSDimitry Andric         NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
1338db17bf38SDimitry Andric   C.NumBaseAdds += (F.UnfoldedOffset != 0);
1339f785676fSDimitry Andric 
1340f785676fSDimitry Andric   // Accumulate non-free scaling amounts.
1341db17bf38SDimitry Andric   C.ScaleCost += getScalingFactorCost(TTI, LU, F, *L);
1342f22ef01cSRoman Divacky 
1343f22ef01cSRoman Divacky   // Tally up the non-zero immediates.
1344d88c1a5aSDimitry Andric   for (const LSRFixup &Fixup : LU.Fixups) {
1345d88c1a5aSDimitry Andric     int64_t O = Fixup.Offset;
1346ff0cc061SDimitry Andric     int64_t Offset = (uint64_t)O + F.BaseOffset;
1347139f7f9bSDimitry Andric     if (F.BaseGV)
1348db17bf38SDimitry Andric       C.ImmCost += 64; // Handle symbolic values conservatively.
1349f22ef01cSRoman Divacky                      // TODO: This should probably be the pointer size.
1350f22ef01cSRoman Divacky     else if (Offset != 0)
1351db17bf38SDimitry Andric       C.ImmCost += APInt(64, Offset, true).getMinSignedBits();
1352d88c1a5aSDimitry Andric 
1353d88c1a5aSDimitry Andric     // Check with target if this offset with this instruction is
1354d88c1a5aSDimitry Andric     // specifically not supported.
13552cab237bSDimitry Andric     if (LU.Kind == LSRUse::Address && Offset != 0 &&
13562cab237bSDimitry Andric         !isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
13572cab237bSDimitry Andric                               Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
1358db17bf38SDimitry Andric       C.NumBaseAdds++;
1359db17bf38SDimitry Andric   }
1360db17bf38SDimitry Andric 
1361db17bf38SDimitry Andric   // If we don't count instruction cost, exit here.
1362db17bf38SDimitry Andric   if (!InsnsCost) {
1363db17bf38SDimitry Andric     assert(isValid() && "invalid cost");
1364db17bf38SDimitry Andric     return;
1365db17bf38SDimitry Andric   }
1366db17bf38SDimitry Andric 
1367db17bf38SDimitry Andric   // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
1368db17bf38SDimitry Andric   // additional instruction (at least fill).
1369db17bf38SDimitry Andric   unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1;
1370db17bf38SDimitry Andric   if (C.NumRegs > TTIRegNum) {
1371db17bf38SDimitry Andric     // If the cost already exceeded TTIRegNum, then only the newly added
1372db17bf38SDimitry Andric     // registers can add new instructions.
1373db17bf38SDimitry Andric     if (PrevNumRegs > TTIRegNum)
1374db17bf38SDimitry Andric       C.Insns += (C.NumRegs - PrevNumRegs);
1375db17bf38SDimitry Andric     else
1376db17bf38SDimitry Andric       C.Insns += (C.NumRegs - TTIRegNum);
1377f22ef01cSRoman Divacky   }
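  // Illustrative numbers only: if the target reports 8 registers (so
  // TTIRegNum == 7) and this formula raises the register count from 6 to 9,
  // the code above charges 9 - 7 = 2 extra instructions for the expected
  // spills and fills.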
13787a7e6055SDimitry Andric 
13797a7e6055SDimitry Andric   // If the ICmpZero formula doesn't end in 0, it cannot be replaced by just an
13807a7e6055SDimitry Andric   // add or a sub. We'll need to compare the final result of the AddRec, which
13814ba319b5SDimitry Andric   // means we'll need an additional instruction. But if the target can
13824ba319b5SDimitry Andric   // macro-fuse a compare with a branch, don't count this extra instruction.
13837a7e6055SDimitry Andric   // For -10 + {0, +, 1}:
13847a7e6055SDimitry Andric   // i = i + 1;
13857a7e6055SDimitry Andric   // cmp i, 10
13867a7e6055SDimitry Andric   //
13877a7e6055SDimitry Andric   // For {-10, +, 1}:
13887a7e6055SDimitry Andric   // i = i + 1;
13894ba319b5SDimitry Andric   if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() && !TTI.canMacroFuseCmp())
1390db17bf38SDimitry Andric     C.Insns++;
13917a7e6055SDimitry Andric   // Each new AddRec adds 1 instruction to calculation.
1392db17bf38SDimitry Andric   C.Insns += (C.AddRecCost - PrevAddRecCost);
13937a7e6055SDimitry Andric 
13947a7e6055SDimitry Andric   // BaseAdds adds instructions for unfolded registers.
13957a7e6055SDimitry Andric   if (LU.Kind != LSRUse::ICmpZero)
1396db17bf38SDimitry Andric     C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
13976122f3e6SDimitry Andric   assert(isValid() && "invalid cost");
1398f22ef01cSRoman Divacky }
1399f22ef01cSRoman Divacky 
14007d523365SDimitry Andric /// Set this cost to a losing value.
140191bc56edSDimitry Andric void Cost::Lose() {
14022cab237bSDimitry Andric   C.Insns = std::numeric_limits<unsigned>::max();
14032cab237bSDimitry Andric   C.NumRegs = std::numeric_limits<unsigned>::max();
14042cab237bSDimitry Andric   C.AddRecCost = std::numeric_limits<unsigned>::max();
14052cab237bSDimitry Andric   C.NumIVMuls = std::numeric_limits<unsigned>::max();
14062cab237bSDimitry Andric   C.NumBaseAdds = std::numeric_limits<unsigned>::max();
14072cab237bSDimitry Andric   C.ImmCost = std::numeric_limits<unsigned>::max();
14082cab237bSDimitry Andric   C.SetupCost = std::numeric_limits<unsigned>::max();
14092cab237bSDimitry Andric   C.ScaleCost = std::numeric_limits<unsigned>::max();
1410f22ef01cSRoman Divacky }
1411f22ef01cSRoman Divacky 
14127d523365SDimitry Andric /// Choose the lower cost.
1413db17bf38SDimitry Andric bool Cost::isLess(Cost &Other, const TargetTransformInfo &TTI) {
1414db17bf38SDimitry Andric   if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
1415db17bf38SDimitry Andric       C.Insns != Other.C.Insns)
1416db17bf38SDimitry Andric     return C.Insns < Other.C.Insns;
1417db17bf38SDimitry Andric   return TTI.isLSRCostLess(C, Other.C);
1418f22ef01cSRoman Divacky }
1419f22ef01cSRoman Divacky 
14202cab237bSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1421f22ef01cSRoman Divacky void Cost::print(raw_ostream &OS) const {
1422db17bf38SDimitry Andric   if (InsnsCost)
1423db17bf38SDimitry Andric     OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
1424db17bf38SDimitry Andric   OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
1425db17bf38SDimitry Andric   if (C.AddRecCost != 0)
1426db17bf38SDimitry Andric     OS << ", with addrec cost " << C.AddRecCost;
1427db17bf38SDimitry Andric   if (C.NumIVMuls != 0)
1428db17bf38SDimitry Andric     OS << ", plus " << C.NumIVMuls << " IV mul"
1429db17bf38SDimitry Andric        << (C.NumIVMuls == 1 ? "" : "s");
1430db17bf38SDimitry Andric   if (C.NumBaseAdds != 0)
1431db17bf38SDimitry Andric     OS << ", plus " << C.NumBaseAdds << " base add"
1432db17bf38SDimitry Andric        << (C.NumBaseAdds == 1 ? "" : "s");
1433db17bf38SDimitry Andric   if (C.ScaleCost != 0)
1434db17bf38SDimitry Andric     OS << ", plus " << C.ScaleCost << " scale cost";
1435db17bf38SDimitry Andric   if (C.ImmCost != 0)
1436db17bf38SDimitry Andric     OS << ", plus " << C.ImmCost << " imm cost";
1437db17bf38SDimitry Andric   if (C.SetupCost != 0)
1438db17bf38SDimitry Andric     OS << ", plus " << C.SetupCost << " setup cost";
1439f22ef01cSRoman Divacky }
1440f22ef01cSRoman Divacky 
14417a7e6055SDimitry Andric LLVM_DUMP_METHOD void Cost::dump() const {
1442f22ef01cSRoman Divacky   print(errs()); errs() << '\n';
1443f22ef01cSRoman Divacky }
14447a7e6055SDimitry Andric #endif
1445f22ef01cSRoman Divacky 
14467d523365SDimitry Andric /// Test whether this fixup always uses its value outside of the given loop.
1447f22ef01cSRoman Divacky bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
1448f22ef01cSRoman Divacky   // PHI nodes use their value in their incoming blocks.
1449f22ef01cSRoman Divacky   if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
1450f22ef01cSRoman Divacky     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1451f22ef01cSRoman Divacky       if (PN->getIncomingValue(i) == OperandValToReplace &&
1452f22ef01cSRoman Divacky           L->contains(PN->getIncomingBlock(i)))
1453f22ef01cSRoman Divacky         return false;
1454f22ef01cSRoman Divacky     return true;
1455f22ef01cSRoman Divacky   }
1456f22ef01cSRoman Divacky 
1457f22ef01cSRoman Divacky   return !L->contains(UserInst);
1458f22ef01cSRoman Divacky }
1459f22ef01cSRoman Divacky 
14602cab237bSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1461f22ef01cSRoman Divacky void LSRFixup::print(raw_ostream &OS) const {
1462f22ef01cSRoman Divacky   OS << "UserInst=";
1463f22ef01cSRoman Divacky   // Store is common and interesting enough to be worth special-casing.
1464f22ef01cSRoman Divacky   if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1465f22ef01cSRoman Divacky     OS << "store ";
146691bc56edSDimitry Andric     Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
1467f22ef01cSRoman Divacky   } else if (UserInst->getType()->isVoidTy())
1468f22ef01cSRoman Divacky     OS << UserInst->getOpcodeName();
1469f22ef01cSRoman Divacky   else
147091bc56edSDimitry Andric     UserInst->printAsOperand(OS, /*PrintType=*/false);
1471f22ef01cSRoman Divacky 
1472f22ef01cSRoman Divacky   OS << ", OperandValToReplace=";
147391bc56edSDimitry Andric   OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);
1474f22ef01cSRoman Divacky 
1475ff0cc061SDimitry Andric   for (const Loop *PIL : PostIncLoops) {
1476f22ef01cSRoman Divacky     OS << ", PostIncLoop=";
1477ff0cc061SDimitry Andric     PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
1478f22ef01cSRoman Divacky   }
1479f22ef01cSRoman Divacky 
1480f22ef01cSRoman Divacky   if (Offset != 0)
1481f22ef01cSRoman Divacky     OS << ", Offset=" << Offset;
1482f22ef01cSRoman Divacky }
1483f22ef01cSRoman Divacky 
14847a7e6055SDimitry Andric LLVM_DUMP_METHOD void LSRFixup::dump() const {
1485f22ef01cSRoman Divacky   print(errs()); errs() << '\n';
1486f22ef01cSRoman Divacky }
14877a7e6055SDimitry Andric #endif
1488f22ef01cSRoman Divacky 
14897d523365SDimitry Andric /// Test whether this use has a formula which has the same registers as the given
14907d523365SDimitry Andric /// formula.
1491f22ef01cSRoman Divacky bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
1492139f7f9bSDimitry Andric   SmallVector<const SCEV *, 4> Key = F.BaseRegs;
1493f22ef01cSRoman Divacky   if (F.ScaledReg) Key.push_back(F.ScaledReg);
1494f22ef01cSRoman Divacky   // Unstable sort by host order ok, because this is only used for uniquifying.
1495*b5893f02SDimitry Andric   llvm::sort(Key);
1496f22ef01cSRoman Divacky   return Uniquifier.count(Key);
1497f22ef01cSRoman Divacky }
1498f22ef01cSRoman Divacky 
14997a7e6055SDimitry Andric /// The function returns a probability of selecting formula without Reg.
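/// For example (illustrative), if this use has 4 formulae and exactly one of
/// them references Reg, the returned probability is (4 - 1) / 4 = 0.75.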
15007a7e6055SDimitry Andric float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
15017a7e6055SDimitry Andric   unsigned FNum = 0;
15027a7e6055SDimitry Andric   for (const Formula &F : Formulae)
15037a7e6055SDimitry Andric     if (F.referencesReg(Reg))
15047a7e6055SDimitry Andric       FNum++;
15057a7e6055SDimitry Andric   return ((float)(Formulae.size() - FNum)) / Formulae.size();
15067a7e6055SDimitry Andric }
15077a7e6055SDimitry Andric 
15087d523365SDimitry Andric /// If the given formula has not yet been inserted, add it to the list, and
15097d523365SDimitry Andric /// return true. Return false otherwise.  The formula must be in canonical form.
15107a7e6055SDimitry Andric bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
15117a7e6055SDimitry Andric   assert(F.isCanonical(L) && "Invalid canonical representation");
151291bc56edSDimitry Andric 
1513f785676fSDimitry Andric   if (!Formulae.empty() && RigidFormula)
1514f785676fSDimitry Andric     return false;
1515f785676fSDimitry Andric 
1516139f7f9bSDimitry Andric   SmallVector<const SCEV *, 4> Key = F.BaseRegs;
1517f22ef01cSRoman Divacky   if (F.ScaledReg) Key.push_back(F.ScaledReg);
1518f22ef01cSRoman Divacky   // Unstable sort by host order ok, because this is only used for uniquifying.
1519*b5893f02SDimitry Andric   llvm::sort(Key);
1520f22ef01cSRoman Divacky 
1521f22ef01cSRoman Divacky   if (!Uniquifier.insert(Key).second)
1522f22ef01cSRoman Divacky     return false;
1523f22ef01cSRoman Divacky 
1524f22ef01cSRoman Divacky   // Using a register to hold the value of 0 is not profitable.
1525f22ef01cSRoman Divacky   assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
1526f22ef01cSRoman Divacky          "Zero allocated in a scaled register!");
1527f22ef01cSRoman Divacky #ifndef NDEBUG
1528ff0cc061SDimitry Andric   for (const SCEV *BaseReg : F.BaseRegs)
1529ff0cc061SDimitry Andric     assert(!BaseReg->isZero() && "Zero allocated in a base register!");
1530f22ef01cSRoman Divacky #endif
1531f22ef01cSRoman Divacky 
1532f22ef01cSRoman Divacky   // Add the formula to the list.
1533f22ef01cSRoman Divacky   Formulae.push_back(F);
1534f22ef01cSRoman Divacky 
1535f22ef01cSRoman Divacky   // Record registers now being used by this use.
1536f22ef01cSRoman Divacky   Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
153791bc56edSDimitry Andric   if (F.ScaledReg)
153891bc56edSDimitry Andric     Regs.insert(F.ScaledReg);
1539f22ef01cSRoman Divacky 
1540f22ef01cSRoman Divacky   return true;
1541f22ef01cSRoman Divacky }
1542f22ef01cSRoman Divacky 
15437d523365SDimitry Andric /// Remove the given formula from this use's list.
1544f22ef01cSRoman Divacky void LSRUse::DeleteFormula(Formula &F) {
1545f22ef01cSRoman Divacky   if (&F != &Formulae.back())
1546f22ef01cSRoman Divacky     std::swap(F, Formulae.back());
1547f22ef01cSRoman Divacky   Formulae.pop_back();
1548f22ef01cSRoman Divacky }
1549f22ef01cSRoman Divacky 
15507d523365SDimitry Andric /// Recompute the Regs field, and update RegUses.
1551f22ef01cSRoman Divacky void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
1552f22ef01cSRoman Divacky   // Now that we've filtered out some formulae, recompute the Regs set.
1553ff0cc061SDimitry Andric   SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
1554f22ef01cSRoman Divacky   Regs.clear();
1555ff0cc061SDimitry Andric   for (const Formula &F : Formulae) {
1556f22ef01cSRoman Divacky     if (F.ScaledReg) Regs.insert(F.ScaledReg);
1557f22ef01cSRoman Divacky     Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
1558f22ef01cSRoman Divacky   }
1559f22ef01cSRoman Divacky 
1560f22ef01cSRoman Divacky   // Update the RegTracker.
156139d628a0SDimitry Andric   for (const SCEV *S : OldRegs)
156239d628a0SDimitry Andric     if (!Regs.count(S))
15637d523365SDimitry Andric       RegUses.dropRegister(S, LUIdx);
1564f22ef01cSRoman Divacky }
1565f22ef01cSRoman Divacky 
15662cab237bSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1567f22ef01cSRoman Divacky void LSRUse::print(raw_ostream &OS) const {
1568f22ef01cSRoman Divacky   OS << "LSR Use: Kind=";
1569f22ef01cSRoman Divacky   switch (Kind) {
1570f22ef01cSRoman Divacky   case Basic:    OS << "Basic"; break;
1571f22ef01cSRoman Divacky   case Special:  OS << "Special"; break;
1572f22ef01cSRoman Divacky   case ICmpZero: OS << "ICmpZero"; break;
1573f22ef01cSRoman Divacky   case Address:
1574f22ef01cSRoman Divacky     OS << "Address of ";
15757d523365SDimitry Andric     if (AccessTy.MemTy->isPointerTy())
1576f22ef01cSRoman Divacky       OS << "pointer"; // the full pointer type could be really verbose
15777d523365SDimitry Andric     else {
15787d523365SDimitry Andric       OS << *AccessTy.MemTy;
15797d523365SDimitry Andric     }
15807d523365SDimitry Andric 
15817d523365SDimitry Andric     OS << " in addrspace(" << AccessTy.AddrSpace << ')';
1582f22ef01cSRoman Divacky   }
1583f22ef01cSRoman Divacky 
1584f22ef01cSRoman Divacky   OS << ", Offsets={";
1585ff0cc061SDimitry Andric   bool NeedComma = false;
1586d88c1a5aSDimitry Andric   for (const LSRFixup &Fixup : Fixups) {
1587ff0cc061SDimitry Andric     if (NeedComma) OS << ',';
1588d88c1a5aSDimitry Andric     OS << Fixup.Offset;
1589ff0cc061SDimitry Andric     NeedComma = true;
1590f22ef01cSRoman Divacky   }
1591f22ef01cSRoman Divacky   OS << '}';
1592f22ef01cSRoman Divacky 
1593f22ef01cSRoman Divacky   if (AllFixupsOutsideLoop)
1594f22ef01cSRoman Divacky     OS << ", all-fixups-outside-loop";
1595e580952dSDimitry Andric 
1596e580952dSDimitry Andric   if (WidestFixupType)
1597e580952dSDimitry Andric     OS << ", widest fixup type: " << *WidestFixupType;
1598f22ef01cSRoman Divacky }
1599f22ef01cSRoman Divacky 
16007a7e6055SDimitry Andric LLVM_DUMP_METHOD void LSRUse::dump() const {
1601f22ef01cSRoman Divacky   print(errs()); errs() << '\n';
1602f22ef01cSRoman Divacky }
16037a7e6055SDimitry Andric #endif
1604f22ef01cSRoman Divacky 
160591bc56edSDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
16067d523365SDimitry Andric                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
160791bc56edSDimitry Andric                                  GlobalValue *BaseGV, int64_t BaseOffset,
16082cab237bSDimitry Andric                                  bool HasBaseReg, int64_t Scale,
16092cab237bSDimitry Andric                                  Instruction *Fixup/*= nullptr*/) {
1610f22ef01cSRoman Divacky   switch (Kind) {
1611f22ef01cSRoman Divacky   case LSRUse::Address:
16127d523365SDimitry Andric     return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
16132cab237bSDimitry Andric                                      HasBaseReg, Scale, AccessTy.AddrSpace, Fixup);
1614f22ef01cSRoman Divacky 
1615f22ef01cSRoman Divacky   case LSRUse::ICmpZero:
1616f22ef01cSRoman Divacky     // There's not even a target hook for querying whether it would be legal to
1617f22ef01cSRoman Divacky     // fold a GV into an ICmp.
1618139f7f9bSDimitry Andric     if (BaseGV)
1619f22ef01cSRoman Divacky       return false;
1620f22ef01cSRoman Divacky 
1621f22ef01cSRoman Divacky     // ICmp only has two operands; don't allow more than two non-trivial parts.
1622139f7f9bSDimitry Andric     if (Scale != 0 && HasBaseReg && BaseOffset != 0)
1623f22ef01cSRoman Divacky       return false;
1624f22ef01cSRoman Divacky 
1625f22ef01cSRoman Divacky     // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
1626f22ef01cSRoman Divacky     // putting the scaled register in the other operand of the icmp.
1627139f7f9bSDimitry Andric     if (Scale != 0 && Scale != -1)
1628f22ef01cSRoman Divacky       return false;
1629f22ef01cSRoman Divacky 
1630f22ef01cSRoman Divacky     // If we have low-level target information, ask the target if it can fold an
1631f22ef01cSRoman Divacky     // integer immediate on an icmp.
1632139f7f9bSDimitry Andric     if (BaseOffset != 0) {
1633dff0c46cSDimitry Andric       // We have one of:
1634139f7f9bSDimitry Andric       // ICmpZero     BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
1635139f7f9bSDimitry Andric       // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
1636dff0c46cSDimitry Andric       // BaseOffset is the ICmp immediate.
1637139f7f9bSDimitry Andric       if (Scale == 0)
16382cab237bSDimitry Andric         // The cast does the right thing with
16392cab237bSDimitry Andric         // std::numeric_limits<int64_t>::min().
1640139f7f9bSDimitry Andric         BaseOffset = -(uint64_t)BaseOffset;
1641139f7f9bSDimitry Andric       return TTI.isLegalICmpImmediate(BaseOffset);
1642f22ef01cSRoman Divacky     }
1643f22ef01cSRoman Divacky 
1644dff0c46cSDimitry Andric     // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
1645f22ef01cSRoman Divacky     return true;
1646f22ef01cSRoman Divacky 
1647f22ef01cSRoman Divacky   case LSRUse::Basic:
1648f22ef01cSRoman Divacky     // Only handle single-register values.
1649139f7f9bSDimitry Andric     return !BaseGV && Scale == 0 && BaseOffset == 0;
1650f22ef01cSRoman Divacky 
1651f22ef01cSRoman Divacky   case LSRUse::Special:
16527ae0e2c9SDimitry Andric     // Special case Basic to handle -1 scales.
1653139f7f9bSDimitry Andric     return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
1654f22ef01cSRoman Divacky   }
1655f22ef01cSRoman Divacky 
1656dff0c46cSDimitry Andric   llvm_unreachable("Invalid LSRUse Kind!");
1657f22ef01cSRoman Divacky }
1658f22ef01cSRoman Divacky 
165991bc56edSDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
166091bc56edSDimitry Andric                                  int64_t MinOffset, int64_t MaxOffset,
16617d523365SDimitry Andric                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
166291bc56edSDimitry Andric                                  GlobalValue *BaseGV, int64_t BaseOffset,
166391bc56edSDimitry Andric                                  bool HasBaseReg, int64_t Scale) {
1664f22ef01cSRoman Divacky   // Check for overflow.
1665139f7f9bSDimitry Andric   if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
1666f22ef01cSRoman Divacky       (MinOffset > 0))
1667f22ef01cSRoman Divacky     return false;
1668139f7f9bSDimitry Andric   MinOffset = (uint64_t)BaseOffset + MinOffset;
1669139f7f9bSDimitry Andric   if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
1670f22ef01cSRoman Divacky       (MaxOffset > 0))
1671f22ef01cSRoman Divacky     return false;
1672139f7f9bSDimitry Andric   MaxOffset = (uint64_t)BaseOffset + MaxOffset;
1673139f7f9bSDimitry Andric 
167491bc56edSDimitry Andric   return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
167591bc56edSDimitry Andric                               HasBaseReg, Scale) &&
167691bc56edSDimitry Andric          isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
167791bc56edSDimitry Andric                               HasBaseReg, Scale);
167891bc56edSDimitry Andric }
167991bc56edSDimitry Andric 
168091bc56edSDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
168191bc56edSDimitry Andric                                  int64_t MinOffset, int64_t MaxOffset,
16827d523365SDimitry Andric                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
16837a7e6055SDimitry Andric                                  const Formula &F, const Loop &L) {
168491bc56edSDimitry Andric   // For the purpose of isAMCompletelyFolded either having a canonical formula
168591bc56edSDimitry Andric   // or a scale not equal to zero is correct.
168691bc56edSDimitry Andric   // Problems may arise from non-canonical formulae having a scale == 0.
168791bc56edSDimitry Andric   // Strictly speaking it would be best to just rely on canonical formulae.
168891bc56edSDimitry Andric   // However, when we generate the scaled formulae, we first check that the
168991bc56edSDimitry Andric   // scaling factor is profitable before computing the actual ScaledReg, for
169091bc56edSDimitry Andric   // the sake of compile time.
16917a7e6055SDimitry Andric   assert((F.isCanonical(L) || F.Scale != 0));
169291bc56edSDimitry Andric   return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
169391bc56edSDimitry Andric                               F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
169491bc56edSDimitry Andric }
169591bc56edSDimitry Andric 
16967d523365SDimitry Andric /// Test whether we know how to expand the current formula.
169791bc56edSDimitry Andric static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
16987d523365SDimitry Andric                        int64_t MaxOffset, LSRUse::KindType Kind,
16997d523365SDimitry Andric                        MemAccessTy AccessTy, GlobalValue *BaseGV,
17007d523365SDimitry Andric                        int64_t BaseOffset, bool HasBaseReg, int64_t Scale) {
170191bc56edSDimitry Andric   // We know how to expand completely foldable formulae.
170291bc56edSDimitry Andric   return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
170391bc56edSDimitry Andric                               BaseOffset, HasBaseReg, Scale) ||
170491bc56edSDimitry Andric          // Or formulae that use a base register produced by a sum of base
170591bc56edSDimitry Andric          // registers.
170691bc56edSDimitry Andric          (Scale == 1 &&
170791bc56edSDimitry Andric           isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
170891bc56edSDimitry Andric                                BaseGV, BaseOffset, true, 0));
1709f22ef01cSRoman Divacky }
1710f22ef01cSRoman Divacky 
1711139f7f9bSDimitry Andric static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
17127d523365SDimitry Andric                        int64_t MaxOffset, LSRUse::KindType Kind,
17137d523365SDimitry Andric                        MemAccessTy AccessTy, const Formula &F) {
1714139f7f9bSDimitry Andric   return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
1715139f7f9bSDimitry Andric                     F.BaseOffset, F.HasBaseReg, F.Scale);
1716139f7f9bSDimitry Andric }
1717139f7f9bSDimitry Andric 
171891bc56edSDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
171991bc56edSDimitry Andric                                  const LSRUse &LU, const Formula &F) {
17202cab237bSDimitry Andric   // Target may want to look at the user instructions.
17212cab237bSDimitry Andric   if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
17222cab237bSDimitry Andric     for (const LSRFixup &Fixup : LU.Fixups)
17232cab237bSDimitry Andric       if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
17242cab237bSDimitry Andric                                 (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
17252cab237bSDimitry Andric                                 F.Scale, Fixup.UserInst))
17262cab237bSDimitry Andric         return false;
17272cab237bSDimitry Andric     return true;
17282cab237bSDimitry Andric   }
17292cab237bSDimitry Andric 
173091bc56edSDimitry Andric   return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
173191bc56edSDimitry Andric                               LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
173291bc56edSDimitry Andric                               F.Scale);
1733f785676fSDimitry Andric }
1734f785676fSDimitry Andric 
1735f785676fSDimitry Andric static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
17367a7e6055SDimitry Andric                                      const LSRUse &LU, const Formula &F,
17377a7e6055SDimitry Andric                                      const Loop &L) {
1738f785676fSDimitry Andric   if (!F.Scale)
1739f785676fSDimitry Andric     return 0;
174091bc56edSDimitry Andric 
174191bc56edSDimitry Andric   // If the use is not completely folded in that instruction, we will have to
174291bc56edSDimitry Andric   // pay an extra cost only for scale != 1.
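  // For example (illustrative), an unfoldable reg1 + 2*reg2 is charged one
  // extra unit here, while reg1 + 1*reg2 can be lowered as a plain add and
  // costs nothing extra.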
174391bc56edSDimitry Andric   if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
17447a7e6055SDimitry Andric                             LU.AccessTy, F, L))
174591bc56edSDimitry Andric     return F.Scale != 1;
1746f785676fSDimitry Andric 
1747f785676fSDimitry Andric   switch (LU.Kind) {
1748f785676fSDimitry Andric   case LSRUse::Address: {
1749f785676fSDimitry Andric     // Check the scaling factor cost with both the min and max offsets.
17507d523365SDimitry Andric     int ScaleCostMinOffset = TTI.getScalingFactorCost(
17517d523365SDimitry Andric         LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MinOffset, F.HasBaseReg,
17527d523365SDimitry Andric         F.Scale, LU.AccessTy.AddrSpace);
17537d523365SDimitry Andric     int ScaleCostMaxOffset = TTI.getScalingFactorCost(
17547d523365SDimitry Andric         LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, F.HasBaseReg,
17557d523365SDimitry Andric         F.Scale, LU.AccessTy.AddrSpace);
1756f785676fSDimitry Andric 
1757f785676fSDimitry Andric     assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
1758f785676fSDimitry Andric            "Legal addressing mode has an illegal cost!");
1759f785676fSDimitry Andric     return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1760f785676fSDimitry Andric   }
1761f785676fSDimitry Andric   case LSRUse::ICmpZero:
1762f785676fSDimitry Andric   case LSRUse::Basic:
1763f785676fSDimitry Andric   case LSRUse::Special:
176491bc56edSDimitry Andric     // The use is completely folded, i.e., everything is folded into the
176591bc56edSDimitry Andric     // instruction.
1766f785676fSDimitry Andric     return 0;
1767f785676fSDimitry Andric   }
1768f785676fSDimitry Andric 
1769f785676fSDimitry Andric   llvm_unreachable("Invalid LSRUse Kind!");
1770f785676fSDimitry Andric }
1771f785676fSDimitry Andric 
1772139f7f9bSDimitry Andric static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
17737d523365SDimitry Andric                              LSRUse::KindType Kind, MemAccessTy AccessTy,
1774139f7f9bSDimitry Andric                              GlobalValue *BaseGV, int64_t BaseOffset,
1775139f7f9bSDimitry Andric                              bool HasBaseReg) {
1776f22ef01cSRoman Divacky   // Fast-path: zero is always foldable.
1777139f7f9bSDimitry Andric   if (BaseOffset == 0 && !BaseGV) return true;
1778f22ef01cSRoman Divacky 
1779f22ef01cSRoman Divacky   // Conservatively, create an address with an immediate and a
1780f22ef01cSRoman Divacky   // base and a scale.
1781139f7f9bSDimitry Andric   int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1782f22ef01cSRoman Divacky 
1783f22ef01cSRoman Divacky   // Canonicalize a scale of 1 to a base register if the formula doesn't
1784f22ef01cSRoman Divacky   // already have a base register.
1785139f7f9bSDimitry Andric   if (!HasBaseReg && Scale == 1) {
1786139f7f9bSDimitry Andric     Scale = 0;
1787139f7f9bSDimitry Andric     HasBaseReg = true;
1788f22ef01cSRoman Divacky   }
1789f22ef01cSRoman Divacky 
179091bc56edSDimitry Andric   return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
179191bc56edSDimitry Andric                               HasBaseReg, Scale);
1792f22ef01cSRoman Divacky }
1793f22ef01cSRoman Divacky 
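/// Summary (descriptive comment): split S into a symbolic base and an
/// immediate offset and check whether the result would always be foldable for
/// the given kind of use across the whole [MinOffset, MaxOffset] range;
/// returns false if S contains anything beyond a symbol and a constant.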
1794139f7f9bSDimitry Andric static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1795139f7f9bSDimitry Andric                              ScalarEvolution &SE, int64_t MinOffset,
1796139f7f9bSDimitry Andric                              int64_t MaxOffset, LSRUse::KindType Kind,
17977d523365SDimitry Andric                              MemAccessTy AccessTy, const SCEV *S,
17987d523365SDimitry Andric                              bool HasBaseReg) {
1799f22ef01cSRoman Divacky   // Fast-path: zero is always foldable.
1800f22ef01cSRoman Divacky   if (S->isZero()) return true;
1801f22ef01cSRoman Divacky 
1802f22ef01cSRoman Divacky   // Conservatively, create an address with an immediate and a
1803f22ef01cSRoman Divacky   // base and a scale.
1804139f7f9bSDimitry Andric   int64_t BaseOffset = ExtractImmediate(S, SE);
1805f22ef01cSRoman Divacky   GlobalValue *BaseGV = ExtractSymbol(S, SE);
1806f22ef01cSRoman Divacky 
1807f22ef01cSRoman Divacky   // If there's anything else involved, it's not foldable.
1808f22ef01cSRoman Divacky   if (!S->isZero()) return false;
1809f22ef01cSRoman Divacky 
1810f22ef01cSRoman Divacky   // Fast-path: zero is always foldable.
1811139f7f9bSDimitry Andric   if (BaseOffset == 0 && !BaseGV) return true;
1812f22ef01cSRoman Divacky 
1813f22ef01cSRoman Divacky   // Conservatively, create an address with an immediate and a
1814f22ef01cSRoman Divacky   // base and a scale.
1815139f7f9bSDimitry Andric   int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1816f22ef01cSRoman Divacky 
181791bc56edSDimitry Andric   return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1818139f7f9bSDimitry Andric                               BaseOffset, HasBaseReg, Scale);
1819f22ef01cSRoman Divacky }
1820f22ef01cSRoman Divacky 
1821ffd1746dSEd Schouten namespace {
1822ffd1746dSEd Schouten 
18237d523365SDimitry Andric /// An individual increment in a Chain of IV increments.  Relate an IV user to
18247d523365SDimitry Andric /// an expression that computes the IV it uses from the IV used by the previous
18257d523365SDimitry Andric /// link in the Chain.
1826dff0c46cSDimitry Andric ///
1827dff0c46cSDimitry Andric /// For the head of a chain, IncExpr holds the absolute SCEV expression for the
1828dff0c46cSDimitry Andric /// original IVOperand. The head of the chain's IVOperand is only valid during
1829dff0c46cSDimitry Andric /// chain collection, before LSR replaces IV users. During chain generation,
1830dff0c46cSDimitry Andric /// IncExpr can be used to find the new IVOperand that computes the same
1831dff0c46cSDimitry Andric /// expression.
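///
/// A hypothetical chain, for illustration only:
///
///   %p0 = phi i8* [ %base, %preheader ], [ %p2, %latch ]
///   %v0 = load i8, i8* %p0
///   %p1 = getelementptr i8, i8* %p0, i64 4
///   %v1 = load i8, i8* %p1
///   %p2 = getelementptr i8, i8* %p1, i64 4
///
/// Here each address is a constant increment over the previous one, so the
/// users can be chained with an IncExpr of 4 between successive links.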
1832dff0c46cSDimitry Andric struct IVInc {
1833dff0c46cSDimitry Andric   Instruction *UserInst;
1834dff0c46cSDimitry Andric   Value* IVOperand;
1835dff0c46cSDimitry Andric   const SCEV *IncExpr;
1836dff0c46cSDimitry Andric 
18372cab237bSDimitry Andric   IVInc(Instruction *U, Value *O, const SCEV *E)
18382cab237bSDimitry Andric       : UserInst(U), IVOperand(O), IncExpr(E) {}
1839dff0c46cSDimitry Andric };
1840dff0c46cSDimitry Andric 
18417d523365SDimitry Andric // The list of IV increments in program order.  We typically add the head of a
18427d523365SDimitry Andric // chain without finding subsequent links.
18437ae0e2c9SDimitry Andric struct IVChain {
18447ae0e2c9SDimitry Andric   SmallVector<IVInc, 1> Incs;
18452cab237bSDimitry Andric   const SCEV *ExprBase = nullptr;
18467ae0e2c9SDimitry Andric 
18472cab237bSDimitry Andric   IVChain() = default;
18487ae0e2c9SDimitry Andric   IVChain(const IVInc &Head, const SCEV *Base)
18497ae0e2c9SDimitry Andric       : Incs(1, Head), ExprBase(Base) {}
18507ae0e2c9SDimitry Andric 
18512cab237bSDimitry Andric   using const_iterator = SmallVectorImpl<IVInc>::const_iterator;
18527ae0e2c9SDimitry Andric 
18537d523365SDimitry Andric   // Return the first increment in the chain, skipping the chain head.
18547ae0e2c9SDimitry Andric   const_iterator begin() const {
18557ae0e2c9SDimitry Andric     assert(!Incs.empty());
185691bc56edSDimitry Andric     return std::next(Incs.begin());
18577ae0e2c9SDimitry Andric   }
18587ae0e2c9SDimitry Andric   const_iterator end() const {
18597ae0e2c9SDimitry Andric     return Incs.end();
18607ae0e2c9SDimitry Andric   }
18617ae0e2c9SDimitry Andric 
18627d523365SDimitry Andric   // Returns true if this chain contains any increments.
18637ae0e2c9SDimitry Andric   bool hasIncs() const { return Incs.size() >= 2; }
18647ae0e2c9SDimitry Andric 
18657d523365SDimitry Andric   // Add an IVInc to the end of this chain.
18667ae0e2c9SDimitry Andric   void add(const IVInc &X) { Incs.push_back(X); }
18677ae0e2c9SDimitry Andric 
18687d523365SDimitry Andric   // Returns the last UserInst in the chain.
18697ae0e2c9SDimitry Andric   Instruction *tailUserInst() const { return Incs.back().UserInst; }
18707ae0e2c9SDimitry Andric 
18717d523365SDimitry Andric   // Returns true if IncExpr can be profitably added to this chain.
18727ae0e2c9SDimitry Andric   bool isProfitableIncrement(const SCEV *OperExpr,
18737ae0e2c9SDimitry Andric                              const SCEV *IncExpr,
18747ae0e2c9SDimitry Andric                              ScalarEvolution&);
18757ae0e2c9SDimitry Andric };
1876dff0c46cSDimitry Andric 
18777d523365SDimitry Andric /// Helper for CollectChains to track multiple IV increment uses.  Distinguish
18787d523365SDimitry Andric /// between FarUsers that definitely cross IV increments and NearUsers that may
18797d523365SDimitry Andric /// be used between IV increments.
1880dff0c46cSDimitry Andric struct ChainUsers {
1881dff0c46cSDimitry Andric   SmallPtrSet<Instruction*, 4> FarUsers;
1882dff0c46cSDimitry Andric   SmallPtrSet<Instruction*, 4> NearUsers;
1883dff0c46cSDimitry Andric };
1884dff0c46cSDimitry Andric 
18857d523365SDimitry Andric /// This class holds state for the main loop strength reduction logic.
1886f22ef01cSRoman Divacky class LSRInstance {
1887f22ef01cSRoman Divacky   IVUsers &IU;
1888f22ef01cSRoman Divacky   ScalarEvolution &SE;
1889f22ef01cSRoman Divacky   DominatorTree &DT;
1890f22ef01cSRoman Divacky   LoopInfo &LI;
1891139f7f9bSDimitry Andric   const TargetTransformInfo &TTI;
1892f22ef01cSRoman Divacky   Loop *const L;
18932cab237bSDimitry Andric   bool Changed = false;
1894f22ef01cSRoman Divacky 
18957d523365SDimitry Andric   /// This is the insert position at which the current loop's induction
18967d523365SDimitry Andric   /// variable increment should be placed. In simple loops, this is the latch
18977d523365SDimitry Andric   /// block's terminator. But in more complicated cases, this is a position
18987d523365SDimitry Andric   /// which will dominate all the in-loop post-increment users.
18992cab237bSDimitry Andric   Instruction *IVIncInsertPos = nullptr;
1900f22ef01cSRoman Divacky 
19017d523365SDimitry Andric   /// Interesting factors between use strides.
1902d88c1a5aSDimitry Andric   ///
1903d88c1a5aSDimitry Andric   /// We explicitly use a SetVector which contains a SmallSet, instead of the
1904d88c1a5aSDimitry Andric   /// default, a SmallDenseSet, because we need to use the full range of
1905d88c1a5aSDimitry Andric   /// int64_ts, and there's currently no good way of doing that with
1906d88c1a5aSDimitry Andric   /// SmallDenseSet.
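  ///
  /// For example (hypothetical strides), if one use strides by 4 and another
  /// strides by 12, the factor 3 relating them would be recorded here.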
1907d88c1a5aSDimitry Andric   SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;
1908f22ef01cSRoman Divacky 
19097d523365SDimitry Andric   /// Interesting use types, to facilitate truncation reuse.
19106122f3e6SDimitry Andric   SmallSetVector<Type *, 4> Types;
1911f22ef01cSRoman Divacky 
19127d523365SDimitry Andric   /// The list of interesting uses.
1913f22ef01cSRoman Divacky   SmallVector<LSRUse, 16> Uses;
1914f22ef01cSRoman Divacky 
19157d523365SDimitry Andric   /// Track which uses use which register candidates.
1916f22ef01cSRoman Divacky   RegUseTracker RegUses;
1917f22ef01cSRoman Divacky 
1918dff0c46cSDimitry Andric   // Limit the number of chains to avoid quadratic behavior. We don't expect to
1919dff0c46cSDimitry Andric   // have more than a few IV increment chains in a loop. Missing a Chain falls
1920dff0c46cSDimitry Andric   // back to normal LSR behavior for those uses.
1921dff0c46cSDimitry Andric   static const unsigned MaxChains = 8;
1922dff0c46cSDimitry Andric 
19237d523365SDimitry Andric   /// IV users can form a chain of IV increments.
1924dff0c46cSDimitry Andric   SmallVector<IVChain, MaxChains> IVChainVec;
1925dff0c46cSDimitry Andric 
19267d523365SDimitry Andric   /// IV users that belong to profitable IVChains.
1927dff0c46cSDimitry Andric   SmallPtrSet<Use*, MaxChains> IVIncSet;
1928dff0c46cSDimitry Andric 
1929f22ef01cSRoman Divacky   void OptimizeShadowIV();
1930f22ef01cSRoman Divacky   bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
1931f22ef01cSRoman Divacky   ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
1932f22ef01cSRoman Divacky   void OptimizeLoopTermCond();
1933f22ef01cSRoman Divacky 
1934dff0c46cSDimitry Andric   void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
1935dff0c46cSDimitry Andric                         SmallVectorImpl<ChainUsers> &ChainUsersVec);
1936dff0c46cSDimitry Andric   void FinalizeChain(IVChain &Chain);
1937dff0c46cSDimitry Andric   void CollectChains();
1938dff0c46cSDimitry Andric   void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
1939f37b6182SDimitry Andric                        SmallVectorImpl<WeakTrackingVH> &DeadInsts);
1940dff0c46cSDimitry Andric 
1941f22ef01cSRoman Divacky   void CollectInterestingTypesAndFactors();
1942f22ef01cSRoman Divacky   void CollectFixupsAndInitialFormulae();
1943f22ef01cSRoman Divacky 
1944f22ef01cSRoman Divacky   // Support for sharing of LSRUses between LSRFixups.
19452cab237bSDimitry Andric   using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
1946f22ef01cSRoman Divacky   UseMapTy UseMap;
1947f22ef01cSRoman Divacky 
1948f22ef01cSRoman Divacky   bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
19497d523365SDimitry Andric                           LSRUse::KindType Kind, MemAccessTy AccessTy);
1950f22ef01cSRoman Divacky 
19517d523365SDimitry Andric   std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
19527d523365SDimitry Andric                                     MemAccessTy AccessTy);
1953f22ef01cSRoman Divacky 
19542754fe60SDimitry Andric   void DeleteUse(LSRUse &LU, size_t LUIdx);
1955f22ef01cSRoman Divacky 
1956f22ef01cSRoman Divacky   LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
1957f22ef01cSRoman Divacky 
1958f22ef01cSRoman Divacky   void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
1959f22ef01cSRoman Divacky   void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
1960f22ef01cSRoman Divacky   void CountRegisters(const Formula &F, size_t LUIdx);
1961f22ef01cSRoman Divacky   bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
1962f22ef01cSRoman Divacky 
1963f22ef01cSRoman Divacky   void CollectLoopInvariantFixupsAndFormulae();
1964f22ef01cSRoman Divacky 
1965f22ef01cSRoman Divacky   void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
1966f22ef01cSRoman Divacky                               unsigned Depth = 0);
196791bc56edSDimitry Andric 
196891bc56edSDimitry Andric   void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
196991bc56edSDimitry Andric                                   const Formula &Base, unsigned Depth,
197091bc56edSDimitry Andric                                   size_t Idx, bool IsScaledReg = false);
1971f22ef01cSRoman Divacky   void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
197291bc56edSDimitry Andric   void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
197391bc56edSDimitry Andric                                    const Formula &Base, size_t Idx,
197491bc56edSDimitry Andric                                    bool IsScaledReg = false);
1975f22ef01cSRoman Divacky   void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
197691bc56edSDimitry Andric   void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
197791bc56edSDimitry Andric                                    const Formula &Base,
197891bc56edSDimitry Andric                                    const SmallVectorImpl<int64_t> &Worklist,
197991bc56edSDimitry Andric                                    size_t Idx, bool IsScaledReg = false);
1980f22ef01cSRoman Divacky   void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
1981f22ef01cSRoman Divacky   void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
1982f22ef01cSRoman Divacky   void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
1983f22ef01cSRoman Divacky   void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
1984f22ef01cSRoman Divacky   void GenerateCrossUseConstantOffsets();
1985f22ef01cSRoman Divacky   void GenerateAllReuseFormulae();
1986f22ef01cSRoman Divacky 
1987f22ef01cSRoman Divacky   void FilterOutUndesirableDedicatedRegisters();
1988f22ef01cSRoman Divacky 
1989f22ef01cSRoman Divacky   size_t EstimateSearchSpaceComplexity() const;
1990e580952dSDimitry Andric   void NarrowSearchSpaceByDetectingSupersets();
1991e580952dSDimitry Andric   void NarrowSearchSpaceByCollapsingUnrolledCode();
1992e580952dSDimitry Andric   void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
1993c4394386SDimitry Andric   void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
19947a7e6055SDimitry Andric   void NarrowSearchSpaceByDeletingCostlyFormulas();
1995e580952dSDimitry Andric   void NarrowSearchSpaceByPickingWinnerRegs();
1996f22ef01cSRoman Divacky   void NarrowSearchSpaceUsingHeuristics();
1997f22ef01cSRoman Divacky 
1998f22ef01cSRoman Divacky   void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
1999f22ef01cSRoman Divacky                     Cost &SolutionCost,
2000f22ef01cSRoman Divacky                     SmallVectorImpl<const Formula *> &Workspace,
2001f22ef01cSRoman Divacky                     const Cost &CurCost,
2002f22ef01cSRoman Divacky                     const SmallPtrSet<const SCEV *, 16> &CurRegs,
2003f22ef01cSRoman Divacky                     DenseSet<const SCEV *> &VisitedRegs) const;
2004f22ef01cSRoman Divacky   void Solve(SmallVectorImpl<const Formula *> &Solution) const;
2005f22ef01cSRoman Divacky 
2006f22ef01cSRoman Divacky   BasicBlock::iterator
2007f22ef01cSRoman Divacky     HoistInsertPosition(BasicBlock::iterator IP,
2008f22ef01cSRoman Divacky                         const SmallVectorImpl<Instruction *> &Inputs) const;
2009dff0c46cSDimitry Andric   BasicBlock::iterator
2010dff0c46cSDimitry Andric     AdjustInsertPositionForExpand(BasicBlock::iterator IP,
2011f22ef01cSRoman Divacky                                   const LSRFixup &LF,
2012dff0c46cSDimitry Andric                                   const LSRUse &LU,
2013dff0c46cSDimitry Andric                                   SCEVExpander &Rewriter) const;
2014f22ef01cSRoman Divacky 
2015f37b6182SDimitry Andric   Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
2016f37b6182SDimitry Andric                 BasicBlock::iterator IP, SCEVExpander &Rewriter,
2017f37b6182SDimitry Andric                 SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
2018d88c1a5aSDimitry Andric   void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
2019f37b6182SDimitry Andric                      const Formula &F, SCEVExpander &Rewriter,
2020f37b6182SDimitry Andric                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
2021f37b6182SDimitry Andric   void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
2022f22ef01cSRoman Divacky                SCEVExpander &Rewriter,
2023f37b6182SDimitry Andric                SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
20247d523365SDimitry Andric   void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
2025f22ef01cSRoman Divacky 
2026dff0c46cSDimitry Andric public:
20277d523365SDimitry Andric   LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
20287d523365SDimitry Andric               LoopInfo &LI, const TargetTransformInfo &TTI);
2029f22ef01cSRoman Divacky 
2030f22ef01cSRoman Divacky   bool getChanged() const { return Changed; }
2031f22ef01cSRoman Divacky 
2032f22ef01cSRoman Divacky   void print_factors_and_types(raw_ostream &OS) const;
2033f22ef01cSRoman Divacky   void print_fixups(raw_ostream &OS) const;
2034f22ef01cSRoman Divacky   void print_uses(raw_ostream &OS) const;
2035f22ef01cSRoman Divacky   void print(raw_ostream &OS) const;
2036f22ef01cSRoman Divacky   void dump() const;
2037f22ef01cSRoman Divacky };
2038f22ef01cSRoman Divacky 
2039d88c1a5aSDimitry Andric } // end anonymous namespace
2040f22ef01cSRoman Divacky 
20417d523365SDimitry Andric /// If the IV is used in an int-to-float cast inside the loop then try to
20427d523365SDimitry Andric /// eliminate the cast operation.
2043f22ef01cSRoman Divacky void LSRInstance::OptimizeShadowIV() {
2044f22ef01cSRoman Divacky   const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
2045f22ef01cSRoman Divacky   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2046f22ef01cSRoman Divacky     return;
2047f22ef01cSRoman Divacky 
2048f22ef01cSRoman Divacky   for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
2049f22ef01cSRoman Divacky        UI != E; /* empty */) {
2050f22ef01cSRoman Divacky     IVUsers::const_iterator CandidateUI = UI;
2051f22ef01cSRoman Divacky     ++UI;
2052f22ef01cSRoman Divacky     Instruction *ShadowUse = CandidateUI->getUser();
205391bc56edSDimitry Andric     Type *DestTy = nullptr;
20546122f3e6SDimitry Andric     bool IsSigned = false;
2055f22ef01cSRoman Divacky 
2056f22ef01cSRoman Divacky     /* If the shadow use is an int->float cast, then insert a second IV
2057f22ef01cSRoman Divacky        to eliminate this cast.
2058f22ef01cSRoman Divacky 
2059f22ef01cSRoman Divacky          for (unsigned i = 0; i < n; ++i)
2060f22ef01cSRoman Divacky            foo((double)i);
2061f22ef01cSRoman Divacky 
2062f22ef01cSRoman Divacky        is transformed into
2063f22ef01cSRoman Divacky 
2064f22ef01cSRoman Divacky          double d = 0.0;
2065f22ef01cSRoman Divacky          for (unsigned i = 0; i < n; ++i, ++d)
2066f22ef01cSRoman Divacky            foo(d);
2067f22ef01cSRoman Divacky     */
20686122f3e6SDimitry Andric     if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
20696122f3e6SDimitry Andric       IsSigned = false;
2070f22ef01cSRoman Divacky       DestTy = UCast->getDestTy();
20716122f3e6SDimitry Andric     }
20726122f3e6SDimitry Andric     else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
20736122f3e6SDimitry Andric       IsSigned = true;
2074f22ef01cSRoman Divacky       DestTy = SCast->getDestTy();
20756122f3e6SDimitry Andric     }
2076f22ef01cSRoman Divacky     if (!DestTy) continue;
2077f22ef01cSRoman Divacky 
2078f22ef01cSRoman Divacky     // If the target does not support DestTy natively then do not apply
2079f22ef01cSRoman Divacky     // this transformation.
2080139f7f9bSDimitry Andric     if (!TTI.isTypeLegal(DestTy)) continue;
2081f22ef01cSRoman Divacky 
2082f22ef01cSRoman Divacky     PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
2083f22ef01cSRoman Divacky     if (!PH) continue;
2084f22ef01cSRoman Divacky     if (PH->getNumIncomingValues() != 2) continue;
2085f22ef01cSRoman Divacky 
20862cab237bSDimitry Andric     // If the calculation in integers overflows, the result in the FP type
20872cab237bSDimitry Andric     // will differ, so we can only do this transformation if we are guaranteed
20882cab237bSDimitry Andric     // not to encounter overflowing values.
20892cab237bSDimitry Andric     const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PH));
20902cab237bSDimitry Andric     if (!AR) continue;
20912cab237bSDimitry Andric     if (IsSigned && !AR->hasNoSignedWrap()) continue;
20922cab237bSDimitry Andric     if (!IsSigned && !AR->hasNoUnsignedWrap()) continue;
20932cab237bSDimitry Andric 
20946122f3e6SDimitry Andric     Type *SrcTy = PH->getType();
2095f22ef01cSRoman Divacky     int Mantissa = DestTy->getFPMantissaWidth();
2096f22ef01cSRoman Divacky     if (Mantissa == -1) continue;
2097f22ef01cSRoman Divacky     if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
2098f22ef01cSRoman Divacky       continue;
2099f22ef01cSRoman Divacky 
2100f22ef01cSRoman Divacky     unsigned Entry, Latch;
2101f22ef01cSRoman Divacky     if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
2102f22ef01cSRoman Divacky       Entry = 0;
2103f22ef01cSRoman Divacky       Latch = 1;
2104f22ef01cSRoman Divacky     } else {
2105f22ef01cSRoman Divacky       Entry = 1;
2106f22ef01cSRoman Divacky       Latch = 0;
2107f22ef01cSRoman Divacky     }
2108f22ef01cSRoman Divacky 
2109f22ef01cSRoman Divacky     ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
2110f22ef01cSRoman Divacky     if (!Init) continue;
21116122f3e6SDimitry Andric     Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
21126122f3e6SDimitry Andric                                         (double)Init->getSExtValue() :
21136122f3e6SDimitry Andric                                         (double)Init->getZExtValue());
2114f22ef01cSRoman Divacky 
2115f22ef01cSRoman Divacky     BinaryOperator *Incr =
2116f22ef01cSRoman Divacky       dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
2117f22ef01cSRoman Divacky     if (!Incr) continue;
2118f22ef01cSRoman Divacky     if (Incr->getOpcode() != Instruction::Add
2119f22ef01cSRoman Divacky         && Incr->getOpcode() != Instruction::Sub)
2120f22ef01cSRoman Divacky       continue;
2121f22ef01cSRoman Divacky 
2122f22ef01cSRoman Divacky     /* Initialize new IV, double d = 0.0 in above example. */
212391bc56edSDimitry Andric     ConstantInt *C = nullptr;
2124f22ef01cSRoman Divacky     if (Incr->getOperand(0) == PH)
2125f22ef01cSRoman Divacky       C = dyn_cast<ConstantInt>(Incr->getOperand(1));
2126f22ef01cSRoman Divacky     else if (Incr->getOperand(1) == PH)
2127f22ef01cSRoman Divacky       C = dyn_cast<ConstantInt>(Incr->getOperand(0));
2128f22ef01cSRoman Divacky     else
2129f22ef01cSRoman Divacky       continue;
2130f22ef01cSRoman Divacky 
2131f22ef01cSRoman Divacky     if (!C) continue;
2132f22ef01cSRoman Divacky 
2133f22ef01cSRoman Divacky     // Ignore negative constants, as the code below doesn't handle them
2134f22ef01cSRoman Divacky     // correctly. TODO: Remove this restriction.
2135f22ef01cSRoman Divacky     if (!C->getValue().isStrictlyPositive()) continue;
2136f22ef01cSRoman Divacky 
2137f22ef01cSRoman Divacky     /* Add new PHINode. */
21383b0f4066SDimitry Andric     PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
2139f22ef01cSRoman Divacky 
2140f22ef01cSRoman Divacky     /* Create the new increment, '++d' in the above example. */
2141f22ef01cSRoman Divacky     Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
2142f22ef01cSRoman Divacky     BinaryOperator *NewIncr =
2143f22ef01cSRoman Divacky       BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
2144f22ef01cSRoman Divacky                                Instruction::FAdd : Instruction::FSub,
2145f22ef01cSRoman Divacky                              NewPH, CFP, "IV.S.next.", Incr);
2146f22ef01cSRoman Divacky 
2147f22ef01cSRoman Divacky     NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
2148f22ef01cSRoman Divacky     NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
2149f22ef01cSRoman Divacky 
2150f22ef01cSRoman Divacky     /* Remove cast operation */
2151f22ef01cSRoman Divacky     ShadowUse->replaceAllUsesWith(NewPH);
2152f22ef01cSRoman Divacky     ShadowUse->eraseFromParent();
2153f22ef01cSRoman Divacky     Changed = true;
2154f22ef01cSRoman Divacky     break;
2155f22ef01cSRoman Divacky   }
2156f22ef01cSRoman Divacky }
2157f22ef01cSRoman Divacky 
21587d523365SDimitry Andric /// If Cond has an operand that is an expression of an IV, set the IV user and
21597d523365SDimitry Andric /// stride information and return true, otherwise return false.
2160f22ef01cSRoman Divacky bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
2161ff0cc061SDimitry Andric   for (IVStrideUse &U : IU)
2162ff0cc061SDimitry Andric     if (U.getUser() == Cond) {
2163f22ef01cSRoman Divacky       // NOTE: we could handle setcc instructions with multiple uses here, but
2164f22ef01cSRoman Divacky       // InstCombine does it as well for simple uses, and it's not clear that
2165f22ef01cSRoman Divacky       // this occurs often enough in real life to be worth handling.
2166ff0cc061SDimitry Andric       CondUse = &U;
2167f22ef01cSRoman Divacky       return true;
2168f22ef01cSRoman Divacky     }
2169f22ef01cSRoman Divacky   return false;
2170f22ef01cSRoman Divacky }
2171f22ef01cSRoman Divacky 
21727d523365SDimitry Andric /// Rewrite the loop's terminating condition if it uses a max computation.
2173f22ef01cSRoman Divacky ///
2174f22ef01cSRoman Divacky /// This is a narrow solution to a specific, but acute, problem. For loops
2175f22ef01cSRoman Divacky /// like this:
2176f22ef01cSRoman Divacky ///
2177f22ef01cSRoman Divacky ///   i = 0;
2178f22ef01cSRoman Divacky ///   do {
2179f22ef01cSRoman Divacky ///     p[i] = 0.0;
2180f22ef01cSRoman Divacky ///   } while (++i < n);
2181f22ef01cSRoman Divacky ///
2182f22ef01cSRoman Divacky /// the trip count isn't just 'n', because 'n' might not be positive. And
2183f22ef01cSRoman Divacky /// unfortunately this can come up even for loops where the user didn't use
2184f22ef01cSRoman Divacky /// a C do-while loop. For example, seemingly well-behaved top-test loops
2185f22ef01cSRoman Divacky /// will commonly be lowered like this:
21862cab237bSDimitry Andric ///
2187f22ef01cSRoman Divacky ///   if (n > 0) {
2188f22ef01cSRoman Divacky ///     i = 0;
2189f22ef01cSRoman Divacky ///     do {
2190f22ef01cSRoman Divacky ///       p[i] = 0.0;
2191f22ef01cSRoman Divacky ///     } while (++i < n);
2192f22ef01cSRoman Divacky ///   }
2193f22ef01cSRoman Divacky ///
2194f22ef01cSRoman Divacky /// and then it's possible for subsequent optimization to obscure the if
2195f22ef01cSRoman Divacky /// test in such a way that indvars can't find it.
2196f22ef01cSRoman Divacky ///
2197f22ef01cSRoman Divacky /// When indvars can't find the if test in loops like this, it creates a
2198f22ef01cSRoman Divacky /// max expression, which allows it to give the loop a canonical
2199f22ef01cSRoman Divacky /// induction variable:
2200f22ef01cSRoman Divacky ///
2201f22ef01cSRoman Divacky ///   i = 0;
2202f22ef01cSRoman Divacky ///   max = n < 1 ? 1 : n;
2203f22ef01cSRoman Divacky ///   do {
2204f22ef01cSRoman Divacky ///     p[i] = 0.0;
2205f22ef01cSRoman Divacky ///   } while (++i != max);
2206f22ef01cSRoman Divacky ///
2207f22ef01cSRoman Divacky /// Canonical induction variables are necessary because the loop passes
2208f22ef01cSRoman Divacky /// are designed around them. The most obvious example of this is the
2209f22ef01cSRoman Divacky /// LoopInfo analysis, which doesn't remember trip count values. It
2210f22ef01cSRoman Divacky /// expects to be able to rediscover the trip count each time it is
2211f22ef01cSRoman Divacky /// needed, and it does this using a simple analysis that only succeeds if
2212f22ef01cSRoman Divacky /// the loop has a canonical induction variable.
2213f22ef01cSRoman Divacky ///
2214f22ef01cSRoman Divacky /// However, when it comes time to generate code, the maximum operation
2215f22ef01cSRoman Divacky /// can be quite costly, especially if it's inside of an outer loop.
2216f22ef01cSRoman Divacky ///
2217f22ef01cSRoman Divacky /// This function solves this problem by detecting this type of loop and
2218f22ef01cSRoman Divacky /// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
2219f22ef01cSRoman Divacky /// the instructions for the maximum computation.
2220f22ef01cSRoman Divacky ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
2221f22ef01cSRoman Divacky   // Check that the loop matches the pattern we're looking for.
2222f22ef01cSRoman Divacky   if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
2223f22ef01cSRoman Divacky       Cond->getPredicate() != CmpInst::ICMP_NE)
2224f22ef01cSRoman Divacky     return Cond;
2225f22ef01cSRoman Divacky 
2226f22ef01cSRoman Divacky   SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
2227f22ef01cSRoman Divacky   if (!Sel || !Sel->hasOneUse()) return Cond;
2228f22ef01cSRoman Divacky 
2229f22ef01cSRoman Divacky   const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
2230f22ef01cSRoman Divacky   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2231f22ef01cSRoman Divacky     return Cond;
2232f22ef01cSRoman Divacky   const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
2233f22ef01cSRoman Divacky 
2234f22ef01cSRoman Divacky   // Add one to the backedge-taken count to get the trip count.
2235e580952dSDimitry Andric   const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
2236f22ef01cSRoman Divacky   if (IterationCount != SE.getSCEV(Sel)) return Cond;
2237f22ef01cSRoman Divacky 
2238f22ef01cSRoman Divacky   // Check for a max calculation that matches the pattern. There's no check
2239f22ef01cSRoman Divacky   // for ICMP_ULE here because the comparison would be with zero, which
2240f22ef01cSRoman Divacky   // isn't interesting.
2241f22ef01cSRoman Divacky   CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
224291bc56edSDimitry Andric   const SCEVNAryExpr *Max = nullptr;
2243f22ef01cSRoman Divacky   if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
2244f22ef01cSRoman Divacky     Pred = ICmpInst::ICMP_SLE;
2245f22ef01cSRoman Divacky     Max = S;
2246f22ef01cSRoman Divacky   } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
2247f22ef01cSRoman Divacky     Pred = ICmpInst::ICMP_SLT;
2248f22ef01cSRoman Divacky     Max = S;
2249f22ef01cSRoman Divacky   } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
2250f22ef01cSRoman Divacky     Pred = ICmpInst::ICMP_ULT;
2251f22ef01cSRoman Divacky     Max = U;
2252f22ef01cSRoman Divacky   } else {
2253f22ef01cSRoman Divacky     // No match; bail.
2254f22ef01cSRoman Divacky     return Cond;
2255f22ef01cSRoman Divacky   }
2256f22ef01cSRoman Divacky 
2257f22ef01cSRoman Divacky   // To handle a max with more than two operands, this optimization would
2258f22ef01cSRoman Divacky   // require additional checking and setup.
2259f22ef01cSRoman Divacky   if (Max->getNumOperands() != 2)
2260f22ef01cSRoman Divacky     return Cond;
2261f22ef01cSRoman Divacky 
2262f22ef01cSRoman Divacky   const SCEV *MaxLHS = Max->getOperand(0);
2263f22ef01cSRoman Divacky   const SCEV *MaxRHS = Max->getOperand(1);
2264f22ef01cSRoman Divacky 
2265f22ef01cSRoman Divacky   // ScalarEvolution canonicalizes constants to the left. For < and >, look
2266f22ef01cSRoman Divacky   // for a comparison with 1. For <= and >=, a comparison with zero.
2267f22ef01cSRoman Divacky   if (!MaxLHS ||
2268f22ef01cSRoman Divacky       (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
2269f22ef01cSRoman Divacky     return Cond;
2270f22ef01cSRoman Divacky 
2271f22ef01cSRoman Divacky   // Check the relevant induction variable for conformance to
2272f22ef01cSRoman Divacky   // the pattern.
2273f22ef01cSRoman Divacky   const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
2274f22ef01cSRoman Divacky   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
2275f22ef01cSRoman Divacky   if (!AR || !AR->isAffine() ||
2276f22ef01cSRoman Divacky       AR->getStart() != One ||
2277f22ef01cSRoman Divacky       AR->getStepRecurrence(SE) != One)
2278f22ef01cSRoman Divacky     return Cond;
2279f22ef01cSRoman Divacky 
2280f22ef01cSRoman Divacky   assert(AR->getLoop() == L &&
2281f22ef01cSRoman Divacky          "Loop condition operand is an addrec in a different loop!");
2282f22ef01cSRoman Divacky 
2283f22ef01cSRoman Divacky   // Check the right operand of the select, and remember it, as it will
2284f22ef01cSRoman Divacky   // be used in the new comparison instruction.
228591bc56edSDimitry Andric   Value *NewRHS = nullptr;
2286f22ef01cSRoman Divacky   if (ICmpInst::isTrueWhenEqual(Pred)) {
2287f22ef01cSRoman Divacky     // Look for n+1, and grab n.
2288f22ef01cSRoman Divacky     if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
2289139f7f9bSDimitry Andric       if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2290139f7f9bSDimitry Andric          if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
2291f22ef01cSRoman Divacky            NewRHS = BO->getOperand(0);
2292f22ef01cSRoman Divacky     if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
2293139f7f9bSDimitry Andric       if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2294139f7f9bSDimitry Andric         if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
2295f22ef01cSRoman Divacky           NewRHS = BO->getOperand(0);
2296f22ef01cSRoman Divacky     if (!NewRHS)
2297f22ef01cSRoman Divacky       return Cond;
2298f22ef01cSRoman Divacky   } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
2299f22ef01cSRoman Divacky     NewRHS = Sel->getOperand(1);
2300f22ef01cSRoman Divacky   else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
2301f22ef01cSRoman Divacky     NewRHS = Sel->getOperand(2);
2302ffd1746dSEd Schouten   else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
2303ffd1746dSEd Schouten     NewRHS = SU->getValue();
2304f22ef01cSRoman Divacky   else
2305ffd1746dSEd Schouten     // Max doesn't match expected pattern.
2306ffd1746dSEd Schouten     return Cond;
2307f22ef01cSRoman Divacky 
2308f22ef01cSRoman Divacky   // Determine the new comparison opcode. It may be signed or unsigned,
2309f22ef01cSRoman Divacky   // and the original comparison may be either equality or inequality.
2310f22ef01cSRoman Divacky   if (Cond->getPredicate() == CmpInst::ICMP_EQ)
2311f22ef01cSRoman Divacky     Pred = CmpInst::getInversePredicate(Pred);
2312f22ef01cSRoman Divacky 
2313f22ef01cSRoman Divacky   // Ok, everything looks ok to change the condition into an SLT or SGE and
2314f22ef01cSRoman Divacky   // delete the max calculation.
2315f22ef01cSRoman Divacky   ICmpInst *NewCond =
2316f22ef01cSRoman Divacky     new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
2317f22ef01cSRoman Divacky 
2318f22ef01cSRoman Divacky   // Delete the max calculation instructions.
2319f22ef01cSRoman Divacky   Cond->replaceAllUsesWith(NewCond);
2320f22ef01cSRoman Divacky   CondUse->setUser(NewCond);
2321f22ef01cSRoman Divacky   Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
2322f22ef01cSRoman Divacky   Cond->eraseFromParent();
2323f22ef01cSRoman Divacky   Sel->eraseFromParent();
2324f22ef01cSRoman Divacky   if (Cmp->use_empty())
2325f22ef01cSRoman Divacky     Cmp->eraseFromParent();
2326f22ef01cSRoman Divacky   return NewCond;
2327f22ef01cSRoman Divacky }
2328f22ef01cSRoman Divacky 
23297d523365SDimitry Andric /// Change loop terminating condition to use the postinc iv when possible.
2330f22ef01cSRoman Divacky void
2331f22ef01cSRoman Divacky LSRInstance::OptimizeLoopTermCond() {
2332f22ef01cSRoman Divacky   SmallPtrSet<Instruction *, 4> PostIncs;
2333f22ef01cSRoman Divacky 
2334d88c1a5aSDimitry Andric   // We need a different set of heuristics for rotated and non-rotated loops.
2335d88c1a5aSDimitry Andric   // If a loop is rotated then the latch is also an exiting block, so inserting
2336d88c1a5aSDimitry Andric   // post-inc expressions just before the latch is ideal. To reduce live ranges
2337d88c1a5aSDimitry Andric   // it also makes sense to rewrite terminating conditions to use post-inc
2338d88c1a5aSDimitry Andric   // expressions.
2339d88c1a5aSDimitry Andric   //
2340d88c1a5aSDimitry Andric   // If the loop is not rotated then the latch is not an exiting block; the
2341d88c1a5aSDimitry Andric   // exit check is done in the loop head. Adding post-inc expressions before
2342d88c1a5aSDimitry Andric   // the latch would cause the live ranges of the pre-inc and post-inc
2343d88c1a5aSDimitry Andric   // expressions to overlap in the loop body. In this case we do *not* want to
2344d88c1a5aSDimitry Andric   // use post-inc expressions in the latch check, and we want to insert the
2345d88c1a5aSDimitry Andric   // post-inc expressions before the backedge.
2346f22ef01cSRoman Divacky   BasicBlock *LatchBlock = L->getLoopLatch();
2347f22ef01cSRoman Divacky   SmallVector<BasicBlock*, 8> ExitingBlocks;
2348f22ef01cSRoman Divacky   L->getExitingBlocks(ExitingBlocks);
2349d88c1a5aSDimitry Andric   if (llvm::all_of(ExitingBlocks, [&LatchBlock](const BasicBlock *BB) {
2350d88c1a5aSDimitry Andric         return LatchBlock != BB;
2351d88c1a5aSDimitry Andric       })) {
2352d88c1a5aSDimitry Andric     // The backedge doesn't exit the loop; treat this as a head-tested loop.
2353d88c1a5aSDimitry Andric     IVIncInsertPos = LatchBlock->getTerminator();
2354d88c1a5aSDimitry Andric     return;
2355d88c1a5aSDimitry Andric   }
2356f22ef01cSRoman Divacky 
2357d88c1a5aSDimitry Andric   // Otherwise treat this as a rotated loop.
2358ff0cc061SDimitry Andric   for (BasicBlock *ExitingBlock : ExitingBlocks) {
2359f22ef01cSRoman Divacky     // Get the terminating condition for the loop if possible.  If we
2360f22ef01cSRoman Divacky     // can, we want to change it to use a post-incremented version of its
2361f22ef01cSRoman Divacky     // induction variable, to allow coalescing the live ranges for the IV into
2362f22ef01cSRoman Divacky     // one register value.
2363f22ef01cSRoman Divacky 
2364f22ef01cSRoman Divacky     BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2365f22ef01cSRoman Divacky     if (!TermBr)
2366f22ef01cSRoman Divacky       continue;
2367f22ef01cSRoman Divacky     // FIXME: Overly conservative; the termination condition could be an 'or' etc.
2368f22ef01cSRoman Divacky     if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
2369f22ef01cSRoman Divacky       continue;
2370f22ef01cSRoman Divacky 
2371f22ef01cSRoman Divacky     // Search IVUsesByStride to find Cond's IVUse if there is one.
237291bc56edSDimitry Andric     IVStrideUse *CondUse = nullptr;
2373f22ef01cSRoman Divacky     ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
2374f22ef01cSRoman Divacky     if (!FindIVUserForCond(Cond, CondUse))
2375f22ef01cSRoman Divacky       continue;
2376f22ef01cSRoman Divacky 
2377f22ef01cSRoman Divacky     // If the trip count is computed in terms of a max (due to ScalarEvolution
2378f22ef01cSRoman Divacky     // being unable to find a sufficient guard, for example), change the loop
2379f22ef01cSRoman Divacky     // comparison to use SLT or ULT instead of NE.
2380f22ef01cSRoman Divacky     // One consequence of doing this now is that it disrupts the count-down
2381f22ef01cSRoman Divacky     // optimization. That's not always a bad thing though, because in such
2382f22ef01cSRoman Divacky     // cases it may still be worthwhile to avoid a max.
2383f22ef01cSRoman Divacky     Cond = OptimizeMax(Cond, CondUse);
2384f22ef01cSRoman Divacky 
2385f22ef01cSRoman Divacky     // If this exiting block dominates the latch block, it may also use
2386f22ef01cSRoman Divacky     // the post-inc value if it won't be shared with other uses.
2387f22ef01cSRoman Divacky     // Check for dominance.
2388f22ef01cSRoman Divacky     if (!DT.dominates(ExitingBlock, LatchBlock))
2389f22ef01cSRoman Divacky       continue;
2390f22ef01cSRoman Divacky 
2391f22ef01cSRoman Divacky     // Conservatively avoid trying to use the post-inc value in non-latch
2392f22ef01cSRoman Divacky     // exits if there may be pre-inc users in intervening blocks.
2393f22ef01cSRoman Divacky     if (LatchBlock != ExitingBlock)
2394f22ef01cSRoman Divacky       for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
2395f22ef01cSRoman Divacky         // Test if the use is reachable from the exiting block. This dominator
2396f22ef01cSRoman Divacky         // query is a conservative approximation of reachability.
2397f22ef01cSRoman Divacky         if (&*UI != CondUse &&
2398f22ef01cSRoman Divacky             !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
2399f22ef01cSRoman Divacky           // Conservatively assume there may be reuse if the quotient of their
2400f22ef01cSRoman Divacky           // strides could be a legal scale.
2401f22ef01cSRoman Divacky           const SCEV *A = IU.getStride(*CondUse, L);
2402f22ef01cSRoman Divacky           const SCEV *B = IU.getStride(*UI, L);
2403f22ef01cSRoman Divacky           if (!A || !B) continue;
2404f22ef01cSRoman Divacky           if (SE.getTypeSizeInBits(A->getType()) !=
2405f22ef01cSRoman Divacky               SE.getTypeSizeInBits(B->getType())) {
2406f22ef01cSRoman Divacky             if (SE.getTypeSizeInBits(A->getType()) >
2407f22ef01cSRoman Divacky                 SE.getTypeSizeInBits(B->getType()))
2408f22ef01cSRoman Divacky               B = SE.getSignExtendExpr(B, A->getType());
2409f22ef01cSRoman Divacky             else
2410f22ef01cSRoman Divacky               A = SE.getSignExtendExpr(A, B->getType());
2411f22ef01cSRoman Divacky           }
2412f22ef01cSRoman Divacky           if (const SCEVConstant *D =
2413f22ef01cSRoman Divacky                 dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
2414f22ef01cSRoman Divacky             const ConstantInt *C = D->getValue();
2415f22ef01cSRoman Divacky             // Stride of one or negative one can have reuse with non-addresses.
2416c4394386SDimitry Andric             if (C->isOne() || C->isMinusOne())
2417f22ef01cSRoman Divacky               goto decline_post_inc;
2418f22ef01cSRoman Divacky             // Avoid weird situations.
2419f22ef01cSRoman Divacky             if (C->getValue().getMinSignedBits() >= 64 ||
2420f22ef01cSRoman Divacky                 C->getValue().isMinSignedValue())
2421f22ef01cSRoman Divacky               goto decline_post_inc;
2422f22ef01cSRoman Divacky             // Check for possible scaled-address reuse.
24234ba319b5SDimitry Andric             if (isAddressUse(TTI, UI->getUser(), UI->getOperandValToReplace())) {
24244ba319b5SDimitry Andric               MemAccessTy AccessTy = getAccessType(
24254ba319b5SDimitry Andric                   TTI, UI->getUser(), UI->getOperandValToReplace());
2426139f7f9bSDimitry Andric               int64_t Scale = C->getSExtValue();
24277d523365SDimitry Andric               if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
2428139f7f9bSDimitry Andric                                             /*BaseOffset=*/0,
24297d523365SDimitry Andric                                             /*HasBaseReg=*/false, Scale,
24307d523365SDimitry Andric                                             AccessTy.AddrSpace))
2431f22ef01cSRoman Divacky                 goto decline_post_inc;
2432139f7f9bSDimitry Andric               Scale = -Scale;
24337d523365SDimitry Andric               if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
2434139f7f9bSDimitry Andric                                             /*BaseOffset=*/0,
24357d523365SDimitry Andric                                             /*HasBaseReg=*/false, Scale,
24367d523365SDimitry Andric                                             AccessTy.AddrSpace))
2437f22ef01cSRoman Divacky                 goto decline_post_inc;
2438f22ef01cSRoman Divacky             }
2439f22ef01cSRoman Divacky           }
24404ba319b5SDimitry Andric         }
2441f22ef01cSRoman Divacky 
24424ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "
2443f22ef01cSRoman Divacky                       << *Cond << '\n');
2444f22ef01cSRoman Divacky 
2445f22ef01cSRoman Divacky     // It's possible for the setcc instruction to be anywhere in the loop, and
2446f22ef01cSRoman Divacky     // possible for it to have multiple users.  If it is not immediately before
2447f22ef01cSRoman Divacky     // the exiting block branch, move it.
2448f22ef01cSRoman Divacky     if (&*++BasicBlock::iterator(Cond) != TermBr) {
2449f22ef01cSRoman Divacky       if (Cond->hasOneUse()) {
2450f22ef01cSRoman Divacky         Cond->moveBefore(TermBr);
2451f22ef01cSRoman Divacky       } else {
2452f22ef01cSRoman Divacky         // Clone the terminating condition and insert into the loopend.
2453f22ef01cSRoman Divacky         ICmpInst *OldCond = Cond;
2454f22ef01cSRoman Divacky         Cond = cast<ICmpInst>(Cond->clone());
2455f22ef01cSRoman Divacky         Cond->setName(L->getHeader()->getName() + ".termcond");
24567d523365SDimitry Andric         ExitingBlock->getInstList().insert(TermBr->getIterator(), Cond);
2457f22ef01cSRoman Divacky 
2458f22ef01cSRoman Divacky         // Clone the IVUse, as the old use still exists!
245917a519f9SDimitry Andric         CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
2460f22ef01cSRoman Divacky         TermBr->replaceUsesOfWith(OldCond, Cond);
2461f22ef01cSRoman Divacky       }
2462f22ef01cSRoman Divacky     }
2463f22ef01cSRoman Divacky 
2464f22ef01cSRoman Divacky     // If we get to here, we know that we can transform the setcc instruction to
2465f22ef01cSRoman Divacky     // use the post-incremented version of the IV, allowing us to coalesce the
2466f22ef01cSRoman Divacky     // live ranges for the IV correctly.
2467f22ef01cSRoman Divacky     CondUse->transformToPostInc(L);
2468f22ef01cSRoman Divacky     Changed = true;
2469f22ef01cSRoman Divacky 
2470f22ef01cSRoman Divacky     PostIncs.insert(Cond);
2471f22ef01cSRoman Divacky   decline_post_inc:;
2472f22ef01cSRoman Divacky   }
2473f22ef01cSRoman Divacky 
2474f22ef01cSRoman Divacky   // Determine an insertion point for the loop induction variable increment. It
2475f22ef01cSRoman Divacky   // must dominate all the post-inc comparisons we just set up, and it must
2476f22ef01cSRoman Divacky   // dominate the loop latch edge.
2477f22ef01cSRoman Divacky   IVIncInsertPos = L->getLoopLatch()->getTerminator();
247839d628a0SDimitry Andric   for (Instruction *Inst : PostIncs) {
2479f22ef01cSRoman Divacky     BasicBlock *BB =
2480f22ef01cSRoman Divacky       DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
248139d628a0SDimitry Andric                                     Inst->getParent());
248239d628a0SDimitry Andric     if (BB == Inst->getParent())
248339d628a0SDimitry Andric       IVIncInsertPos = Inst;
2484f22ef01cSRoman Divacky     else if (BB != IVIncInsertPos->getParent())
2485f22ef01cSRoman Divacky       IVIncInsertPos = BB->getTerminator();
2486f22ef01cSRoman Divacky   }
2487f22ef01cSRoman Divacky }
2488f22ef01cSRoman Divacky 
24897d523365SDimitry Andric /// Determine if the given use can accommodate a fixup at the given offset and
24907d523365SDimitry Andric /// other details. If so, update the use and return true.
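///
/// For example (illustrative numbers only): if LU currently covers offsets
/// [0, 64] and a new fixup needs offset 128, the use can absorb it only if an
/// address with an immediate of 128 - 0 = 128 plus a base register would still
/// be foldable on the target; otherwise the fixup needs a separate use.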
24917d523365SDimitry Andric bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
24927d523365SDimitry Andric                                      bool HasBaseReg, LSRUse::KindType Kind,
24937d523365SDimitry Andric                                      MemAccessTy AccessTy) {
2494f22ef01cSRoman Divacky   int64_t NewMinOffset = LU.MinOffset;
2495f22ef01cSRoman Divacky   int64_t NewMaxOffset = LU.MaxOffset;
24967d523365SDimitry Andric   MemAccessTy NewAccessTy = AccessTy;
2497f22ef01cSRoman Divacky 
2498f22ef01cSRoman Divacky   // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
2499f22ef01cSRoman Divacky   // something conservative; however, this can pessimize in the case that one
2500f22ef01cSRoman Divacky   // of the uses will have all its uses outside the loop, for example.
2501f22ef01cSRoman Divacky   if (LU.Kind != Kind)
2502f22ef01cSRoman Divacky     return false;
250391bc56edSDimitry Andric 
2504f22ef01cSRoman Divacky   // Check for a mismatched access type, and fall back conservatively as needed.
2505ffd1746dSEd Schouten   // TODO: Be less conservative when the type is similar and can use the same
2506ffd1746dSEd Schouten   // addressing modes.
25077d523365SDimitry Andric   if (Kind == LSRUse::Address) {
2508899ca3d6SDimitry Andric     if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2509899ca3d6SDimitry Andric       NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
2510899ca3d6SDimitry Andric                                             AccessTy.AddrSpace);
2511899ca3d6SDimitry Andric     }
25127d523365SDimitry Andric   }
2513f22ef01cSRoman Divacky 
251491bc56edSDimitry Andric   // Conservatively assume HasBaseReg is true for now.
251591bc56edSDimitry Andric   if (NewOffset < LU.MinOffset) {
251691bc56edSDimitry Andric     if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
251791bc56edSDimitry Andric                           LU.MaxOffset - NewOffset, HasBaseReg))
251891bc56edSDimitry Andric       return false;
251991bc56edSDimitry Andric     NewMinOffset = NewOffset;
252091bc56edSDimitry Andric   } else if (NewOffset > LU.MaxOffset) {
252191bc56edSDimitry Andric     if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
252291bc56edSDimitry Andric                           NewOffset - LU.MinOffset, HasBaseReg))
252391bc56edSDimitry Andric       return false;
252491bc56edSDimitry Andric     NewMaxOffset = NewOffset;
252591bc56edSDimitry Andric   }
252691bc56edSDimitry Andric 
2527f22ef01cSRoman Divacky   // Update the use.
2528f22ef01cSRoman Divacky   LU.MinOffset = NewMinOffset;
2529f22ef01cSRoman Divacky   LU.MaxOffset = NewMaxOffset;
2530f22ef01cSRoman Divacky   LU.AccessTy = NewAccessTy;
2531f22ef01cSRoman Divacky   return true;
2532f22ef01cSRoman Divacky }
2533f22ef01cSRoman Divacky 
25347d523365SDimitry Andric /// Return an LSRUse index and an offset value for a fixup which needs the given
25357d523365SDimitry Andric /// expression, with the given kind and optional access type.  Either reuse an
25367d523365SDimitry Andric /// existing use or create a new one, as needed.
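///
/// For example (hypothetical), for the fixup expression (16 + %base) the
/// constant 16 is peeled off as the fixup offset and the use is keyed on the
/// remaining expression, so fixups differing only by a foldable constant can
/// share a single LSRUse.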
25377d523365SDimitry Andric std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
25387d523365SDimitry Andric                                                LSRUse::KindType Kind,
25397d523365SDimitry Andric                                                MemAccessTy AccessTy) {
2540f22ef01cSRoman Divacky   const SCEV *Copy = Expr;
2541f22ef01cSRoman Divacky   int64_t Offset = ExtractImmediate(Expr, SE);
2542f22ef01cSRoman Divacky 
2543f22ef01cSRoman Divacky   // Basic uses can't accept any offset, for example.
254491bc56edSDimitry Andric   if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
2545139f7f9bSDimitry Andric                         Offset, /*HasBaseReg=*/ true)) {
2546f22ef01cSRoman Divacky     Expr = Copy;
2547f22ef01cSRoman Divacky     Offset = 0;
2548f22ef01cSRoman Divacky   }
2549f22ef01cSRoman Divacky 
2550f22ef01cSRoman Divacky   std::pair<UseMapTy::iterator, bool> P =
255191bc56edSDimitry Andric     UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
2552f22ef01cSRoman Divacky   if (!P.second) {
2553f22ef01cSRoman Divacky     // A use already existed with this base.
2554f22ef01cSRoman Divacky     size_t LUIdx = P.first->second;
2555f22ef01cSRoman Divacky     LSRUse &LU = Uses[LUIdx];
2556f22ef01cSRoman Divacky     if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
2557f22ef01cSRoman Divacky       // Reuse this use.
2558f22ef01cSRoman Divacky       return std::make_pair(LUIdx, Offset);
2559f22ef01cSRoman Divacky   }
2560f22ef01cSRoman Divacky 
2561f22ef01cSRoman Divacky   // Create a new use.
2562f22ef01cSRoman Divacky   size_t LUIdx = Uses.size();
2563f22ef01cSRoman Divacky   P.first->second = LUIdx;
2564f22ef01cSRoman Divacky   Uses.push_back(LSRUse(Kind, AccessTy));
2565f22ef01cSRoman Divacky   LSRUse &LU = Uses[LUIdx];
2566f22ef01cSRoman Divacky 
2567f22ef01cSRoman Divacky   LU.MinOffset = Offset;
2568f22ef01cSRoman Divacky   LU.MaxOffset = Offset;
2569f22ef01cSRoman Divacky   return std::make_pair(LUIdx, Offset);
2570f22ef01cSRoman Divacky }
2571f22ef01cSRoman Divacky 
25727d523365SDimitry Andric /// Delete the given use from the Uses list.
25732754fe60SDimitry Andric void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
2574f22ef01cSRoman Divacky   if (&LU != &Uses.back())
2575f22ef01cSRoman Divacky     std::swap(LU, Uses.back());
2576f22ef01cSRoman Divacky   Uses.pop_back();
25772754fe60SDimitry Andric 
25782754fe60SDimitry Andric   // Update RegUses.
25797d523365SDimitry Andric   RegUses.swapAndDropUse(LUIdx, Uses.size());
2580f22ef01cSRoman Divacky }
2581f22ef01cSRoman Divacky 
25827d523365SDimitry Andric /// Look for a use distinct from OrigLU which has a formula with the same
25837d523365SDimitry Andric /// registers as the given formula.
2584f22ef01cSRoman Divacky LSRUse *
2585f22ef01cSRoman Divacky LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
2586f22ef01cSRoman Divacky                                        const LSRUse &OrigLU) {
2587e580952dSDimitry Andric   // Search all uses for the formula. This could be more clever.
2588f22ef01cSRoman Divacky   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
2589f22ef01cSRoman Divacky     LSRUse &LU = Uses[LUIdx];
2590e580952dSDimitry Andric     // Check whether this use is close enough to OrigLU, to see whether it's
2591e580952dSDimitry Andric     // worthwhile looking through its formulae.
2592e580952dSDimitry Andric     // Ignore ICmpZero uses because they may contain formulae generated by
2593e580952dSDimitry Andric     // GenerateICmpZeroScales, in which case adding fixup offsets may
2594e580952dSDimitry Andric     // be invalid.
2595f22ef01cSRoman Divacky     if (&LU != &OrigLU &&
2596f22ef01cSRoman Divacky         LU.Kind != LSRUse::ICmpZero &&
2597f22ef01cSRoman Divacky         LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2598e580952dSDimitry Andric         LU.WidestFixupType == OrigLU.WidestFixupType &&
2599f22ef01cSRoman Divacky         LU.HasFormulaWithSameRegs(OrigF)) {
2600e580952dSDimitry Andric       // Scan through this use's formulae.
2601ff0cc061SDimitry Andric       for (const Formula &F : LU.Formulae) {
2602e580952dSDimitry Andric         // Check to see if this formula has the same registers and symbols
2603e580952dSDimitry Andric         // as OrigF.
2604f22ef01cSRoman Divacky         if (F.BaseRegs == OrigF.BaseRegs &&
2605f22ef01cSRoman Divacky             F.ScaledReg == OrigF.ScaledReg &&
2606139f7f9bSDimitry Andric             F.BaseGV == OrigF.BaseGV &&
2607139f7f9bSDimitry Andric             F.Scale == OrigF.Scale &&
2608bd5abe19SDimitry Andric             F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2609139f7f9bSDimitry Andric           if (F.BaseOffset == 0)
2610f22ef01cSRoman Divacky             return &LU;
2611e580952dSDimitry Andric           // This is the formula where all the registers and symbols matched;
2612e580952dSDimitry Andric           // there aren't going to be any others. Since we declined it, we
26137ae0e2c9SDimitry Andric           // can skip the rest of the formulae and proceed to the next LSRUse.
2614f22ef01cSRoman Divacky           break;
2615f22ef01cSRoman Divacky         }
2616f22ef01cSRoman Divacky       }
2617f22ef01cSRoman Divacky     }
2618f22ef01cSRoman Divacky   }
2619f22ef01cSRoman Divacky 
2620e580952dSDimitry Andric   // Nothing looked good.
262191bc56edSDimitry Andric   return nullptr;
2622f22ef01cSRoman Divacky }
2623f22ef01cSRoman Divacky 
2624f22ef01cSRoman Divacky void LSRInstance::CollectInterestingTypesAndFactors() {
2625f22ef01cSRoman Divacky   SmallSetVector<const SCEV *, 4> Strides;
2626f22ef01cSRoman Divacky 
2627f22ef01cSRoman Divacky   // Collect interesting types and strides.
2628f22ef01cSRoman Divacky   SmallVector<const SCEV *, 4> Worklist;
2629ff0cc061SDimitry Andric   for (const IVStrideUse &U : IU) {
2630ff0cc061SDimitry Andric     const SCEV *Expr = IU.getExpr(U);
2631f22ef01cSRoman Divacky 
2632f22ef01cSRoman Divacky     // Collect interesting types.
2633f22ef01cSRoman Divacky     Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
2634f22ef01cSRoman Divacky 
2635f22ef01cSRoman Divacky     // Add strides for mentioned loops.
2636f22ef01cSRoman Divacky     Worklist.push_back(Expr);
2637f22ef01cSRoman Divacky     do {
2638f22ef01cSRoman Divacky       const SCEV *S = Worklist.pop_back_val();
2639f22ef01cSRoman Divacky       if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
2640dff0c46cSDimitry Andric         if (AR->getLoop() == L)
2641f22ef01cSRoman Divacky           Strides.insert(AR->getStepRecurrence(SE));
2642f22ef01cSRoman Divacky         Worklist.push_back(AR->getStart());
2643f22ef01cSRoman Divacky       } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
2644ffd1746dSEd Schouten         Worklist.append(Add->op_begin(), Add->op_end());
2645f22ef01cSRoman Divacky       }
2646f22ef01cSRoman Divacky     } while (!Worklist.empty());
2647f22ef01cSRoman Divacky   }
2648f22ef01cSRoman Divacky 
2649f22ef01cSRoman Divacky   // Compute interesting factors from the set of interesting strides.
2650f22ef01cSRoman Divacky   for (SmallSetVector<const SCEV *, 4>::const_iterator
2651f22ef01cSRoman Divacky        I = Strides.begin(), E = Strides.end(); I != E; ++I)
2652f22ef01cSRoman Divacky     for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
265391bc56edSDimitry Andric          std::next(I); NewStrideIter != E; ++NewStrideIter) {
2654f22ef01cSRoman Divacky       const SCEV *OldStride = *I;
2655f22ef01cSRoman Divacky       const SCEV *NewStride = *NewStrideIter;
2656f22ef01cSRoman Divacky 
2657f22ef01cSRoman Divacky       if (SE.getTypeSizeInBits(OldStride->getType()) !=
2658f22ef01cSRoman Divacky           SE.getTypeSizeInBits(NewStride->getType())) {
2659f22ef01cSRoman Divacky         if (SE.getTypeSizeInBits(OldStride->getType()) >
2660f22ef01cSRoman Divacky             SE.getTypeSizeInBits(NewStride->getType()))
2661f22ef01cSRoman Divacky           NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
2662f22ef01cSRoman Divacky         else
2663f22ef01cSRoman Divacky           OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
2664f22ef01cSRoman Divacky       }
2665f22ef01cSRoman Divacky       if (const SCEVConstant *Factor =
2666f22ef01cSRoman Divacky             dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
2667f22ef01cSRoman Divacky                                                         SE, true))) {
26687d523365SDimitry Andric         if (Factor->getAPInt().getMinSignedBits() <= 64)
26697d523365SDimitry Andric           Factors.insert(Factor->getAPInt().getSExtValue());
2670f22ef01cSRoman Divacky       } else if (const SCEVConstant *Factor =
2671f22ef01cSRoman Divacky                    dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
2672f22ef01cSRoman Divacky                                                                NewStride,
2673f22ef01cSRoman Divacky                                                                SE, true))) {
26747d523365SDimitry Andric         if (Factor->getAPInt().getMinSignedBits() <= 64)
26757d523365SDimitry Andric           Factors.insert(Factor->getAPInt().getSExtValue());
2676f22ef01cSRoman Divacky       }
2677f22ef01cSRoman Divacky     }
2678f22ef01cSRoman Divacky 
2679f22ef01cSRoman Divacky   // If all uses use the same type, don't bother looking for truncation-based
2680f22ef01cSRoman Divacky   // reuse.
2681f22ef01cSRoman Divacky   if (Types.size() == 1)
2682f22ef01cSRoman Divacky     Types.clear();
2683f22ef01cSRoman Divacky 
26844ba319b5SDimitry Andric   LLVM_DEBUG(print_factors_and_types(dbgs()));
2685f22ef01cSRoman Divacky }
2686f22ef01cSRoman Divacky 
26877d523365SDimitry Andric /// Helper for CollectChains that finds an IV operand (computed by an AddRec in
26887d523365SDimitry Andric /// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
26897d523365SDimitry Andric /// IVStrideUses, we could partially skip this.
2690dff0c46cSDimitry Andric static User::op_iterator
2691dff0c46cSDimitry Andric findIVOperand(User::op_iterator OI, User::op_iterator OE,
2692dff0c46cSDimitry Andric               Loop *L, ScalarEvolution &SE) {
2693dff0c46cSDimitry Andric   for (; OI != OE; ++OI) {
2694dff0c46cSDimitry Andric     if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
2695dff0c46cSDimitry Andric       if (!SE.isSCEVable(Oper->getType()))
2696dff0c46cSDimitry Andric         continue;
2697dff0c46cSDimitry Andric 
2698dff0c46cSDimitry Andric       if (const SCEVAddRecExpr *AR =
2699dff0c46cSDimitry Andric           dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
2700dff0c46cSDimitry Andric         if (AR->getLoop() == L)
2701dff0c46cSDimitry Andric           break;
2702dff0c46cSDimitry Andric       }
2703dff0c46cSDimitry Andric     }
2704dff0c46cSDimitry Andric   }
2705dff0c46cSDimitry Andric   return OI;
2706dff0c46cSDimitry Andric }
2707dff0c46cSDimitry Andric 
27084ba319b5SDimitry Andric /// IVChain logic must consistently peek through base TruncInst operands, so
27097d523365SDimitry Andric /// wrap it in a convenient helper.
2710dff0c46cSDimitry Andric static Value *getWideOperand(Value *Oper) {
2711dff0c46cSDimitry Andric   if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
2712dff0c46cSDimitry Andric     return Trunc->getOperand(0);
2713dff0c46cSDimitry Andric   return Oper;
2714dff0c46cSDimitry Andric }
2715dff0c46cSDimitry Andric 
27167d523365SDimitry Andric /// Return true if we allow an IV chain to include both types.
2717dff0c46cSDimitry Andric static bool isCompatibleIVType(Value *LVal, Value *RVal) {
2718dff0c46cSDimitry Andric   Type *LType = LVal->getType();
2719dff0c46cSDimitry Andric   Type *RType = RVal->getType();
27207a7e6055SDimitry Andric   return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy() &&
27217a7e6055SDimitry Andric                               // Different address spaces means (possibly)
27227a7e6055SDimitry Andric                               // different types of the pointer implementation,
27237a7e6055SDimitry Andric                               // e.g. i16 vs i32 so disallow that.
27247a7e6055SDimitry Andric                               (LType->getPointerAddressSpace() ==
27257a7e6055SDimitry Andric                                RType->getPointerAddressSpace()));
2726dff0c46cSDimitry Andric }
2727dff0c46cSDimitry Andric 
27287d523365SDimitry Andric /// Return an approximation of this SCEV expression's "base", or NULL for any
27297d523365SDimitry Andric /// constant. Returning the expression itself is conservative. Returning a
27307d523365SDimitry Andric /// deeper subexpression is more precise and valid as long as it isn't less
27317d523365SDimitry Andric /// complex than another subexpression. For expressions involving multiple
27327d523365SDimitry Andric /// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
27337d523365SDimitry Andric /// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
27347d523365SDimitry Andric /// IVInc==b-a.
2735dff0c46cSDimitry Andric ///
2736dff0c46cSDimitry Andric /// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
2737dff0c46cSDimitry Andric /// SCEVUnknown, we simply return the rightmost SCEV operand.
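///
/// For illustration (with a made-up pointer %a): getExprBase of
/// {(%a + 16),+,8}<%L> follows the AddRec start (%a + 16), whose rightmost
/// operand is the SCEVUnknown %a, so %a is returned; an access based on a
/// different object then reports a different base and is not chained with it.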
2738dff0c46cSDimitry Andric static const SCEV *getExprBase(const SCEV *S) {
2739dff0c46cSDimitry Andric   switch (S->getSCEVType()) {
2740dff0c46cSDimitry Andric   default: // including scUnknown.
2741dff0c46cSDimitry Andric     return S;
2742dff0c46cSDimitry Andric   case scConstant:
274391bc56edSDimitry Andric     return nullptr;
2744dff0c46cSDimitry Andric   case scTruncate:
2745dff0c46cSDimitry Andric     return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
2746dff0c46cSDimitry Andric   case scZeroExtend:
2747dff0c46cSDimitry Andric     return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
2748dff0c46cSDimitry Andric   case scSignExtend:
2749dff0c46cSDimitry Andric     return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
2750dff0c46cSDimitry Andric   case scAddExpr: {
2751dff0c46cSDimitry Andric     // Skip over scaled operands (scMulExpr) to follow add operands as long as
2752dff0c46cSDimitry Andric     // there's nothing more complex.
2753dff0c46cSDimitry Andric     // FIXME: not sure if we want to recognize negation.
2754dff0c46cSDimitry Andric     const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
2755dff0c46cSDimitry Andric     for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
2756dff0c46cSDimitry Andric            E(Add->op_begin()); I != E; ++I) {
2757dff0c46cSDimitry Andric       const SCEV *SubExpr = *I;
2758dff0c46cSDimitry Andric       if (SubExpr->getSCEVType() == scAddExpr)
2759dff0c46cSDimitry Andric         return getExprBase(SubExpr);
2760dff0c46cSDimitry Andric 
2761dff0c46cSDimitry Andric       if (SubExpr->getSCEVType() != scMulExpr)
2762dff0c46cSDimitry Andric         return SubExpr;
2763dff0c46cSDimitry Andric     }
2764dff0c46cSDimitry Andric     return S; // all operands are scaled, be conservative.
2765dff0c46cSDimitry Andric   }
2766dff0c46cSDimitry Andric   case scAddRecExpr:
2767dff0c46cSDimitry Andric     return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
2768dff0c46cSDimitry Andric   }
2769dff0c46cSDimitry Andric }
2770dff0c46cSDimitry Andric 
2771dff0c46cSDimitry Andric /// Return true if the chain increment is profitable to expand into a loop
2772dff0c46cSDimitry Andric /// invariant value, which may require its own register. A profitable chain
2773dff0c46cSDimitry Andric /// increment will be an offset relative to the same base. We allow such an
2774dff0c46cSDimitry Andric /// offset to be used as the chain increment as long as it is not obviously
2775dff0c46cSDimitry Andric /// expensive to expand using real instructions.
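///
/// A rough sketch (with made-up values): if the chain head's operand has
/// SCEV %base, an operand with SCEV (%base + (4 * %n)) yields the
/// non-constant increment (4 * %n), which is accepted only if it looks cheap
/// to expand; if the candidate operand is a constant distance from the head,
/// a non-constant increment is rejected outright.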
27767ae0e2c9SDimitry Andric bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
27777ae0e2c9SDimitry Andric                                     const SCEV *IncExpr,
27787ae0e2c9SDimitry Andric                                     ScalarEvolution &SE) {
27797ae0e2c9SDimitry Andric   // Aggressively form chains when -stress-ivchain.
2780dff0c46cSDimitry Andric   if (StressIVChain)
27817ae0e2c9SDimitry Andric     return true;
2782dff0c46cSDimitry Andric 
2783dff0c46cSDimitry Andric   // Do not replace a constant offset from IV head with a nonconstant IV
2784dff0c46cSDimitry Andric   // increment.
2785dff0c46cSDimitry Andric   if (!isa<SCEVConstant>(IncExpr)) {
27867ae0e2c9SDimitry Andric     const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
2787dff0c46cSDimitry Andric     if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
2788d88c1a5aSDimitry Andric       return false;
2789dff0c46cSDimitry Andric   }
2790dff0c46cSDimitry Andric 
2791dff0c46cSDimitry Andric   SmallPtrSet<const SCEV*, 8> Processed;
27927ae0e2c9SDimitry Andric   return !isHighCostExpansion(IncExpr, Processed, SE);
2793dff0c46cSDimitry Andric }
2794dff0c46cSDimitry Andric 
2795dff0c46cSDimitry Andric /// Return true if the number of registers needed for the chain is estimated to
2796dff0c46cSDimitry Andric /// be less than the number required for the individual IV users. First prohibit
2797dff0c46cSDimitry Andric /// any IV users that keep the IV live across increments (the Users set should
2798dff0c46cSDimitry Andric /// be empty). Next count the number and type of increments in the chain.
2799dff0c46cSDimitry Andric ///
2800dff0c46cSDimitry Andric /// Chaining IVs can lead to considerable code bloat if ISEL doesn't
2801dff0c46cSDimitry Andric /// effectively use postinc addressing modes. Only consider it profitable if the
2802dff0c46cSDimitry Andric /// increments can be computed in fewer registers when chained.
2803dff0c46cSDimitry Andric ///
2804dff0c46cSDimitry Andric /// TODO: Consider IVInc free if it's already used in another chain.
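///
/// As a rough sketch of the accounting below: the cost starts at 1 for the
/// chain itself, drops by 1 when the chain is completed by an existing
/// header phi, drops by 1 when there is more than one constant increment,
/// pays +1 for each variable increment that must be materialized, and gets
/// 1 back for each reused increment expression; the chain is kept only if
/// the final cost is negative.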
2805dff0c46cSDimitry Andric static bool
280639d628a0SDimitry Andric isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
2807139f7f9bSDimitry Andric                   ScalarEvolution &SE, const TargetTransformInfo &TTI) {
2808dff0c46cSDimitry Andric   if (StressIVChain)
2809dff0c46cSDimitry Andric     return true;
2810dff0c46cSDimitry Andric 
28117ae0e2c9SDimitry Andric   if (!Chain.hasIncs())
2812dff0c46cSDimitry Andric     return false;
2813dff0c46cSDimitry Andric 
2814dff0c46cSDimitry Andric   if (!Users.empty()) {
28154ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
28164ba319b5SDimitry Andric                for (Instruction *Inst
28174ba319b5SDimitry Andric                     : Users) { dbgs() << "  " << *Inst << "\n"; });
2818dff0c46cSDimitry Andric     return false;
2819dff0c46cSDimitry Andric   }
28207ae0e2c9SDimitry Andric   assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
2821dff0c46cSDimitry Andric 
2822dff0c46cSDimitry Andric   // The chain itself may require a register, so initialize cost to 1.
2823dff0c46cSDimitry Andric   int cost = 1;
2824dff0c46cSDimitry Andric 
2825dff0c46cSDimitry Andric   // A complete chain likely eliminates the need for keeping the original IV in
2826dff0c46cSDimitry Andric   // a register. LSR does not currently know how to form a complete chain unless
2827dff0c46cSDimitry Andric   // the header phi already exists.
28287ae0e2c9SDimitry Andric   if (isa<PHINode>(Chain.tailUserInst())
28297ae0e2c9SDimitry Andric       && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
2830dff0c46cSDimitry Andric     --cost;
2831dff0c46cSDimitry Andric   }
283291bc56edSDimitry Andric   const SCEV *LastIncExpr = nullptr;
2833dff0c46cSDimitry Andric   unsigned NumConstIncrements = 0;
2834dff0c46cSDimitry Andric   unsigned NumVarIncrements = 0;
2835dff0c46cSDimitry Andric   unsigned NumReusedIncrements = 0;
2836ff0cc061SDimitry Andric   for (const IVInc &Inc : Chain) {
2837ff0cc061SDimitry Andric     if (Inc.IncExpr->isZero())
2838dff0c46cSDimitry Andric       continue;
2839dff0c46cSDimitry Andric 
2840dff0c46cSDimitry Andric     // Incrementing by zero or some constant is neutral. We assume constants can
2841dff0c46cSDimitry Andric     // be folded into an addressing mode or an add's immediate operand.
2842ff0cc061SDimitry Andric     if (isa<SCEVConstant>(Inc.IncExpr)) {
2843dff0c46cSDimitry Andric       ++NumConstIncrements;
2844dff0c46cSDimitry Andric       continue;
2845dff0c46cSDimitry Andric     }
2846dff0c46cSDimitry Andric 
2847ff0cc061SDimitry Andric     if (Inc.IncExpr == LastIncExpr)
2848dff0c46cSDimitry Andric       ++NumReusedIncrements;
2849dff0c46cSDimitry Andric     else
2850dff0c46cSDimitry Andric       ++NumVarIncrements;
2851dff0c46cSDimitry Andric 
2852ff0cc061SDimitry Andric     LastIncExpr = Inc.IncExpr;
2853dff0c46cSDimitry Andric   }
2854dff0c46cSDimitry Andric   // An IV chain with a single increment is handled by LSR's postinc
2855dff0c46cSDimitry Andric   // uses. However, a chain with multiple increments requires keeping the IV's
2856dff0c46cSDimitry Andric   // value live longer than it needs to be if chained.
2857dff0c46cSDimitry Andric   if (NumConstIncrements > 1)
2858dff0c46cSDimitry Andric     --cost;
2859dff0c46cSDimitry Andric 
2860dff0c46cSDimitry Andric   // Materializing increment expressions in the preheader that didn't exist in
2861dff0c46cSDimitry Andric   // the original code may cost a register. For example, sign-extended array
2862dff0c46cSDimitry Andric   // indices can produce ridiculous increments like this:
2863dff0c46cSDimitry Andric   // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
2864dff0c46cSDimitry Andric   cost += NumVarIncrements;
2865dff0c46cSDimitry Andric 
2866dff0c46cSDimitry Andric   // Reusing variable increments likely saves a register to hold the multiple of
2867dff0c46cSDimitry Andric   // the stride.
2868dff0c46cSDimitry Andric   cost -= NumReusedIncrements;
2869dff0c46cSDimitry Andric 
28704ba319b5SDimitry Andric   LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
28717ae0e2c9SDimitry Andric                     << "\n");
2872dff0c46cSDimitry Andric 
2873dff0c46cSDimitry Andric   return cost < 0;
2874dff0c46cSDimitry Andric }
2875dff0c46cSDimitry Andric 
28767d523365SDimitry Andric /// Add this IV user to an existing chain or make it the head of a new chain.
2877dff0c46cSDimitry Andric void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
2878dff0c46cSDimitry Andric                                    SmallVectorImpl<ChainUsers> &ChainUsersVec) {
2879dff0c46cSDimitry Andric   // When IVs are used as types of varying widths, they are generally converted
2880dff0c46cSDimitry Andric   // to a wider type with some uses remaining narrow under a (free) trunc.
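  // For illustration, a narrow user of a widened IV typically looks like
  //   %t = trunc i64 %iv to i32
  //   %use = add i32 %t, 1
  // and getWideOperand peeks through the trunc so the chain is keyed on the
  // wide value %iv rather than on %t.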
28817ae0e2c9SDimitry Andric   Value *const NextIV = getWideOperand(IVOper);
28827ae0e2c9SDimitry Andric   const SCEV *const OperExpr = SE.getSCEV(NextIV);
28837ae0e2c9SDimitry Andric   const SCEV *const OperExprBase = getExprBase(OperExpr);
2884dff0c46cSDimitry Andric 
2885dff0c46cSDimitry Andric   // Visit all existing chains. Check if this IVOper can be computed as a
2886dff0c46cSDimitry Andric   // profitable loop invariant increment from the last link in the Chain.
2887dff0c46cSDimitry Andric   unsigned ChainIdx = 0, NChains = IVChainVec.size();
288891bc56edSDimitry Andric   const SCEV *LastIncExpr = nullptr;
2889dff0c46cSDimitry Andric   for (; ChainIdx < NChains; ++ChainIdx) {
28907ae0e2c9SDimitry Andric     IVChain &Chain = IVChainVec[ChainIdx];
28917ae0e2c9SDimitry Andric 
28927ae0e2c9SDimitry Andric     // Prune the solution space aggressively by checking that both IV operands
28937ae0e2c9SDimitry Andric     // are expressions that operate on the same unscaled SCEVUnknown. This
28947ae0e2c9SDimitry Andric     // "base" will be canceled by the subsequent getMinusSCEV call. Checking
28957ae0e2c9SDimitry Andric     // first avoids creating extra SCEV expressions.
28967ae0e2c9SDimitry Andric     if (!StressIVChain && Chain.ExprBase != OperExprBase)
28977ae0e2c9SDimitry Andric       continue;
28987ae0e2c9SDimitry Andric 
28997ae0e2c9SDimitry Andric     Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
2900dff0c46cSDimitry Andric     if (!isCompatibleIVType(PrevIV, NextIV))
2901dff0c46cSDimitry Andric       continue;
2902dff0c46cSDimitry Andric 
2903dff0c46cSDimitry Andric     // A phi node terminates a chain.
29047ae0e2c9SDimitry Andric     if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
2905dff0c46cSDimitry Andric       continue;
2906dff0c46cSDimitry Andric 
29077ae0e2c9SDimitry Andric     // The increment must be loop-invariant so it can be kept in a register.
29087ae0e2c9SDimitry Andric     const SCEV *PrevExpr = SE.getSCEV(PrevIV);
29097ae0e2c9SDimitry Andric     const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
29107ae0e2c9SDimitry Andric     if (!SE.isLoopInvariant(IncExpr, L))
29117ae0e2c9SDimitry Andric       continue;
29127ae0e2c9SDimitry Andric 
29137ae0e2c9SDimitry Andric     if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
2914dff0c46cSDimitry Andric       LastIncExpr = IncExpr;
2915dff0c46cSDimitry Andric       break;
2916dff0c46cSDimitry Andric     }
2917dff0c46cSDimitry Andric   }
2918dff0c46cSDimitry Andric   // If we haven't found a chain, create a new one, unless we hit the max. Don't
2919dff0c46cSDimitry Andric   // bother for phi nodes, because they must be last in the chain.
2920dff0c46cSDimitry Andric   if (ChainIdx == NChains) {
2921dff0c46cSDimitry Andric     if (isa<PHINode>(UserInst))
2922dff0c46cSDimitry Andric       return;
2923dff0c46cSDimitry Andric     if (NChains >= MaxChains && !StressIVChain) {
29244ba319b5SDimitry Andric       LLVM_DEBUG(dbgs() << "IV Chain Limit\n");
2925dff0c46cSDimitry Andric       return;
2926dff0c46cSDimitry Andric     }
29277ae0e2c9SDimitry Andric     LastIncExpr = OperExpr;
2928dff0c46cSDimitry Andric     // IVUsers may have skipped over sign/zero extensions. We don't currently
2929dff0c46cSDimitry Andric     // attempt to form chains involving extensions unless they can be hoisted
2930dff0c46cSDimitry Andric     // into this loop's AddRec.
2931dff0c46cSDimitry Andric     if (!isa<SCEVAddRecExpr>(LastIncExpr))
2932dff0c46cSDimitry Andric       return;
2933dff0c46cSDimitry Andric     ++NChains;
29347ae0e2c9SDimitry Andric     IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
29357ae0e2c9SDimitry Andric                                  OperExprBase));
2936dff0c46cSDimitry Andric     ChainUsersVec.resize(NChains);
29374ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
29387ae0e2c9SDimitry Andric                       << ") IV=" << *LastIncExpr << "\n");
29397ae0e2c9SDimitry Andric   } else {
29404ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << "  Inc: (" << *UserInst
29417ae0e2c9SDimitry Andric                       << ") IV+" << *LastIncExpr << "\n");
2942dff0c46cSDimitry Andric     // Add this IV user to the end of the chain.
29437ae0e2c9SDimitry Andric     IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
29447ae0e2c9SDimitry Andric   }
2945139f7f9bSDimitry Andric   IVChain &Chain = IVChainVec[ChainIdx];
2946dff0c46cSDimitry Andric 
2947dff0c46cSDimitry Andric   SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
2948dff0c46cSDimitry Andric   // This chain's NearUsers become FarUsers.
2949dff0c46cSDimitry Andric   if (!LastIncExpr->isZero()) {
2950dff0c46cSDimitry Andric     ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
2951dff0c46cSDimitry Andric                                             NearUsers.end());
2952dff0c46cSDimitry Andric     NearUsers.clear();
2953dff0c46cSDimitry Andric   }
2954dff0c46cSDimitry Andric 
2955dff0c46cSDimitry Andric   // All other uses of IVOperand become near uses of the chain.
2956dff0c46cSDimitry Andric   // We currently ignore intermediate values within SCEV expressions, assuming
2957dff0c46cSDimitry Andric   // they will eventually be used by the current chain, or can be computed
2958dff0c46cSDimitry Andric   // from one of the chain increments. To be more precise we could
2959dff0c46cSDimitry Andric   // transitively follow its users and only add leaf IV users to the set.
296091bc56edSDimitry Andric   for (User *U : IVOper->users()) {
296191bc56edSDimitry Andric     Instruction *OtherUse = dyn_cast<Instruction>(U);
2962139f7f9bSDimitry Andric     if (!OtherUse)
2963dff0c46cSDimitry Andric       continue;
2964139f7f9bSDimitry Andric     // Uses in the chain will no longer be uses if the chain is formed.
2965139f7f9bSDimitry Andric     // Include the head of the chain in this iteration (not Chain.begin()).
2966139f7f9bSDimitry Andric     IVChain::const_iterator IncIter = Chain.Incs.begin();
2967139f7f9bSDimitry Andric     IVChain::const_iterator IncEnd = Chain.Incs.end();
2968139f7f9bSDimitry Andric     for (; IncIter != IncEnd; ++IncIter) {
2969139f7f9bSDimitry Andric       if (IncIter->UserInst == OtherUse)
2970139f7f9bSDimitry Andric         break;
2971139f7f9bSDimitry Andric     }
2972139f7f9bSDimitry Andric     if (IncIter != IncEnd)
2973139f7f9bSDimitry Andric       continue;
2974139f7f9bSDimitry Andric 
2975dff0c46cSDimitry Andric     if (SE.isSCEVable(OtherUse->getType())
2976dff0c46cSDimitry Andric         && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
2977dff0c46cSDimitry Andric         && IU.isIVUserOrOperand(OtherUse)) {
2978dff0c46cSDimitry Andric       continue;
2979dff0c46cSDimitry Andric     }
2980dff0c46cSDimitry Andric     NearUsers.insert(OtherUse);
2981dff0c46cSDimitry Andric   }
2982dff0c46cSDimitry Andric 
2983dff0c46cSDimitry Andric   // Since this user is part of the chain, it's no longer considered a use
2984dff0c46cSDimitry Andric   // of the chain.
2985dff0c46cSDimitry Andric   ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
2986dff0c46cSDimitry Andric }
2987dff0c46cSDimitry Andric 
29887d523365SDimitry Andric /// Populate the vector of Chains.
2989dff0c46cSDimitry Andric ///
2990dff0c46cSDimitry Andric /// This decreases ILP at the architecture level. Targets with ample registers,
2991dff0c46cSDimitry Andric /// multiple memory ports, and no register renaming probably don't want
2992dff0c46cSDimitry Andric /// this. However, such targets should probably disable LSR altogether.
2993dff0c46cSDimitry Andric ///
2994dff0c46cSDimitry Andric /// The job of LSR is to make a reasonable choice of induction variables across
2995dff0c46cSDimitry Andric /// the loop. Subsequent passes can easily "unchain" computation exposing more
2996dff0c46cSDimitry Andric /// ILP *within the loop* if the target wants it.
2997dff0c46cSDimitry Andric ///
2998dff0c46cSDimitry Andric /// Finding the best IV chain is potentially a scheduling problem. Since LSR
2999dff0c46cSDimitry Andric /// will not reorder memory operations, it will recognize this as a chain, but
3000dff0c46cSDimitry Andric /// will generate redundant IV increments. Ideally this would be corrected later
3001dff0c46cSDimitry Andric /// by a smart scheduler:
3002dff0c46cSDimitry Andric ///        = A[i]
3003dff0c46cSDimitry Andric ///        = A[i+x]
3004dff0c46cSDimitry Andric /// A[i]   =
3005dff0c46cSDimitry Andric /// A[i+x] =
3006dff0c46cSDimitry Andric ///
3007dff0c46cSDimitry Andric /// TODO: Walk the entire domtree within this loop, not just the path to the
3008dff0c46cSDimitry Andric /// loop latch. This will discover chains on side paths, but requires
3009dff0c46cSDimitry Andric /// maintaining multiple copies of the Chains state.
3010dff0c46cSDimitry Andric void LSRInstance::CollectChains() {
30114ba319b5SDimitry Andric   LLVM_DEBUG(dbgs() << "Collecting IV Chains.\n");
3012dff0c46cSDimitry Andric   SmallVector<ChainUsers, 8> ChainUsersVec;
3013dff0c46cSDimitry Andric 
3014dff0c46cSDimitry Andric   SmallVector<BasicBlock *,8> LatchPath;
3015dff0c46cSDimitry Andric   BasicBlock *LoopHeader = L->getHeader();
3016dff0c46cSDimitry Andric   for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
3017dff0c46cSDimitry Andric        Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3018dff0c46cSDimitry Andric     LatchPath.push_back(Rung->getBlock());
3019dff0c46cSDimitry Andric   }
3020dff0c46cSDimitry Andric   LatchPath.push_back(LoopHeader);
3021dff0c46cSDimitry Andric 
3022dff0c46cSDimitry Andric   // Walk the instruction stream from the loop header to the loop latch.
30233ca95b02SDimitry Andric   for (BasicBlock *BB : reverse(LatchPath)) {
30243ca95b02SDimitry Andric     for (Instruction &I : *BB) {
3025dff0c46cSDimitry Andric       // Skip instructions that weren't seen by IVUsers analysis.
30263ca95b02SDimitry Andric       if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))
3027dff0c46cSDimitry Andric         continue;
3028dff0c46cSDimitry Andric 
3029dff0c46cSDimitry Andric       // Ignore users that are part of a SCEV expression. This way we only
3030dff0c46cSDimitry Andric       // consider leaf IV Users. This effectively rediscovers a portion of
3031dff0c46cSDimitry Andric       // IVUsers analysis but in program order this time.
30323ca95b02SDimitry Andric       if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))
3033dff0c46cSDimitry Andric           continue;
3034dff0c46cSDimitry Andric 
3035dff0c46cSDimitry Andric       // Remove this instruction from any NearUsers set it may be in.
3036dff0c46cSDimitry Andric       for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
3037dff0c46cSDimitry Andric            ChainIdx < NChains; ++ChainIdx) {
30383ca95b02SDimitry Andric         ChainUsersVec[ChainIdx].NearUsers.erase(&I);
3039dff0c46cSDimitry Andric       }
3040dff0c46cSDimitry Andric       // Search for operands that can be chained.
3041dff0c46cSDimitry Andric       SmallPtrSet<Instruction*, 4> UniqueOperands;
30423ca95b02SDimitry Andric       User::op_iterator IVOpEnd = I.op_end();
30433ca95b02SDimitry Andric       User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE);
3044dff0c46cSDimitry Andric       while (IVOpIter != IVOpEnd) {
3045dff0c46cSDimitry Andric         Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
304639d628a0SDimitry Andric         if (UniqueOperands.insert(IVOpInst).second)
30473ca95b02SDimitry Andric           ChainInstruction(&I, IVOpInst, ChainUsersVec);
304891bc56edSDimitry Andric         IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3049dff0c46cSDimitry Andric       }
3050dff0c46cSDimitry Andric     } // Continue walking down the instructions.
3051dff0c46cSDimitry Andric   } // Continue walking down the domtree.
3052dff0c46cSDimitry Andric   // Visit phi backedges to determine if the chain can generate the IV postinc.
305330785c0eSDimitry Andric   for (PHINode &PN : L->getHeader()->phis()) {
305430785c0eSDimitry Andric     if (!SE.isSCEVable(PN.getType()))
3055dff0c46cSDimitry Andric       continue;
3056dff0c46cSDimitry Andric 
3057dff0c46cSDimitry Andric     Instruction *IncV =
305830785c0eSDimitry Andric         dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
3059dff0c46cSDimitry Andric     if (IncV)
306030785c0eSDimitry Andric       ChainInstruction(&PN, IncV, ChainUsersVec);
3061dff0c46cSDimitry Andric   }
3062dff0c46cSDimitry Andric   // Remove any unprofitable chains.
3063dff0c46cSDimitry Andric   unsigned ChainIdx = 0;
3064dff0c46cSDimitry Andric   for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
3065dff0c46cSDimitry Andric        UsersIdx < NChains; ++UsersIdx) {
3066dff0c46cSDimitry Andric     if (!isProfitableChain(IVChainVec[UsersIdx],
3067139f7f9bSDimitry Andric                            ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
3068dff0c46cSDimitry Andric       continue;
3069dff0c46cSDimitry Andric     // Preserve the chain at UsersIdx.
3070dff0c46cSDimitry Andric     if (ChainIdx != UsersIdx)
3071dff0c46cSDimitry Andric       IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3072dff0c46cSDimitry Andric     FinalizeChain(IVChainVec[ChainIdx]);
3073dff0c46cSDimitry Andric     ++ChainIdx;
3074dff0c46cSDimitry Andric   }
3075dff0c46cSDimitry Andric   IVChainVec.resize(ChainIdx);
3076dff0c46cSDimitry Andric }
3077dff0c46cSDimitry Andric 
3078dff0c46cSDimitry Andric void LSRInstance::FinalizeChain(IVChain &Chain) {
30797ae0e2c9SDimitry Andric   assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
30804ba319b5SDimitry Andric   LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
3081dff0c46cSDimitry Andric 
3082ff0cc061SDimitry Andric   for (const IVInc &Inc : Chain) {
30834ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "        Inc: " << *Inc.UserInst << "\n");
3084d88c1a5aSDimitry Andric     auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
3085ff0cc061SDimitry Andric     assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
3086dff0c46cSDimitry Andric     IVIncSet.insert(UseI);
3087dff0c46cSDimitry Andric   }
3088dff0c46cSDimitry Andric }
3089dff0c46cSDimitry Andric 
3090dff0c46cSDimitry Andric /// Return true if the IVInc can be folded into an addressing mode.
3091dff0c46cSDimitry Andric static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
3092139f7f9bSDimitry Andric                              Value *Operand, const TargetTransformInfo &TTI) {
3093dff0c46cSDimitry Andric   const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
30942cab237bSDimitry Andric   if (!IncConst || !isAddressUse(TTI, UserInst, Operand))
3095dff0c46cSDimitry Andric     return false;
3096dff0c46cSDimitry Andric 
30977d523365SDimitry Andric   if (IncConst->getAPInt().getMinSignedBits() > 64)
3098dff0c46cSDimitry Andric     return false;
3099dff0c46cSDimitry Andric 
31004ba319b5SDimitry Andric   MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
3101dff0c46cSDimitry Andric   int64_t IncOffset = IncConst->getValue()->getSExtValue();
31027d523365SDimitry Andric   if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
3103139f7f9bSDimitry Andric                         IncOffset, /*HasBaseReg=*/false))
3104dff0c46cSDimitry Andric     return false;
3105dff0c46cSDimitry Andric 
3106dff0c46cSDimitry Andric   return true;
3107dff0c46cSDimitry Andric }
3108dff0c46cSDimitry Andric 
31097d523365SDimitry Andric /// Generate an add or subtract for each IVInc in a chain to materialize the IV
31107d523365SDimitry Andric /// user's operand from the previous IV user's operand.
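///
/// A minimal sketch of the intended output (made-up names): with a constant
/// step of 16, each user's operand is rewritten in terms of its predecessor,
///   %iv1 = add i64 %iv0, 16
///   %iv2 = add i64 %iv1, 16
/// rather than recomputing every value from the original induction variable.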
3111dff0c46cSDimitry Andric void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
3112f37b6182SDimitry Andric                                   SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
3113dff0c46cSDimitry Andric   // Find the new IVOperand for the head of the chain. It may have been replaced
3114dff0c46cSDimitry Andric   // by LSR.
31157ae0e2c9SDimitry Andric   const IVInc &Head = Chain.Incs[0];
3116dff0c46cSDimitry Andric   User::op_iterator IVOpEnd = Head.UserInst->op_end();
3117139f7f9bSDimitry Andric   // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
3118dff0c46cSDimitry Andric   User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
3119dff0c46cSDimitry Andric                                              IVOpEnd, L, SE);
312091bc56edSDimitry Andric   Value *IVSrc = nullptr;
3121dff0c46cSDimitry Andric   while (IVOpIter != IVOpEnd) {
3122dff0c46cSDimitry Andric     IVSrc = getWideOperand(*IVOpIter);
3123dff0c46cSDimitry Andric 
3124dff0c46cSDimitry Andric     // If this operand computes the expression that the chain needs, we may use
3125dff0c46cSDimitry Andric     // it. (Check this after setting IVSrc which is used below.)
3126dff0c46cSDimitry Andric     //
3127dff0c46cSDimitry Andric     // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
3128dff0c46cSDimitry Andric     // narrow for the chain, so we can no longer use it. We do allow using a
3129dff0c46cSDimitry Andric     // wider phi, assuming the LSR checked for free truncation. In that case we
3130dff0c46cSDimitry Andric     // should already have a truncate on this operand such that
3131dff0c46cSDimitry Andric     // getSCEV(IVSrc) == IncExpr.
3132dff0c46cSDimitry Andric     if (SE.getSCEV(*IVOpIter) == Head.IncExpr
3133dff0c46cSDimitry Andric         || SE.getSCEV(IVSrc) == Head.IncExpr) {
3134dff0c46cSDimitry Andric       break;
3135dff0c46cSDimitry Andric     }
313691bc56edSDimitry Andric     IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3137dff0c46cSDimitry Andric   }
3138dff0c46cSDimitry Andric   if (IVOpIter == IVOpEnd) {
3139dff0c46cSDimitry Andric     // Gracefully give up on this chain.
31404ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
3141dff0c46cSDimitry Andric     return;
3142dff0c46cSDimitry Andric   }
3143dff0c46cSDimitry Andric 
31444ba319b5SDimitry Andric   LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
3145dff0c46cSDimitry Andric   Type *IVTy = IVSrc->getType();
3146dff0c46cSDimitry Andric   Type *IntTy = SE.getEffectiveSCEVType(IVTy);
314791bc56edSDimitry Andric   const SCEV *LeftOverExpr = nullptr;
3148ff0cc061SDimitry Andric   for (const IVInc &Inc : Chain) {
3149ff0cc061SDimitry Andric     Instruction *InsertPt = Inc.UserInst;
3150dff0c46cSDimitry Andric     if (isa<PHINode>(InsertPt))
3151dff0c46cSDimitry Andric       InsertPt = L->getLoopLatch()->getTerminator();
3152dff0c46cSDimitry Andric 
3153dff0c46cSDimitry Andric     // IVOper will replace the current IV User's operand. IVSrc is the IV
3154dff0c46cSDimitry Andric     // value currently held in a register.
3155dff0c46cSDimitry Andric     Value *IVOper = IVSrc;
3156ff0cc061SDimitry Andric     if (!Inc.IncExpr->isZero()) {
3157dff0c46cSDimitry Andric       // IncExpr was the result of subtraction of two narrow values, so must
3158dff0c46cSDimitry Andric       // be signed.
3159ff0cc061SDimitry Andric       const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy);
3160dff0c46cSDimitry Andric       LeftOverExpr = LeftOverExpr ?
3161dff0c46cSDimitry Andric         SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3162dff0c46cSDimitry Andric     }
3163dff0c46cSDimitry Andric     if (LeftOverExpr && !LeftOverExpr->isZero()) {
3164dff0c46cSDimitry Andric       // Expand the IV increment.
3165dff0c46cSDimitry Andric       Rewriter.clearPostInc();
3166dff0c46cSDimitry Andric       Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3167dff0c46cSDimitry Andric       const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
3168dff0c46cSDimitry Andric                                              SE.getUnknown(IncV));
3169dff0c46cSDimitry Andric       IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3170dff0c46cSDimitry Andric 
3171dff0c46cSDimitry Andric       // If an IV increment can't be folded, use it as the next IV value.
3172ff0cc061SDimitry Andric       if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {
3173dff0c46cSDimitry Andric         assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
3174dff0c46cSDimitry Andric         IVSrc = IVOper;
317591bc56edSDimitry Andric         LeftOverExpr = nullptr;
3176dff0c46cSDimitry Andric       }
3177dff0c46cSDimitry Andric     }
3178ff0cc061SDimitry Andric     Type *OperTy = Inc.IVOperand->getType();
3179dff0c46cSDimitry Andric     if (IVTy != OperTy) {
3180dff0c46cSDimitry Andric       assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
3181dff0c46cSDimitry Andric              "cannot extend a chained IV");
3182dff0c46cSDimitry Andric       IRBuilder<> Builder(InsertPt);
3183dff0c46cSDimitry Andric       IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
3184dff0c46cSDimitry Andric     }
3185ff0cc061SDimitry Andric     Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
318697bc6c73SDimitry Andric     DeadInsts.emplace_back(Inc.IVOperand);
3187dff0c46cSDimitry Andric   }
3188dff0c46cSDimitry Andric   // If LSR created a new, wider phi, we may also replace its postinc. We only
3189dff0c46cSDimitry Andric   // do this if we also found a wide value for the head of the chain.
31907ae0e2c9SDimitry Andric   if (isa<PHINode>(Chain.tailUserInst())) {
319130785c0eSDimitry Andric     for (PHINode &Phi : L->getHeader()->phis()) {
319230785c0eSDimitry Andric       if (!isCompatibleIVType(&Phi, IVSrc))
3193dff0c46cSDimitry Andric         continue;
3194dff0c46cSDimitry Andric       Instruction *PostIncV = dyn_cast<Instruction>(
319530785c0eSDimitry Andric           Phi.getIncomingValueForBlock(L->getLoopLatch()));
3196dff0c46cSDimitry Andric       if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
3197dff0c46cSDimitry Andric         continue;
3198dff0c46cSDimitry Andric       Value *IVOper = IVSrc;
3199dff0c46cSDimitry Andric       Type *PostIncTy = PostIncV->getType();
3200dff0c46cSDimitry Andric       if (IVTy != PostIncTy) {
3201dff0c46cSDimitry Andric         assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
3202dff0c46cSDimitry Andric         IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
3203dff0c46cSDimitry Andric         Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
3204dff0c46cSDimitry Andric         IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
3205dff0c46cSDimitry Andric       }
320630785c0eSDimitry Andric       Phi.replaceUsesOfWith(PostIncV, IVOper);
320797bc6c73SDimitry Andric       DeadInsts.emplace_back(PostIncV);
3208dff0c46cSDimitry Andric     }
3209dff0c46cSDimitry Andric   }
3210dff0c46cSDimitry Andric }
3211dff0c46cSDimitry Andric 
3212f22ef01cSRoman Divacky void LSRInstance::CollectFixupsAndInitialFormulae() {
3213ff0cc061SDimitry Andric   for (const IVStrideUse &U : IU) {
3214ff0cc061SDimitry Andric     Instruction *UserInst = U.getUser();
3215dff0c46cSDimitry Andric     // Skip IV users that are part of profitable IV Chains.
3216d88c1a5aSDimitry Andric     User::op_iterator UseI =
3217d88c1a5aSDimitry Andric         find(UserInst->operands(), U.getOperandValToReplace());
3218dff0c46cSDimitry Andric     assert(UseI != UserInst->op_end() && "cannot find IV operand");
32197a7e6055SDimitry Andric     if (IVIncSet.count(UseI)) {
32204ba319b5SDimitry Andric       LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');
3221dff0c46cSDimitry Andric       continue;
32227a7e6055SDimitry Andric     }
3223dff0c46cSDimitry Andric 
3224f22ef01cSRoman Divacky     LSRUse::KindType Kind = LSRUse::Basic;
32257d523365SDimitry Andric     MemAccessTy AccessTy;
32262cab237bSDimitry Andric     if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {
3227f22ef01cSRoman Divacky       Kind = LSRUse::Address;
32284ba319b5SDimitry Andric       AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace());
3229f22ef01cSRoman Divacky     }
3230f22ef01cSRoman Divacky 
3231ff0cc061SDimitry Andric     const SCEV *S = IU.getExpr(U);
3232d88c1a5aSDimitry Andric     PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();
3233f22ef01cSRoman Divacky 
3234f22ef01cSRoman Divacky     // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
3235f22ef01cSRoman Divacky     // (N - i == 0), and this allows (N - i) to be the expression that we work
3236f22ef01cSRoman Divacky     // with rather than just N or i, so we can consider the register
3237f22ef01cSRoman Divacky     // requirements for both N and i at the same time. Limiting this code to
3238f22ef01cSRoman Divacky     // equality icmps is not a problem because all interesting loops use
3239f22ef01cSRoman Divacky     // equality icmps, thanks to IndVarSimplify.
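    // For illustration (made-up names), a latch test such as
    //   %c = icmp eq i64 %i.next, %n
    // becomes an ICmpZero use whose expression is roughly (%n - {1,+,1}<%L>),
    // so the register requirements of %n and the IV are costed together.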
3240d88c1a5aSDimitry Andric     if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst))
3241f22ef01cSRoman Divacky       if (CI->isEquality()) {
3242f22ef01cSRoman Divacky         // Swap the operands if needed to put the OperandValToReplace on the
3243f22ef01cSRoman Divacky         // left, for consistency.
3244f22ef01cSRoman Divacky         Value *NV = CI->getOperand(1);
3245d88c1a5aSDimitry Andric         if (NV == U.getOperandValToReplace()) {
3246f22ef01cSRoman Divacky           CI->setOperand(1, CI->getOperand(0));
3247f22ef01cSRoman Divacky           CI->setOperand(0, NV);
3248f22ef01cSRoman Divacky           NV = CI->getOperand(1);
3249f22ef01cSRoman Divacky           Changed = true;
3250f22ef01cSRoman Divacky         }
3251f22ef01cSRoman Divacky 
3252f22ef01cSRoman Divacky         // x == y  -->  x - y == 0
3253f22ef01cSRoman Divacky         const SCEV *N = SE.getSCEV(NV);
3254f785676fSDimitry Andric         if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
3255bd5abe19SDimitry Andric           // S is normalized, so normalize N before folding it into S
3256bd5abe19SDimitry Andric           // to keep the result normalized.
32577a7e6055SDimitry Andric           N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
3258f22ef01cSRoman Divacky           Kind = LSRUse::ICmpZero;
3259f22ef01cSRoman Divacky           S = SE.getMinusSCEV(N, S);
3260f22ef01cSRoman Divacky         }
3261f22ef01cSRoman Divacky 
3262f22ef01cSRoman Divacky         // -1 and the negations of all interesting strides (except the negation
3263f22ef01cSRoman Divacky         // of -1) are now also interesting.
3264f22ef01cSRoman Divacky         for (size_t i = 0, e = Factors.size(); i != e; ++i)
3265f22ef01cSRoman Divacky           if (Factors[i] != -1)
3266f22ef01cSRoman Divacky             Factors.insert(-(uint64_t)Factors[i]);
3267f22ef01cSRoman Divacky         Factors.insert(-1);
3268f22ef01cSRoman Divacky       }
3269f22ef01cSRoman Divacky 
3270d88c1a5aSDimitry Andric     // Get or create an LSRUse.
3271f22ef01cSRoman Divacky     std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
3272d88c1a5aSDimitry Andric     size_t LUIdx = P.first;
3273d88c1a5aSDimitry Andric     int64_t Offset = P.second;
3274d88c1a5aSDimitry Andric     LSRUse &LU = Uses[LUIdx];
3275d88c1a5aSDimitry Andric 
3276d88c1a5aSDimitry Andric     // Record the fixup.
3277d88c1a5aSDimitry Andric     LSRFixup &LF = LU.getNewFixup();
3278d88c1a5aSDimitry Andric     LF.UserInst = UserInst;
3279d88c1a5aSDimitry Andric     LF.OperandValToReplace = U.getOperandValToReplace();
3280d88c1a5aSDimitry Andric     LF.PostIncLoops = TmpPostIncLoops;
3281d88c1a5aSDimitry Andric     LF.Offset = Offset;
3282f22ef01cSRoman Divacky     LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3283d88c1a5aSDimitry Andric 
3284e580952dSDimitry Andric     if (!LU.WidestFixupType ||
3285e580952dSDimitry Andric         SE.getTypeSizeInBits(LU.WidestFixupType) <
3286e580952dSDimitry Andric         SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
3287e580952dSDimitry Andric       LU.WidestFixupType = LF.OperandValToReplace->getType();
3288f22ef01cSRoman Divacky 
3289f22ef01cSRoman Divacky     // If this is the first use of this LSRUse, give it a formula.
3290f22ef01cSRoman Divacky     if (LU.Formulae.empty()) {
3291d88c1a5aSDimitry Andric       InsertInitialFormula(S, LU, LUIdx);
3292d88c1a5aSDimitry Andric       CountRegisters(LU.Formulae.back(), LUIdx);
3293f22ef01cSRoman Divacky     }
3294f22ef01cSRoman Divacky   }
3295f22ef01cSRoman Divacky 
32964ba319b5SDimitry Andric   LLVM_DEBUG(print_fixups(dbgs()));
3297f22ef01cSRoman Divacky }
3298f22ef01cSRoman Divacky 
32997d523365SDimitry Andric /// Insert a formula for the given expression into the given use, separating out
33007d523365SDimitry Andric /// loop-variant portions from loop-invariant and loop-computable portions.
3301f22ef01cSRoman Divacky void
3302f22ef01cSRoman Divacky LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
3303f785676fSDimitry Andric   // Mark uses whose expressions cannot be expanded.
3304f785676fSDimitry Andric   if (!isSafeToExpand(S, SE))
3305f785676fSDimitry Andric     LU.RigidFormula = true;
3306f785676fSDimitry Andric 
3307f22ef01cSRoman Divacky   Formula F;
33087d523365SDimitry Andric   F.initialMatch(S, L, SE);
3309f22ef01cSRoman Divacky   bool Inserted = InsertFormula(LU, LUIdx, F);
3310f22ef01cSRoman Divacky   assert(Inserted && "Initial formula already exists!"); (void)Inserted;
3311f22ef01cSRoman Divacky }
3312f22ef01cSRoman Divacky 
33137d523365SDimitry Andric /// Insert a simple single-register formula for the given expression into the
33147d523365SDimitry Andric /// given use.
3315f22ef01cSRoman Divacky void
3316f22ef01cSRoman Divacky LSRInstance::InsertSupplementalFormula(const SCEV *S,
3317f22ef01cSRoman Divacky                                        LSRUse &LU, size_t LUIdx) {
3318f22ef01cSRoman Divacky   Formula F;
3319f22ef01cSRoman Divacky   F.BaseRegs.push_back(S);
3320139f7f9bSDimitry Andric   F.HasBaseReg = true;
3321f22ef01cSRoman Divacky   bool Inserted = InsertFormula(LU, LUIdx, F);
3322f22ef01cSRoman Divacky   assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
3323f22ef01cSRoman Divacky }
3324f22ef01cSRoman Divacky 
33257d523365SDimitry Andric /// Note which registers are used by the given formula, updating RegUses.
3326f22ef01cSRoman Divacky void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
3327f22ef01cSRoman Divacky   if (F.ScaledReg)
33287d523365SDimitry Andric     RegUses.countRegister(F.ScaledReg, LUIdx);
3329ff0cc061SDimitry Andric   for (const SCEV *BaseReg : F.BaseRegs)
33307d523365SDimitry Andric     RegUses.countRegister(BaseReg, LUIdx);
3331f22ef01cSRoman Divacky }
3332f22ef01cSRoman Divacky 
33337d523365SDimitry Andric /// If the given formula has not yet been inserted, add it to the list, and
33347d523365SDimitry Andric /// return true. Return false otherwise.
3335f22ef01cSRoman Divacky bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
333691bc56edSDimitry Andric   // Do not insert a formula that we will not be able to expand.
333791bc56edSDimitry Andric   assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
333891bc56edSDimitry Andric          "Formula is illegal");
33397a7e6055SDimitry Andric 
33407a7e6055SDimitry Andric   if (!LU.InsertFormula(F, *L))
3341f22ef01cSRoman Divacky     return false;
3342f22ef01cSRoman Divacky 
3343f22ef01cSRoman Divacky   CountRegisters(F, LUIdx);
3344f22ef01cSRoman Divacky   return true;
3345f22ef01cSRoman Divacky }
3346f22ef01cSRoman Divacky 
33477d523365SDimitry Andric /// Check for other uses of loop-invariant values which we're tracking. These
33487d523365SDimitry Andric /// other uses will pin these values in registers, making them less profitable
33497d523365SDimitry Andric /// for elimination.
3350f22ef01cSRoman Divacky /// TODO: This currently misses non-constant addrec step registers.
3351f22ef01cSRoman Divacky /// TODO: Should this give more weight to users inside the loop?
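///
/// For illustration (made-up value %n): if %n feeds an IV expression inside
/// the loop and is also compared against after the loop, the outside use
/// keeps %n live in a register anyway, so a formula that eliminates %n
/// inside the loop saves less than it might appear to.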
3352f22ef01cSRoman Divacky void
3353f22ef01cSRoman Divacky LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3354f22ef01cSRoman Divacky   SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
335539d628a0SDimitry Andric   SmallPtrSet<const SCEV *, 32> Visited;
3356f22ef01cSRoman Divacky 
3357f22ef01cSRoman Divacky   while (!Worklist.empty()) {
3358f22ef01cSRoman Divacky     const SCEV *S = Worklist.pop_back_val();
3359f22ef01cSRoman Divacky 
336039d628a0SDimitry Andric     // Don't process the same SCEV twice
336139d628a0SDimitry Andric     if (!Visited.insert(S).second)
336239d628a0SDimitry Andric       continue;
336339d628a0SDimitry Andric 
3364f22ef01cSRoman Divacky     if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
3365ffd1746dSEd Schouten       Worklist.append(N->op_begin(), N->op_end());
3366f22ef01cSRoman Divacky     else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
3367f22ef01cSRoman Divacky       Worklist.push_back(C->getOperand());
3368f22ef01cSRoman Divacky     else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
3369f22ef01cSRoman Divacky       Worklist.push_back(D->getLHS());
3370f22ef01cSRoman Divacky       Worklist.push_back(D->getRHS());
337191bc56edSDimitry Andric     } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
337291bc56edSDimitry Andric       const Value *V = US->getValue();
3373ffd1746dSEd Schouten       if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
3374ffd1746dSEd Schouten         // Look for instructions defined outside the loop.
3375f22ef01cSRoman Divacky         if (L->contains(Inst)) continue;
3376ffd1746dSEd Schouten       } else if (isa<UndefValue>(V))
3377ffd1746dSEd Schouten         // Undef doesn't have a live range, so it doesn't matter.
3378ffd1746dSEd Schouten         continue;
337991bc56edSDimitry Andric       for (const Use &U : V->uses()) {
338091bc56edSDimitry Andric         const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
3381f22ef01cSRoman Divacky         // Ignore non-instructions.
3382f22ef01cSRoman Divacky         if (!UserInst)
3383f22ef01cSRoman Divacky           continue;
3384f22ef01cSRoman Divacky         // Ignore instructions in other functions (as can happen with
3385f22ef01cSRoman Divacky         // Constants).
3386f22ef01cSRoman Divacky         if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
3387f22ef01cSRoman Divacky           continue;
3388f22ef01cSRoman Divacky         // Ignore instructions not dominated by the loop.
3389f22ef01cSRoman Divacky         const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3390f22ef01cSRoman Divacky           UserInst->getParent() :
3391f22ef01cSRoman Divacky           cast<PHINode>(UserInst)->getIncomingBlock(
339291bc56edSDimitry Andric             PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
3393f22ef01cSRoman Divacky         if (!DT.dominates(L->getHeader(), UseBB))
3394f22ef01cSRoman Divacky           continue;
33957d523365SDimitry Andric         // Don't bother if the instruction is in a BB which ends in an EHPad.
33967d523365SDimitry Andric         if (UseBB->getTerminator()->isEHPad())
33977d523365SDimitry Andric           continue;
339894c53d40SDimitry Andric         // Don't bother rewriting PHIs in catchswitch blocks.
339994c53d40SDimitry Andric         if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator()))
340094c53d40SDimitry Andric           continue;
3401f22ef01cSRoman Divacky         // Ignore uses which are part of other SCEV expressions, to avoid
3402f22ef01cSRoman Divacky         // analyzing them multiple times.
3403f22ef01cSRoman Divacky         if (SE.isSCEVable(UserInst->getType())) {
3404f22ef01cSRoman Divacky           const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
3405f22ef01cSRoman Divacky           // If the user is a no-op, look through to its uses.
3406f22ef01cSRoman Divacky           if (!isa<SCEVUnknown>(UserS))
3407f22ef01cSRoman Divacky             continue;
340891bc56edSDimitry Andric           if (UserS == US) {
3409f22ef01cSRoman Divacky             Worklist.push_back(
3410f22ef01cSRoman Divacky               SE.getUnknown(const_cast<Instruction *>(UserInst)));
3411f22ef01cSRoman Divacky             continue;
3412f22ef01cSRoman Divacky           }
3413f22ef01cSRoman Divacky         }
3414f22ef01cSRoman Divacky         // Ignore icmp instructions which are already being analyzed.
3415f22ef01cSRoman Divacky         if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
341691bc56edSDimitry Andric           unsigned OtherIdx = !U.getOperandNo();
3417f22ef01cSRoman Divacky           Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
34182754fe60SDimitry Andric           if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
3419f22ef01cSRoman Divacky             continue;
3420f22ef01cSRoman Divacky         }
3421f22ef01cSRoman Divacky 
34227d523365SDimitry Andric         std::pair<size_t, int64_t> P = getUse(
34237d523365SDimitry Andric             S, LSRUse::Basic, MemAccessTy());
3424d88c1a5aSDimitry Andric         size_t LUIdx = P.first;
3425d88c1a5aSDimitry Andric         int64_t Offset = P.second;
3426d88c1a5aSDimitry Andric         LSRUse &LU = Uses[LUIdx];
3427d88c1a5aSDimitry Andric         LSRFixup &LF = LU.getNewFixup();
3428d88c1a5aSDimitry Andric         LF.UserInst = const_cast<Instruction *>(UserInst);
3429d88c1a5aSDimitry Andric         LF.OperandValToReplace = U;
3430d88c1a5aSDimitry Andric         LF.Offset = Offset;
3431f22ef01cSRoman Divacky         LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3432e580952dSDimitry Andric         if (!LU.WidestFixupType ||
3433e580952dSDimitry Andric             SE.getTypeSizeInBits(LU.WidestFixupType) <
3434e580952dSDimitry Andric             SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
3435e580952dSDimitry Andric           LU.WidestFixupType = LF.OperandValToReplace->getType();
3436d88c1a5aSDimitry Andric         InsertSupplementalFormula(US, LU, LUIdx);
3437f22ef01cSRoman Divacky         CountRegisters(LU.Formulae.back(), Uses.size() - 1);
3438f22ef01cSRoman Divacky         break;
3439f22ef01cSRoman Divacky       }
3440f22ef01cSRoman Divacky     }
3441f22ef01cSRoman Divacky   }
3442f22ef01cSRoman Divacky }
3443f22ef01cSRoman Divacky 
34447d523365SDimitry Andric /// Split S into subexpressions which can be pulled out into separate
34457d523365SDimitry Andric /// registers. If C is non-null, multiply each subexpression by C.
34467ae0e2c9SDimitry Andric ///
34477ae0e2c9SDimitry Andric /// Return the remainder expression after factoring out the subexpressions
34487ae0e2c9SDimitry Andric /// captured by Ops. If Ops captures the whole expression, return NULL.
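/// For example, ((2 * %x) + {4,+,1}<%L>) splits into the subexpressions
/// (2 * %x), 4, and {0,+,1}<%L>, all pushed onto Ops, and null is returned
/// because nothing remains.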
CollectSubexprs(const SCEV * S,const SCEVConstant * C,SmallVectorImpl<const SCEV * > & Ops,const Loop * L,ScalarEvolution & SE,unsigned Depth=0)34497ae0e2c9SDimitry Andric static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
3450f22ef01cSRoman Divacky                                    SmallVectorImpl<const SCEV *> &Ops,
3451ffd1746dSEd Schouten                                    const Loop *L,
34527ae0e2c9SDimitry Andric                                    ScalarEvolution &SE,
34537ae0e2c9SDimitry Andric                                    unsigned Depth = 0) {
34547ae0e2c9SDimitry Andric   // Arbitrarily cap recursion to protect compile time.
34557ae0e2c9SDimitry Andric   if (Depth >= 3)
34567ae0e2c9SDimitry Andric     return S;
34577ae0e2c9SDimitry Andric 
3458f22ef01cSRoman Divacky   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
3459f22ef01cSRoman Divacky     // Break out add operands.
3460ff0cc061SDimitry Andric     for (const SCEV *S : Add->operands()) {
3461ff0cc061SDimitry Andric       const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth+1);
34627ae0e2c9SDimitry Andric       if (Remainder)
34637ae0e2c9SDimitry Andric         Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
34647ae0e2c9SDimitry Andric     }
346591bc56edSDimitry Andric     return nullptr;
3466f22ef01cSRoman Divacky   } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
3467f22ef01cSRoman Divacky     // Split a non-zero base out of an addrec.
3468d88c1a5aSDimitry Andric     if (AR->getStart()->isZero() || !AR->isAffine())
34697ae0e2c9SDimitry Andric       return S;
34707ae0e2c9SDimitry Andric 
34717ae0e2c9SDimitry Andric     const SCEV *Remainder = CollectSubexprs(AR->getStart(),
34727ae0e2c9SDimitry Andric                                             C, Ops, L, SE, Depth+1);
34737ae0e2c9SDimitry Andric     // Split the non-zero AddRec unless it is part of a nested recurrence that
34747ae0e2c9SDimitry Andric     // does not pertain to this loop.
34757ae0e2c9SDimitry Andric     if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
34767ae0e2c9SDimitry Andric       Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
347791bc56edSDimitry Andric       Remainder = nullptr;
34787ae0e2c9SDimitry Andric     }
34797ae0e2c9SDimitry Andric     if (Remainder != AR->getStart()) {
34807ae0e2c9SDimitry Andric       if (!Remainder)
34817ae0e2c9SDimitry Andric         Remainder = SE.getConstant(AR->getType(), 0);
34827ae0e2c9SDimitry Andric       return SE.getAddRecExpr(Remainder,
3483f22ef01cSRoman Divacky                               AR->getStepRecurrence(SE),
34843b0f4066SDimitry Andric                               AR->getLoop(),
34853b0f4066SDimitry Andric                               //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
34867ae0e2c9SDimitry Andric                               SCEV::FlagAnyWrap);
3487f22ef01cSRoman Divacky     }
3488f22ef01cSRoman Divacky   } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
3489f22ef01cSRoman Divacky     // Break (C * (a + b + c)) into C*a + C*b + C*c.
34907ae0e2c9SDimitry Andric     if (Mul->getNumOperands() != 2)
34917ae0e2c9SDimitry Andric       return S;
3492f22ef01cSRoman Divacky     if (const SCEVConstant *Op0 =
3493f22ef01cSRoman Divacky         dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
34947ae0e2c9SDimitry Andric       C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
34957ae0e2c9SDimitry Andric       const SCEV *Remainder =
34967ae0e2c9SDimitry Andric         CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
34977ae0e2c9SDimitry Andric       if (Remainder)
34987ae0e2c9SDimitry Andric         Ops.push_back(SE.getMulExpr(C, Remainder));
349991bc56edSDimitry Andric       return nullptr;
3500f22ef01cSRoman Divacky     }
3501f22ef01cSRoman Divacky   }
35027ae0e2c9SDimitry Andric   return S;
3503f22ef01cSRoman Divacky }
3504f22ef01cSRoman Divacky 
35054ba319b5SDimitry Andric /// Return true if the SCEV represents a value that may end up as a
35064ba319b5SDimitry Andric /// post-increment operation.
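/// For example, on a target with post-indexed loads, a load whose address is
/// {%base,+,4}<%L> may fold the increment into the memory access, so such a
/// value is usually better left intact than split into extra base registers.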
mayUsePostIncMode(const TargetTransformInfo & TTI,LSRUse & LU,const SCEV * S,const Loop * L,ScalarEvolution & SE)35074ba319b5SDimitry Andric static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
35084ba319b5SDimitry Andric                               LSRUse &LU, const SCEV *S, const Loop *L,
35094ba319b5SDimitry Andric                               ScalarEvolution &SE) {
35104ba319b5SDimitry Andric   if (LU.Kind != LSRUse::Address ||
35114ba319b5SDimitry Andric       !LU.AccessTy.getType()->isIntOrIntVectorTy())
35124ba319b5SDimitry Andric     return false;
35134ba319b5SDimitry Andric   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
35144ba319b5SDimitry Andric   if (!AR)
35154ba319b5SDimitry Andric     return false;
35164ba319b5SDimitry Andric   const SCEV *LoopStep = AR->getStepRecurrence(SE);
35174ba319b5SDimitry Andric   if (!isa<SCEVConstant>(LoopStep))
35184ba319b5SDimitry Andric     return false;
35194ba319b5SDimitry Andric   if (LU.AccessTy.getType()->getScalarSizeInBits() !=
35204ba319b5SDimitry Andric       LoopStep->getType()->getScalarSizeInBits())
35214ba319b5SDimitry Andric     return false;
35224ba319b5SDimitry Andric   // Check if a post-indexed load/store can be used.
35234ba319b5SDimitry Andric   if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
35244ba319b5SDimitry Andric       TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
35254ba319b5SDimitry Andric     const SCEV *LoopStart = AR->getStart();
35264ba319b5SDimitry Andric     if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
35274ba319b5SDimitry Andric       return true;
35284ba319b5SDimitry Andric   }
35294ba319b5SDimitry Andric   return false;
35304ba319b5SDimitry Andric }
35314ba319b5SDimitry Andric 
35324ba319b5SDimitry Andric /// Helper function for LSRInstance::GenerateReassociations.
GenerateReassociationsImpl(LSRUse & LU,unsigned LUIdx,const Formula & Base,unsigned Depth,size_t Idx,bool IsScaledReg)353391bc56edSDimitry Andric void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
353491bc56edSDimitry Andric                                              const Formula &Base,
353591bc56edSDimitry Andric                                              unsigned Depth, size_t Idx,
353691bc56edSDimitry Andric                                              bool IsScaledReg) {
353791bc56edSDimitry Andric   const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
35384ba319b5SDimitry Andric   // Don't generate reassociations for the base register of a value that
35394ba319b5SDimitry Andric   // may generate a post-increment operation. The reason is that the
35404ba319b5SDimitry Andric   // reassociations cause extra base+register formulae to be created,
35414ba319b5SDimitry Andric   // and possibly chosen, but the post-increment form is more efficient.
35424ba319b5SDimitry Andric   if (TTI.shouldFavorPostInc() && mayUsePostIncMode(TTI, LU, BaseReg, L, SE))
35434ba319b5SDimitry Andric     return;
3544e580952dSDimitry Andric   SmallVector<const SCEV *, 8> AddOps;
354591bc56edSDimitry Andric   const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
35467ae0e2c9SDimitry Andric   if (Remainder)
35477ae0e2c9SDimitry Andric     AddOps.push_back(Remainder);
3548ffd1746dSEd Schouten 
354991bc56edSDimitry Andric   if (AddOps.size() == 1)
355091bc56edSDimitry Andric     return;
3551f22ef01cSRoman Divacky 
3552f22ef01cSRoman Divacky   for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
355391bc56edSDimitry Andric                                                      JE = AddOps.end();
355491bc56edSDimitry Andric        J != JE; ++J) {
3555e580952dSDimitry Andric     // Loop-variant "unknown" values are uninteresting; we won't be able to
3556e580952dSDimitry Andric     // do anything meaningful with them.
35572754fe60SDimitry Andric     if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
3558e580952dSDimitry Andric       continue;
3559e580952dSDimitry Andric 
3560f22ef01cSRoman Divacky     // Don't pull a constant into a register if the constant could be folded
3561f22ef01cSRoman Divacky     // into an immediate field.
3562139f7f9bSDimitry Andric     if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3563139f7f9bSDimitry Andric                          LU.AccessTy, *J, Base.getNumRegs() > 1))
3564f22ef01cSRoman Divacky       continue;
3565f22ef01cSRoman Divacky 
3566f22ef01cSRoman Divacky     // Collect all operands except *J.
356791bc56edSDimitry Andric     SmallVector<const SCEV *, 8> InnerAddOps(
356891bc56edSDimitry Andric         ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
356991bc56edSDimitry Andric     InnerAddOps.append(std::next(J),
357091bc56edSDimitry Andric                        ((const SmallVector<const SCEV *, 8> &)AddOps).end());
3571f22ef01cSRoman Divacky 
3572f22ef01cSRoman Divacky     // Don't leave just a constant behind in a register if the constant could
3573f22ef01cSRoman Divacky     // be folded into an immediate field.
3574f22ef01cSRoman Divacky     if (InnerAddOps.size() == 1 &&
3575139f7f9bSDimitry Andric         isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3576139f7f9bSDimitry Andric                          LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
3577f22ef01cSRoman Divacky       continue;
3578f22ef01cSRoman Divacky 
3579f22ef01cSRoman Divacky     const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
3580f22ef01cSRoman Divacky     if (InnerSum->isZero())
3581f22ef01cSRoman Divacky       continue;
3582f22ef01cSRoman Divacky     Formula F = Base;
3583bd5abe19SDimitry Andric 
3584bd5abe19SDimitry Andric     // Add the remaining pieces of the add back into the new formula.
3585bd5abe19SDimitry Andric     const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
358691bc56edSDimitry Andric     if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
3587139f7f9bSDimitry Andric         TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3588bd5abe19SDimitry Andric                                 InnerSumSC->getValue()->getZExtValue())) {
358991bc56edSDimitry Andric       F.UnfoldedOffset =
359091bc56edSDimitry Andric           (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
359191bc56edSDimitry Andric       if (IsScaledReg)
359291bc56edSDimitry Andric         F.ScaledReg = nullptr;
359391bc56edSDimitry Andric       else
359491bc56edSDimitry Andric         F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
359591bc56edSDimitry Andric     } else if (IsScaledReg)
359691bc56edSDimitry Andric       F.ScaledReg = InnerSum;
359791bc56edSDimitry Andric     else
359891bc56edSDimitry Andric       F.BaseRegs[Idx] = InnerSum;
3599bd5abe19SDimitry Andric 
3600bd5abe19SDimitry Andric     // Add J as its own register, or an unfolded immediate.
3601bd5abe19SDimitry Andric     const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
3602139f7f9bSDimitry Andric     if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
3603139f7f9bSDimitry Andric         TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3604bd5abe19SDimitry Andric                                 SC->getValue()->getZExtValue()))
360591bc56edSDimitry Andric       F.UnfoldedOffset =
360691bc56edSDimitry Andric           (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
3607bd5abe19SDimitry Andric     else
3608f22ef01cSRoman Divacky       F.BaseRegs.push_back(*J);
360991bc56edSDimitry Andric     // We may have changed the number of registers in the base regs; adjust
361091bc56edSDimitry Andric     // the formula accordingly.
36117a7e6055SDimitry Andric     F.canonicalize(*L);
3612bd5abe19SDimitry Andric 
3613f22ef01cSRoman Divacky     if (InsertFormula(LU, LUIdx, F))
3614f22ef01cSRoman Divacky       // If that formula hadn't been seen before, recurse to find more like
3615f22ef01cSRoman Divacky       // it.
36164ba319b5SDimitry Andric       // Add a term based on Log16(AddOps.size()), which is the same as
36174ba319b5SDimitry Andric       // Log2_32(AddOps.size()) >> 2, because Depth alone is not enough to
36184ba319b5SDimitry Andric       // bound compile time. This means that every time AddOps.size() is
36194ba319b5SDimitry Andric       // greater than 16^x, we add x to Depth.
36204ba319b5SDimitry Andric       GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
36214ba319b5SDimitry Andric                              Depth + 1 + (Log2_32(AddOps.size()) >> 2));
3622f22ef01cSRoman Divacky   }
3623f22ef01cSRoman Divacky }
362491bc56edSDimitry Andric 
36257d523365SDimitry Andric /// Split out subexpressions from adds and the bases of addrecs.
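/// For example, a formula with the single base register (%a + {0,+,1}<%L>) can
/// be reassociated into one with the two base registers %a and {0,+,1}<%L>,
/// which may then be shared with other uses.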
GenerateReassociations(LSRUse & LU,unsigned LUIdx,Formula Base,unsigned Depth)362691bc56edSDimitry Andric void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
362791bc56edSDimitry Andric                                          Formula Base, unsigned Depth) {
36287a7e6055SDimitry Andric   assert(Base.isCanonical(*L) && "Input must be in the canonical form");
362991bc56edSDimitry Andric   // Arbitrarily cap recursion to protect compile time.
363091bc56edSDimitry Andric   if (Depth >= 3)
363191bc56edSDimitry Andric     return;
363291bc56edSDimitry Andric 
363391bc56edSDimitry Andric   for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
363491bc56edSDimitry Andric     GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
363591bc56edSDimitry Andric 
363691bc56edSDimitry Andric   if (Base.Scale == 1)
363791bc56edSDimitry Andric     GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
363891bc56edSDimitry Andric                                /* Idx */ -1, /* IsScaledReg */ true);
3639f22ef01cSRoman Divacky }
3640f22ef01cSRoman Divacky 
36417d523365SDimitry Andric /// Generate a formula consisting of all of the loop-dominating registers added
36427d523365SDimitry Andric /// into a single register.
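/// For example, a formula with base registers %a, %b, and {0,+,1}<%L>, where %a
/// and %b are loop-invariant, is rewritten to use the single combined register
/// (%a + %b) alongside {0,+,1}<%L>, reducing register pressure inside the loop.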
GenerateCombinations(LSRUse & LU,unsigned LUIdx,Formula Base)3643f22ef01cSRoman Divacky void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
3644f22ef01cSRoman Divacky                                        Formula Base) {
3645f22ef01cSRoman Divacky   // This method is only interesting on a plurality of registers.
3646*b5893f02SDimitry Andric   if (Base.BaseRegs.size() + (Base.Scale == 1) +
3647*b5893f02SDimitry Andric       (Base.UnfoldedOffset != 0) <= 1)
364891bc56edSDimitry Andric     return;
3649f22ef01cSRoman Divacky 
365091bc56edSDimitry Andric   // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
365191bc56edSDimitry Andric   // processing the formula.
36527d523365SDimitry Andric   Base.unscale();
3653f22ef01cSRoman Divacky   SmallVector<const SCEV *, 4> Ops;
3654*b5893f02SDimitry Andric   Formula NewBase = Base;
3655*b5893f02SDimitry Andric   NewBase.BaseRegs.clear();
3656*b5893f02SDimitry Andric   Type *CombinedIntegerType = nullptr;
3657ff0cc061SDimitry Andric   for (const SCEV *BaseReg : Base.BaseRegs) {
36582754fe60SDimitry Andric     if (SE.properlyDominates(BaseReg, L->getHeader()) &&
3659*b5893f02SDimitry Andric         !SE.hasComputableLoopEvolution(BaseReg, L)) {
3660*b5893f02SDimitry Andric       if (!CombinedIntegerType)
3661*b5893f02SDimitry Andric         CombinedIntegerType = SE.getEffectiveSCEVType(BaseReg->getType());
3662f22ef01cSRoman Divacky       Ops.push_back(BaseReg);
3663f22ef01cSRoman Divacky     }
3664*b5893f02SDimitry Andric     else
3665*b5893f02SDimitry Andric       NewBase.BaseRegs.push_back(BaseReg);
3666*b5893f02SDimitry Andric   }
3667*b5893f02SDimitry Andric 
3668*b5893f02SDimitry Andric   // If no register is relevant, we're done.
3669*b5893f02SDimitry Andric   if (Ops.size() == 0)
3670*b5893f02SDimitry Andric     return;
3671*b5893f02SDimitry Andric 
3672*b5893f02SDimitry Andric   // Utility function for generating the required variants of the combined
3673*b5893f02SDimitry Andric   // registers.
3674*b5893f02SDimitry Andric   auto GenerateFormula = [&](const SCEV *Sum) {
3675*b5893f02SDimitry Andric     Formula F = NewBase;
3676*b5893f02SDimitry Andric 
3677f22ef01cSRoman Divacky     // TODO: If Sum is zero, it probably means ScalarEvolution missed an
3678f22ef01cSRoman Divacky     // opportunity to fold something. For now, just ignore such cases
3679f22ef01cSRoman Divacky     // rather than proceed with zero in a register.
3680*b5893f02SDimitry Andric     if (Sum->isZero())
3681*b5893f02SDimitry Andric       return;
3682*b5893f02SDimitry Andric 
3683f22ef01cSRoman Divacky     F.BaseRegs.push_back(Sum);
36847a7e6055SDimitry Andric     F.canonicalize(*L);
3685f22ef01cSRoman Divacky     (void)InsertFormula(LU, LUIdx, F);
3686*b5893f02SDimitry Andric   };
3687*b5893f02SDimitry Andric 
3688*b5893f02SDimitry Andric   // If we collected at least two registers, generate a formula combining them.
3689*b5893f02SDimitry Andric   if (Ops.size() > 1) {
3690*b5893f02SDimitry Andric     SmallVector<const SCEV *, 4> OpsCopy(Ops); // Don't let SE modify Ops.
3691*b5893f02SDimitry Andric     GenerateFormula(SE.getAddExpr(OpsCopy));
3692f22ef01cSRoman Divacky   }
3693*b5893f02SDimitry Andric 
3694*b5893f02SDimitry Andric   // If we have an unfolded offset, generate a formula combining it with the
3695*b5893f02SDimitry Andric   // registers collected.
3696*b5893f02SDimitry Andric   if (NewBase.UnfoldedOffset) {
3697*b5893f02SDimitry Andric     assert(CombinedIntegerType && "Missing a type for the unfolded offset");
3698*b5893f02SDimitry Andric     Ops.push_back(SE.getConstant(CombinedIntegerType, NewBase.UnfoldedOffset,
3699*b5893f02SDimitry Andric                                  true));
3700*b5893f02SDimitry Andric     NewBase.UnfoldedOffset = 0;
3701*b5893f02SDimitry Andric     GenerateFormula(SE.getAddExpr(Ops));
3702f22ef01cSRoman Divacky   }
3703f22ef01cSRoman Divacky }
3704f22ef01cSRoman Divacky 
37054ba319b5SDimitry Andric /// Helper function for LSRInstance::GenerateSymbolicOffsets.
GenerateSymbolicOffsetsImpl(LSRUse & LU,unsigned LUIdx,const Formula & Base,size_t Idx,bool IsScaledReg)370691bc56edSDimitry Andric void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
370791bc56edSDimitry Andric                                               const Formula &Base, size_t Idx,
370891bc56edSDimitry Andric                                               bool IsScaledReg) {
370991bc56edSDimitry Andric   const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
371091bc56edSDimitry Andric   GlobalValue *GV = ExtractSymbol(G, SE);
371191bc56edSDimitry Andric   if (G->isZero() || !GV)
371291bc56edSDimitry Andric     return;
371391bc56edSDimitry Andric   Formula F = Base;
371491bc56edSDimitry Andric   F.BaseGV = GV;
371591bc56edSDimitry Andric   if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
371691bc56edSDimitry Andric     return;
371791bc56edSDimitry Andric   if (IsScaledReg)
371891bc56edSDimitry Andric     F.ScaledReg = G;
371991bc56edSDimitry Andric   else
372091bc56edSDimitry Andric     F.BaseRegs[Idx] = G;
372191bc56edSDimitry Andric   (void)InsertFormula(LU, LUIdx, F);
372291bc56edSDimitry Andric }
372391bc56edSDimitry Andric 
37247d523365SDimitry Andric /// Generate reuse formulae using symbolic offsets.
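/// For example, a base register (@global + %x) can be rewritten as the base
/// register %x with BaseGV set to @global, letting the symbol be folded into
/// the addressing mode on targets that support it.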
GenerateSymbolicOffsets(LSRUse & LU,unsigned LUIdx,Formula Base)3725f22ef01cSRoman Divacky void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
3726f22ef01cSRoman Divacky                                           Formula Base) {
3727f22ef01cSRoman Divacky   // We can't add a symbolic offset if the address already contains one.
3728139f7f9bSDimitry Andric   if (Base.BaseGV) return;
3729f22ef01cSRoman Divacky 
373091bc56edSDimitry Andric   for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
373191bc56edSDimitry Andric     GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
373291bc56edSDimitry Andric   if (Base.Scale == 1)
373391bc56edSDimitry Andric     GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
373491bc56edSDimitry Andric                                 /* IsScaledReg */ true);
373591bc56edSDimitry Andric }
373691bc56edSDimitry Andric 
37374ba319b5SDimitry Andric /// Helper function for LSRInstance::GenerateConstantOffsets.
GenerateConstantOffsetsImpl(LSRUse & LU,unsigned LUIdx,const Formula & Base,const SmallVectorImpl<int64_t> & Worklist,size_t Idx,bool IsScaledReg)373891bc56edSDimitry Andric void LSRInstance::GenerateConstantOffsetsImpl(
373991bc56edSDimitry Andric     LSRUse &LU, unsigned LUIdx, const Formula &Base,
374091bc56edSDimitry Andric     const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
374191bc56edSDimitry Andric   const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3742ff0cc061SDimitry Andric   for (int64_t Offset : Worklist) {
3743f22ef01cSRoman Divacky     Formula F = Base;
3744ff0cc061SDimitry Andric     F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
3745ff0cc061SDimitry Andric     if (isLegalUse(TTI, LU.MinOffset - Offset, LU.MaxOffset - Offset, LU.Kind,
374691bc56edSDimitry Andric                    LU.AccessTy, F)) {
374791bc56edSDimitry Andric       // Add the offset to the base register.
3748ff0cc061SDimitry Andric       const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), Offset), G);
374991bc56edSDimitry Andric       // If it cancelled out, drop the base register, otherwise update it.
375091bc56edSDimitry Andric       if (NewG->isZero()) {
375191bc56edSDimitry Andric         if (IsScaledReg) {
375291bc56edSDimitry Andric           F.Scale = 0;
375391bc56edSDimitry Andric           F.ScaledReg = nullptr;
375491bc56edSDimitry Andric         } else
37557d523365SDimitry Andric           F.deleteBaseReg(F.BaseRegs[Idx]);
37567a7e6055SDimitry Andric         F.canonicalize(*L);
375791bc56edSDimitry Andric       } else if (IsScaledReg)
375891bc56edSDimitry Andric         F.ScaledReg = NewG;
375991bc56edSDimitry Andric       else
376091bc56edSDimitry Andric         F.BaseRegs[Idx] = NewG;
376191bc56edSDimitry Andric 
3762f22ef01cSRoman Divacky       (void)InsertFormula(LU, LUIdx, F);
3763f22ef01cSRoman Divacky     }
3764f22ef01cSRoman Divacky   }
3765f22ef01cSRoman Divacky 
376691bc56edSDimitry Andric   int64_t Imm = ExtractImmediate(G, SE);
376791bc56edSDimitry Andric   if (G->isZero() || Imm == 0)
376891bc56edSDimitry Andric     return;
376991bc56edSDimitry Andric   Formula F = Base;
377091bc56edSDimitry Andric   F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
377191bc56edSDimitry Andric   if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
377291bc56edSDimitry Andric     return;
377391bc56edSDimitry Andric   if (IsScaledReg)
377491bc56edSDimitry Andric     F.ScaledReg = G;
377591bc56edSDimitry Andric   else
377691bc56edSDimitry Andric     F.BaseRegs[Idx] = G;
377791bc56edSDimitry Andric   (void)InsertFormula(LU, LUIdx, F);
377891bc56edSDimitry Andric }
377991bc56edSDimitry Andric 
3780f22ef01cSRoman Divacky /// Generate reuse formulae using constant offsets.
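/// For example, with use offsets 0 and 16, a base register %reg may be rewritten
/// as (%reg + 16) with the formula's immediate adjusted by -16, provided the
/// resulting offsets remain legal for the target.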
GenerateConstantOffsets(LSRUse & LU,unsigned LUIdx,Formula Base)3781f22ef01cSRoman Divacky void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
3782f22ef01cSRoman Divacky                                           Formula Base) {
3783f22ef01cSRoman Divacky   // TODO: For now, just add the min and max offset, because it usually isn't
3784f22ef01cSRoman Divacky   // worthwhile looking at everything in between.
3785ffd1746dSEd Schouten   SmallVector<int64_t, 2> Worklist;
3786f22ef01cSRoman Divacky   Worklist.push_back(LU.MinOffset);
3787f22ef01cSRoman Divacky   if (LU.MaxOffset != LU.MinOffset)
3788f22ef01cSRoman Divacky     Worklist.push_back(LU.MaxOffset);
3789f22ef01cSRoman Divacky 
379091bc56edSDimitry Andric   for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
379191bc56edSDimitry Andric     GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
379291bc56edSDimitry Andric   if (Base.Scale == 1)
379391bc56edSDimitry Andric     GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
379491bc56edSDimitry Andric                                 /* IsScaledReg */ true);
3795f22ef01cSRoman Divacky }
3796f22ef01cSRoman Divacky 
37977d523365SDimitry Andric /// For ICmpZero, check to see if we can scale up the comparison. For example, x
37987d523365SDimitry Andric /// == y -> x*c == y*c.
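/// Scaling the comparison lets the exit test share a scaled register (such as
/// {0,+,4}<%L>) that address uses already need, rather than keeping a separate
/// unscaled induction variable solely for the compare.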
GenerateICmpZeroScales(LSRUse & LU,unsigned LUIdx,Formula Base)3799f22ef01cSRoman Divacky void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
3800f22ef01cSRoman Divacky                                          Formula Base) {
3801f22ef01cSRoman Divacky   if (LU.Kind != LSRUse::ICmpZero) return;
3802f22ef01cSRoman Divacky 
3803f22ef01cSRoman Divacky   // Determine the integer type for the base formula.
38046122f3e6SDimitry Andric   Type *IntTy = Base.getType();
3805f22ef01cSRoman Divacky   if (!IntTy) return;
3806f22ef01cSRoman Divacky   if (SE.getTypeSizeInBits(IntTy) > 64) return;
3807f22ef01cSRoman Divacky 
3808f22ef01cSRoman Divacky   // Don't do this if there is more than one offset.
3809f22ef01cSRoman Divacky   if (LU.MinOffset != LU.MaxOffset) return;
3810f22ef01cSRoman Divacky 
38112cab237bSDimitry Andric   // Check if the transformation is valid. It is illegal to multiply a pointer.
38122cab237bSDimitry Andric   if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
38132cab237bSDimitry Andric     return;
38142cab237bSDimitry Andric   for (const SCEV *BaseReg : Base.BaseRegs)
38152cab237bSDimitry Andric     if (BaseReg->getType()->isPointerTy())
38162cab237bSDimitry Andric       return;
3817139f7f9bSDimitry Andric   assert(!Base.BaseGV && "ICmpZero use is not legal!");
3818f22ef01cSRoman Divacky 
3819f22ef01cSRoman Divacky   // Check each interesting stride.
3820ff0cc061SDimitry Andric   for (int64_t Factor : Factors) {
3821f22ef01cSRoman Divacky     // Check that the multiplication doesn't overflow.
38222cab237bSDimitry Andric     if (Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1)
3823f22ef01cSRoman Divacky       continue;
3824139f7f9bSDimitry Andric     int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
3825139f7f9bSDimitry Andric     if (NewBaseOffset / Factor != Base.BaseOffset)
3826f22ef01cSRoman Divacky       continue;
382785d60e68SDimitry Andric     // If the offset will be truncated at this use, check that it is in bounds.
382885d60e68SDimitry Andric     if (!IntTy->isPointerTy() &&
382985d60e68SDimitry Andric         !ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
383085d60e68SDimitry Andric       continue;
3831f22ef01cSRoman Divacky 
3832f22ef01cSRoman Divacky     // Check that multiplying with the use offset doesn't overflow.
3833f22ef01cSRoman Divacky     int64_t Offset = LU.MinOffset;
38342cab237bSDimitry Andric     if (Offset == std::numeric_limits<int64_t>::min() && Factor == -1)
3835f22ef01cSRoman Divacky       continue;
3836f22ef01cSRoman Divacky     Offset = (uint64_t)Offset * Factor;
3837f22ef01cSRoman Divacky     if (Offset / Factor != LU.MinOffset)
3838f22ef01cSRoman Divacky       continue;
383985d60e68SDimitry Andric     // If the offset will be truncated at this use, check that it is in bounds.
384085d60e68SDimitry Andric     if (!IntTy->isPointerTy() &&
384185d60e68SDimitry Andric         !ConstantInt::isValueValidForType(IntTy, Offset))
384285d60e68SDimitry Andric       continue;
3843f22ef01cSRoman Divacky 
3844ffd1746dSEd Schouten     Formula F = Base;
3845139f7f9bSDimitry Andric     F.BaseOffset = NewBaseOffset;
3846ffd1746dSEd Schouten 
3847f22ef01cSRoman Divacky     // Check that this scale is legal.
3848139f7f9bSDimitry Andric     if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
3849f22ef01cSRoman Divacky       continue;
3850f22ef01cSRoman Divacky 
3851f22ef01cSRoman Divacky     // Compensate for the use having MinOffset built into it.
3852139f7f9bSDimitry Andric     F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
3853f22ef01cSRoman Divacky 
3854f22ef01cSRoman Divacky     const SCEV *FactorS = SE.getConstant(IntTy, Factor);
3855f22ef01cSRoman Divacky 
3856f22ef01cSRoman Divacky     // Check that multiplying with each base register doesn't overflow.
3857f22ef01cSRoman Divacky     for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
3858f22ef01cSRoman Divacky       F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
3859f22ef01cSRoman Divacky       if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
3860f22ef01cSRoman Divacky         goto next;
3861f22ef01cSRoman Divacky     }
3862f22ef01cSRoman Divacky 
3863f22ef01cSRoman Divacky     // Check that multiplying with the scaled register doesn't overflow.
3864f22ef01cSRoman Divacky     if (F.ScaledReg) {
3865f22ef01cSRoman Divacky       F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
3866f22ef01cSRoman Divacky       if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
3867f22ef01cSRoman Divacky         continue;
3868f22ef01cSRoman Divacky     }
3869f22ef01cSRoman Divacky 
3870bd5abe19SDimitry Andric     // Check that multiplying with the unfolded offset doesn't overflow.
3871bd5abe19SDimitry Andric     if (F.UnfoldedOffset != 0) {
38722cab237bSDimitry Andric       if (F.UnfoldedOffset == std::numeric_limits<int64_t>::min() &&
38732cab237bSDimitry Andric           Factor == -1)
3874bd5abe19SDimitry Andric         continue;
3875bd5abe19SDimitry Andric       F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
3876bd5abe19SDimitry Andric       if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
3877bd5abe19SDimitry Andric         continue;
387885d60e68SDimitry Andric       // If the offset will be truncated, check that it is in bounds.
387985d60e68SDimitry Andric       if (!IntTy->isPointerTy() &&
388085d60e68SDimitry Andric           !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
388185d60e68SDimitry Andric         continue;
3882bd5abe19SDimitry Andric     }
3883bd5abe19SDimitry Andric 
3884f22ef01cSRoman Divacky     // If we make it here and it's legal, add it.
3885f22ef01cSRoman Divacky     (void)InsertFormula(LU, LUIdx, F);
3886f22ef01cSRoman Divacky   next:;
3887f22ef01cSRoman Divacky   }
3888f22ef01cSRoman Divacky }
3889f22ef01cSRoman Divacky 
38907d523365SDimitry Andric /// Generate stride factor reuse formulae by taking advantage of scaled-offset
38917d523365SDimitry Andric /// addressing modes.
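/// For example, the base register {0,+,4}<%L> can become the scaled register
/// {0,+,1}<%L> with Scale = 4, matching a scaled-index addressing mode such as
/// [base + 4*index] on x86.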
GenerateScales(LSRUse & LU,unsigned LUIdx,Formula Base)3892f22ef01cSRoman Divacky void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
3893f22ef01cSRoman Divacky   // Determine the integer type for the base formula.
38946122f3e6SDimitry Andric   Type *IntTy = Base.getType();
3895f22ef01cSRoman Divacky   if (!IntTy) return;
3896f22ef01cSRoman Divacky 
3897f22ef01cSRoman Divacky   // If this Formula already has a scaled register, we can't add another one.
389891bc56edSDimitry Andric   // Try to unscale the formula to generate a better scale.
38997d523365SDimitry Andric   if (Base.Scale != 0 && !Base.unscale())
390091bc56edSDimitry Andric     return;
390191bc56edSDimitry Andric 
39027d523365SDimitry Andric   assert(Base.Scale == 0 && "unscale did not do its job!");
3903f22ef01cSRoman Divacky 
3904f22ef01cSRoman Divacky   // Check each interesting stride.
3905ff0cc061SDimitry Andric   for (int64_t Factor : Factors) {
3906139f7f9bSDimitry Andric     Base.Scale = Factor;
3907139f7f9bSDimitry Andric     Base.HasBaseReg = Base.BaseRegs.size() > 1;
3908f22ef01cSRoman Divacky     // Check whether this scale is going to be legal.
3909139f7f9bSDimitry Andric     if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
3910139f7f9bSDimitry Andric                     Base)) {
3911f22ef01cSRoman Divacky       // As a special-case, handle special out-of-loop Basic users specially.
3912f22ef01cSRoman Divacky       // TODO: Reconsider this special case.
3913f22ef01cSRoman Divacky       if (LU.Kind == LSRUse::Basic &&
3914139f7f9bSDimitry Andric           isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
3915139f7f9bSDimitry Andric                      LU.AccessTy, Base) &&
3916f22ef01cSRoman Divacky           LU.AllFixupsOutsideLoop)
3917f22ef01cSRoman Divacky         LU.Kind = LSRUse::Special;
3918f22ef01cSRoman Divacky       else
3919f22ef01cSRoman Divacky         continue;
3920f22ef01cSRoman Divacky     }
3921f22ef01cSRoman Divacky     // For an ICmpZero, negating a solitary base register won't lead to
3922f22ef01cSRoman Divacky     // new solutions.
3923f22ef01cSRoman Divacky     if (LU.Kind == LSRUse::ICmpZero &&
3924139f7f9bSDimitry Andric         !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
3925f22ef01cSRoman Divacky       continue;
39267a7e6055SDimitry Andric     // For each addrec base reg, if its loop is the current loop, apply the scale.
39277a7e6055SDimitry Andric     for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
39287a7e6055SDimitry Andric       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i]);
39297a7e6055SDimitry Andric       if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
3930f22ef01cSRoman Divacky         const SCEV *FactorS = SE.getConstant(IntTy, Factor);
3931f22ef01cSRoman Divacky         if (FactorS->isZero())
3932f22ef01cSRoman Divacky           continue;
3933f22ef01cSRoman Divacky         // Divide out the factor, ignoring high bits, since we'll be
3934f22ef01cSRoman Divacky         // scaling the value back up in the end.
3935f22ef01cSRoman Divacky         if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
3936f22ef01cSRoman Divacky           // TODO: This could be optimized to avoid all the copying.
3937f22ef01cSRoman Divacky           Formula F = Base;
3938f22ef01cSRoman Divacky           F.ScaledReg = Quotient;
39397d523365SDimitry Andric           F.deleteBaseReg(F.BaseRegs[i]);
394091bc56edSDimitry Andric           // The canonical representation of 1*reg is reg, which is already in
394191bc56edSDimitry Andric           // Base. In that case, do not try to insert the formula, it will be
394291bc56edSDimitry Andric           // rejected anyway.
39437a7e6055SDimitry Andric           if (F.Scale == 1 && (F.BaseRegs.empty() ||
39447a7e6055SDimitry Andric                                (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
394591bc56edSDimitry Andric             continue;
39467a7e6055SDimitry Andric           // If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate
39477a7e6055SDimitry Andric           // non canonical Formula with ScaledReg's loop not being L.
39487a7e6055SDimitry Andric           if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
39497a7e6055SDimitry Andric             F.canonicalize(*L);
3950f22ef01cSRoman Divacky           (void)InsertFormula(LU, LUIdx, F);
3951f22ef01cSRoman Divacky         }
3952f22ef01cSRoman Divacky       }
3953f22ef01cSRoman Divacky     }
3954f22ef01cSRoman Divacky   }
39557a7e6055SDimitry Andric }
3956f22ef01cSRoman Divacky 
39577d523365SDimitry Andric /// Generate reuse formulae from different IV types.
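/// For example, if truncation from i64 to i32 is free on the target, an i32 use
/// can be given a formula whose registers are extended to i64, allowing it to
/// share registers with i64 uses and truncate at the point of use.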
GenerateTruncates(LSRUse & LU,unsigned LUIdx,Formula Base)3958f22ef01cSRoman Divacky void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
3959f22ef01cSRoman Divacky   // Don't bother truncating symbolic values.
3960139f7f9bSDimitry Andric   if (Base.BaseGV) return;
3961f22ef01cSRoman Divacky 
3962f22ef01cSRoman Divacky   // Determine the integer type for the base formula.
39636122f3e6SDimitry Andric   Type *DstTy = Base.getType();
3964f22ef01cSRoman Divacky   if (!DstTy) return;
3965f22ef01cSRoman Divacky   DstTy = SE.getEffectiveSCEVType(DstTy);
3966f22ef01cSRoman Divacky 
3967ff0cc061SDimitry Andric   for (Type *SrcTy : Types) {
3968139f7f9bSDimitry Andric     if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
3969f22ef01cSRoman Divacky       Formula F = Base;
3970f22ef01cSRoman Divacky 
3971ff0cc061SDimitry Andric       if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
3972ff0cc061SDimitry Andric       for (const SCEV *&BaseReg : F.BaseRegs)
3973ff0cc061SDimitry Andric         BaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
3974f22ef01cSRoman Divacky 
3975f22ef01cSRoman Divacky       // TODO: This assumes we've done basic processing on all uses and
3976f22ef01cSRoman Divacky       // have an idea what the register usage is.
3977f22ef01cSRoman Divacky       if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
3978f22ef01cSRoman Divacky         continue;
3979f22ef01cSRoman Divacky 
3980d8866befSDimitry Andric       F.canonicalize(*L);
3981f22ef01cSRoman Divacky       (void)InsertFormula(LU, LUIdx, F);
3982f22ef01cSRoman Divacky     }
3983f22ef01cSRoman Divacky   }
3984f22ef01cSRoman Divacky }
3985f22ef01cSRoman Divacky 
3986f22ef01cSRoman Divacky namespace {
3987f22ef01cSRoman Divacky 
39887d523365SDimitry Andric /// Helper class for GenerateCrossUseConstantOffsets. It's used to defer
39897d523365SDimitry Andric /// modifications so that the search phase doesn't have to worry about the data
39907d523365SDimitry Andric /// structures moving underneath it.
3991f22ef01cSRoman Divacky struct WorkItem {
3992f22ef01cSRoman Divacky   size_t LUIdx;
3993f22ef01cSRoman Divacky   int64_t Imm;
3994f22ef01cSRoman Divacky   const SCEV *OrigReg;
3995f22ef01cSRoman Divacky 
WorkItem__anon244971d90b11::WorkItem3996f22ef01cSRoman Divacky   WorkItem(size_t LI, int64_t I, const SCEV *R)
3997f22ef01cSRoman Divacky       : LUIdx(LI), Imm(I), OrigReg(R) {}
3998f22ef01cSRoman Divacky 
3999f22ef01cSRoman Divacky   void print(raw_ostream &OS) const;
4000f22ef01cSRoman Divacky   void dump() const;
4001f22ef01cSRoman Divacky };
4002f22ef01cSRoman Divacky 
4003d88c1a5aSDimitry Andric } // end anonymous namespace
4004f22ef01cSRoman Divacky 
40052cab237bSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & OS) const4006f22ef01cSRoman Divacky void WorkItem::print(raw_ostream &OS) const {
4007f22ef01cSRoman Divacky   OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
4008f22ef01cSRoman Divacky      << " , add offset " << Imm;
4009f22ef01cSRoman Divacky }
4010f22ef01cSRoman Divacky 
dump() const40117a7e6055SDimitry Andric LLVM_DUMP_METHOD void WorkItem::dump() const {
4012f22ef01cSRoman Divacky   print(errs()); errs() << '\n';
4013f22ef01cSRoman Divacky }
40147a7e6055SDimitry Andric #endif
4015f22ef01cSRoman Divacky 
40167d523365SDimitry Andric /// Look for registers which are a constant distance apart and try to form reuse
40177d523365SDimitry Andric /// opportunities between them.
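/// For example, if one use needs the register {0,+,4}<%L> and another needs
/// {16,+,4}<%L>, the second can reuse the first register by folding the
/// constant difference of 16 into its immediate offset.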
GenerateCrossUseConstantOffsets()4018f22ef01cSRoman Divacky void LSRInstance::GenerateCrossUseConstantOffsets() {
4019f22ef01cSRoman Divacky   // Group the registers by their value without any added constant offset.
40202cab237bSDimitry Andric   using ImmMapTy = std::map<int64_t, const SCEV *>;
40212cab237bSDimitry Andric 
4022ff0cc061SDimitry Andric   DenseMap<const SCEV *, ImmMapTy> Map;
4023f22ef01cSRoman Divacky   DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
4024f22ef01cSRoman Divacky   SmallVector<const SCEV *, 8> Sequence;
4025ff0cc061SDimitry Andric   for (const SCEV *Use : RegUses) {
4026ff0cc061SDimitry Andric     const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.
4027f22ef01cSRoman Divacky     int64_t Imm = ExtractImmediate(Reg, SE);
4028ff0cc061SDimitry Andric     auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
4029f22ef01cSRoman Divacky     if (Pair.second)
4030f22ef01cSRoman Divacky       Sequence.push_back(Reg);
4031ff0cc061SDimitry Andric     Pair.first->second.insert(std::make_pair(Imm, Use));
4032ff0cc061SDimitry Andric     UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
4033f22ef01cSRoman Divacky   }
4034f22ef01cSRoman Divacky 
4035f22ef01cSRoman Divacky   // Now examine each set of registers with the same base value. Build up
4036f22ef01cSRoman Divacky   // a list of work to do and do the work in a separate step so that we're
4037f22ef01cSRoman Divacky   // not adding formulae and register counts while we're searching.
4038f22ef01cSRoman Divacky   SmallVector<WorkItem, 32> WorkItems;
4039f22ef01cSRoman Divacky   SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
4040ff0cc061SDimitry Andric   for (const SCEV *Reg : Sequence) {
4041f22ef01cSRoman Divacky     const ImmMapTy &Imms = Map.find(Reg)->second;
4042f22ef01cSRoman Divacky 
4043f22ef01cSRoman Divacky     // It's not worthwhile looking for reuse if there's only one offset.
4044f22ef01cSRoman Divacky     if (Imms.size() == 1)
4045f22ef01cSRoman Divacky       continue;
4046f22ef01cSRoman Divacky 
40474ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
40484ba319b5SDimitry Andric                for (const auto &Entry
40494ba319b5SDimitry Andric                     : Imms) dbgs()
40504ba319b5SDimitry Andric                << ' ' << Entry.first;
4051f22ef01cSRoman Divacky                dbgs() << '\n');
4052f22ef01cSRoman Divacky 
4053f22ef01cSRoman Divacky     // Examine each offset.
4054f22ef01cSRoman Divacky     for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4055f22ef01cSRoman Divacky          J != JE; ++J) {
4056f22ef01cSRoman Divacky       const SCEV *OrigReg = J->second;
4057f22ef01cSRoman Divacky 
4058f22ef01cSRoman Divacky       int64_t JImm = J->first;
4059f22ef01cSRoman Divacky       const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4060f22ef01cSRoman Divacky 
4061f22ef01cSRoman Divacky       if (!isa<SCEVConstant>(OrigReg) &&
4062f22ef01cSRoman Divacky           UsedByIndicesMap[Reg].count() == 1) {
40634ba319b5SDimitry Andric         LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
40644ba319b5SDimitry Andric                           << '\n');
4065f22ef01cSRoman Divacky         continue;
4066f22ef01cSRoman Divacky       }
4067f22ef01cSRoman Divacky 
4068f22ef01cSRoman Divacky       // Conservatively examine offsets between this orig reg a few selected
4069f22ef01cSRoman Divacky       // other orig regs.
4070f22ef01cSRoman Divacky       ImmMapTy::const_iterator OtherImms[] = {
407191bc56edSDimitry Andric         Imms.begin(), std::prev(Imms.end()),
407291bc56edSDimitry Andric         Imms.lower_bound((Imms.begin()->first + std::prev(Imms.end())->first) /
407391bc56edSDimitry Andric                          2)
4074f22ef01cSRoman Divacky       };
4075f22ef01cSRoman Divacky       for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
4076f22ef01cSRoman Divacky         ImmMapTy::const_iterator M = OtherImms[i];
4077f22ef01cSRoman Divacky         if (M == J || M == JE) continue;
4078f22ef01cSRoman Divacky 
4079f22ef01cSRoman Divacky         // Compute the difference between the two.
4080f22ef01cSRoman Divacky         int64_t Imm = (uint64_t)JImm - M->first;
408160ff8e32SDimitry Andric         for (unsigned LUIdx : UsedByIndices.set_bits())
4082f22ef01cSRoman Divacky           // Make a memo of this use, offset, and register tuple.
408339d628a0SDimitry Andric           if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
4084f22ef01cSRoman Divacky             WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
4085f22ef01cSRoman Divacky       }
4086f22ef01cSRoman Divacky     }
4087f22ef01cSRoman Divacky   }
4088f22ef01cSRoman Divacky 
4089f22ef01cSRoman Divacky   Map.clear();
4090f22ef01cSRoman Divacky   Sequence.clear();
4091f22ef01cSRoman Divacky   UsedByIndicesMap.clear();
4092f22ef01cSRoman Divacky   UniqueItems.clear();
4093f22ef01cSRoman Divacky 
4094f22ef01cSRoman Divacky   // Now iterate through the worklist and add new formulae.
4095ff0cc061SDimitry Andric   for (const WorkItem &WI : WorkItems) {
4096f22ef01cSRoman Divacky     size_t LUIdx = WI.LUIdx;
4097f22ef01cSRoman Divacky     LSRUse &LU = Uses[LUIdx];
4098f22ef01cSRoman Divacky     int64_t Imm = WI.Imm;
4099f22ef01cSRoman Divacky     const SCEV *OrigReg = WI.OrigReg;
4100f22ef01cSRoman Divacky 
41016122f3e6SDimitry Andric     Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
4102f22ef01cSRoman Divacky     const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
4103f22ef01cSRoman Divacky     unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
4104f22ef01cSRoman Divacky 
4105f22ef01cSRoman Divacky     // TODO: Use a more targeted data structure.
4106f22ef01cSRoman Divacky     for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
410791bc56edSDimitry Andric       Formula F = LU.Formulae[L];
410891bc56edSDimitry Andric       // FIXME: The code for the scaled and unscaled registers looks
410991bc56edSDimitry Andric       // very similar but slightly different. Investigate if they
411091bc56edSDimitry Andric       // could be merged. That way, we would not have to unscale the
411191bc56edSDimitry Andric       // Formula.
41127d523365SDimitry Andric       F.unscale();
4113f22ef01cSRoman Divacky       // Use the immediate in the scaled register.
4114f22ef01cSRoman Divacky       if (F.ScaledReg == OrigReg) {
4115139f7f9bSDimitry Andric         int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
4116f22ef01cSRoman Divacky         // Don't create 50 + reg(-50).
4117f22ef01cSRoman Divacky         if (F.referencesReg(SE.getSCEV(
4118139f7f9bSDimitry Andric                    ConstantInt::get(IntTy, -(uint64_t)Offset))))
4119f22ef01cSRoman Divacky           continue;
4120f22ef01cSRoman Divacky         Formula NewF = F;
4121139f7f9bSDimitry Andric         NewF.BaseOffset = Offset;
4122139f7f9bSDimitry Andric         if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4123139f7f9bSDimitry Andric                         NewF))
4124f22ef01cSRoman Divacky           continue;
4125f22ef01cSRoman Divacky         NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
4126f22ef01cSRoman Divacky 
4127f22ef01cSRoman Divacky         // If the new scale is a constant in a register, and adding the constant
4128f22ef01cSRoman Divacky         // value to the immediate would produce a value closer to zero than the
4129f22ef01cSRoman Divacky         // immediate itself, then the formula isn't worthwhile.
4130f22ef01cSRoman Divacky         if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
41317d523365SDimitry Andric           if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
41327d523365SDimitry Andric               (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
4133ff0cc061SDimitry Andric                   .ule(std::abs(NewF.BaseOffset)))
4134f22ef01cSRoman Divacky             continue;
4135f22ef01cSRoman Divacky 
4136f22ef01cSRoman Divacky         // OK, looks good.
41377a7e6055SDimitry Andric         NewF.canonicalize(*this->L);
4138f22ef01cSRoman Divacky         (void)InsertFormula(LU, LUIdx, NewF);
4139f22ef01cSRoman Divacky       } else {
4140f22ef01cSRoman Divacky         // Use the immediate in a base register.
4141f22ef01cSRoman Divacky         for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
4142f22ef01cSRoman Divacky           const SCEV *BaseReg = F.BaseRegs[N];
4143f22ef01cSRoman Divacky           if (BaseReg != OrigReg)
4144f22ef01cSRoman Divacky             continue;
4145f22ef01cSRoman Divacky           Formula NewF = F;
4146139f7f9bSDimitry Andric           NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
4147139f7f9bSDimitry Andric           if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
4148139f7f9bSDimitry Andric                           LU.Kind, LU.AccessTy, NewF)) {
41494ba319b5SDimitry Andric             if (TTI.shouldFavorPostInc() &&
41504ba319b5SDimitry Andric                 mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))
41514ba319b5SDimitry Andric               continue;
4152139f7f9bSDimitry Andric             if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
4153f22ef01cSRoman Divacky               continue;
4154bd5abe19SDimitry Andric             NewF = F;
4155bd5abe19SDimitry Andric             NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
4156bd5abe19SDimitry Andric           }
4157f22ef01cSRoman Divacky           NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
4158f22ef01cSRoman Divacky 
4159f22ef01cSRoman Divacky           // If the new formula has a constant in a register, and adding the
4160f22ef01cSRoman Divacky           // constant value to the immediate would produce a value closer to
4161f22ef01cSRoman Divacky           // zero than the immediate itself, then the formula isn't worthwhile.
4162ff0cc061SDimitry Andric           for (const SCEV *NewReg : NewF.BaseRegs)
4163ff0cc061SDimitry Andric             if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg))
41647d523365SDimitry Andric               if ((C->getAPInt() + NewF.BaseOffset)
41657d523365SDimitry Andric                       .abs()
41667d523365SDimitry Andric                       .slt(std::abs(NewF.BaseOffset)) &&
41677d523365SDimitry Andric                   (C->getAPInt() + NewF.BaseOffset).countTrailingZeros() >=
4168f785676fSDimitry Andric                       countTrailingZeros<uint64_t>(NewF.BaseOffset))
4169f22ef01cSRoman Divacky                 goto skip_formula;
4170f22ef01cSRoman Divacky 
4171f22ef01cSRoman Divacky           // Ok, looks good.
41727a7e6055SDimitry Andric           NewF.canonicalize(*this->L);
4173f22ef01cSRoman Divacky           (void)InsertFormula(LU, LUIdx, NewF);
4174f22ef01cSRoman Divacky           break;
4175f22ef01cSRoman Divacky         skip_formula:;
4176f22ef01cSRoman Divacky         }
4177f22ef01cSRoman Divacky       }
4178f22ef01cSRoman Divacky     }
4179f22ef01cSRoman Divacky   }
4180f22ef01cSRoman Divacky }
4181f22ef01cSRoman Divacky 
41827d523365SDimitry Andric /// Generate formulae for each use.
4183f22ef01cSRoman Divacky void
GenerateAllReuseFormulae()4184f22ef01cSRoman Divacky LSRInstance::GenerateAllReuseFormulae() {
4185f22ef01cSRoman Divacky   // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
4186f22ef01cSRoman Divacky   // queries are more precise.
4187f22ef01cSRoman Divacky   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4188f22ef01cSRoman Divacky     LSRUse &LU = Uses[LUIdx];
4189f22ef01cSRoman Divacky     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4190f22ef01cSRoman Divacky       GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4191f22ef01cSRoman Divacky     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4192f22ef01cSRoman Divacky       GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
4193f22ef01cSRoman Divacky   }
4194f22ef01cSRoman Divacky   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4195f22ef01cSRoman Divacky     LSRUse &LU = Uses[LUIdx];
4196f22ef01cSRoman Divacky     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4197f22ef01cSRoman Divacky       GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4198f22ef01cSRoman Divacky     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4199f22ef01cSRoman Divacky       GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4200f22ef01cSRoman Divacky     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4201f22ef01cSRoman Divacky       GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4202f22ef01cSRoman Divacky     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4203f22ef01cSRoman Divacky       GenerateScales(LU, LUIdx, LU.Formulae[i]);
4204f22ef01cSRoman Divacky   }
4205f22ef01cSRoman Divacky   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4206f22ef01cSRoman Divacky     LSRUse &LU = Uses[LUIdx];
4207f22ef01cSRoman Divacky     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4208f22ef01cSRoman Divacky       GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4209f22ef01cSRoman Divacky   }
4210f22ef01cSRoman Divacky 
4211f22ef01cSRoman Divacky   GenerateCrossUseConstantOffsets();
4212e580952dSDimitry Andric 
42134ba319b5SDimitry Andric   LLVM_DEBUG(dbgs() << "\n"
4214e580952dSDimitry Andric                        "After generating reuse formulae:\n";
4215e580952dSDimitry Andric              print_uses(dbgs()));
4216f22ef01cSRoman Divacky }
4217f22ef01cSRoman Divacky 
42182754fe60SDimitry Andric /// If there are multiple formulae with the same set of registers used
4219f22ef01cSRoman Divacky /// by other uses, pick the best one and delete the others.
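/// Formulae are keyed by the subset of their registers that are shared with
/// other uses; within each key only the lowest-cost formula survives, since
/// the remaining dedicated registers are interchangeable from the other uses'
/// point of view.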
FilterOutUndesirableDedicatedRegisters()4220f22ef01cSRoman Divacky void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
42212754fe60SDimitry Andric   DenseSet<const SCEV *> VisitedRegs;
42222754fe60SDimitry Andric   SmallPtrSet<const SCEV *, 16> Regs;
4223dff0c46cSDimitry Andric   SmallPtrSet<const SCEV *, 16> LoserRegs;
4224f22ef01cSRoman Divacky #ifndef NDEBUG
4225f22ef01cSRoman Divacky   bool ChangedFormulae = false;
4226f22ef01cSRoman Divacky #endif
4227f22ef01cSRoman Divacky 
4228f22ef01cSRoman Divacky   // Collect the best formula for each unique set of shared registers. This
4229f22ef01cSRoman Divacky   // is reset for each use.
42302cab237bSDimitry Andric   using BestFormulaeTy =
42312cab237bSDimitry Andric       DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>;
42322cab237bSDimitry Andric 
4233f22ef01cSRoman Divacky   BestFormulaeTy BestFormulae;
4234f22ef01cSRoman Divacky 
4235f22ef01cSRoman Divacky   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4236f22ef01cSRoman Divacky     LSRUse &LU = Uses[LUIdx];
42374ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
42384ba319b5SDimitry Andric                dbgs() << '\n');
4239f22ef01cSRoman Divacky 
4240f22ef01cSRoman Divacky     bool Any = false;
4241f22ef01cSRoman Divacky     for (size_t FIdx = 0, NumForms = LU.Formulae.size();
4242f22ef01cSRoman Divacky          FIdx != NumForms; ++FIdx) {
4243f22ef01cSRoman Divacky       Formula &F = LU.Formulae[FIdx];
4244f22ef01cSRoman Divacky 
4245dff0c46cSDimitry Andric       // Some formulas are instant losers. For example, they may depend on
4246dff0c46cSDimitry Andric       // nonexistent AddRecs from other loops. These need to be filtered
4247dff0c46cSDimitry Andric       // immediately, otherwise heuristics could choose them over others leading
4248dff0c46cSDimitry Andric       // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
4249dff0c46cSDimitry Andric       // avoids the need to recompute this information across formulae using the
4250dff0c46cSDimitry Andric       // same bad AddRec. Passing LoserRegs is also essential unless we remove
4251dff0c46cSDimitry Andric       // the corresponding bad register from the Regs set.
4252dff0c46cSDimitry Andric       Cost CostF;
4253dff0c46cSDimitry Andric       Regs.clear();
4254d88c1a5aSDimitry Andric       CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, SE, DT, LU, &LoserRegs);
4255dff0c46cSDimitry Andric       if (CostF.isLoser()) {
4256dff0c46cSDimitry Andric         // During initial formula generation, undesirable formulae are generated
4257dff0c46cSDimitry Andric         // by uses within other loops that have some non-trivial address mode or
4258dff0c46cSDimitry Andric         // use the postinc form of the IV. LSR needs to provide these formulae
4259dff0c46cSDimitry Andric         // as the basis of rediscovering the desired formula that uses an AddRec
4260dff0c46cSDimitry Andric         // corresponding to the existing phi. Once all formulae have been
4261dff0c46cSDimitry Andric         // generated, these initial losers may be pruned.
42624ba319b5SDimitry Andric         LLVM_DEBUG(dbgs() << "  Filtering loser "; F.print(dbgs());
4263dff0c46cSDimitry Andric                    dbgs() << "\n");
4264dff0c46cSDimitry Andric       }
4265dff0c46cSDimitry Andric       else {
4266139f7f9bSDimitry Andric         SmallVector<const SCEV *, 4> Key;
4267ff0cc061SDimitry Andric         for (const SCEV *Reg : F.BaseRegs) {
4268f22ef01cSRoman Divacky           if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
4269f22ef01cSRoman Divacky             Key.push_back(Reg);
4270f22ef01cSRoman Divacky         }
4271f22ef01cSRoman Divacky         if (F.ScaledReg &&
4272f22ef01cSRoman Divacky             RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
4273f22ef01cSRoman Divacky           Key.push_back(F.ScaledReg);
4274f22ef01cSRoman Divacky         // Unstable sort by host order ok, because this is only used for
4275f22ef01cSRoman Divacky         // uniquifying.
4276*b5893f02SDimitry Andric         llvm::sort(Key);
4277f22ef01cSRoman Divacky 
4278f22ef01cSRoman Divacky         std::pair<BestFormulaeTy::const_iterator, bool> P =
4279f22ef01cSRoman Divacky           BestFormulae.insert(std::make_pair(Key, FIdx));
4280dff0c46cSDimitry Andric         if (P.second)
4281dff0c46cSDimitry Andric           continue;
4282dff0c46cSDimitry Andric 
4283f22ef01cSRoman Divacky         Formula &Best = LU.Formulae[P.first->second];
42842754fe60SDimitry Andric 
42852754fe60SDimitry Andric         Cost CostBest;
42862754fe60SDimitry Andric         Regs.clear();
4287d88c1a5aSDimitry Andric         CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU);
4288db17bf38SDimitry Andric         if (CostF.isLess(CostBest, TTI))
4289f22ef01cSRoman Divacky           std::swap(F, Best);
42904ba319b5SDimitry Andric         LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
4291f22ef01cSRoman Divacky                    dbgs() << "\n"
42924ba319b5SDimitry Andric                              "    in favor of formula ";
42934ba319b5SDimitry Andric                    Best.print(dbgs()); dbgs() << '\n');
4294dff0c46cSDimitry Andric       }
4295f22ef01cSRoman Divacky #ifndef NDEBUG
4296f22ef01cSRoman Divacky       ChangedFormulae = true;
4297f22ef01cSRoman Divacky #endif
4298f22ef01cSRoman Divacky       LU.DeleteFormula(F);
4299f22ef01cSRoman Divacky       --FIdx;
4300f22ef01cSRoman Divacky       --NumForms;
4301f22ef01cSRoman Divacky       Any = true;
4302f22ef01cSRoman Divacky     }
4303f22ef01cSRoman Divacky 
4304f22ef01cSRoman Divacky     // Now that we've filtered out some formulae, recompute the Regs set.
4305f22ef01cSRoman Divacky     if (Any)
4306f22ef01cSRoman Divacky       LU.RecomputeRegs(LUIdx, RegUses);
4307f22ef01cSRoman Divacky 
4308f22ef01cSRoman Divacky     // Reset this to prepare for the next use.
4309f22ef01cSRoman Divacky     BestFormulae.clear();
4310f22ef01cSRoman Divacky   }
4311f22ef01cSRoman Divacky 
43124ba319b5SDimitry Andric   LLVM_DEBUG(if (ChangedFormulae) {
4313f22ef01cSRoman Divacky     dbgs() << "\n"
4314f22ef01cSRoman Divacky               "After filtering out undesirable candidates:\n";
4315f22ef01cSRoman Divacky     print_uses(dbgs());
4316f22ef01cSRoman Divacky   });
4317f22ef01cSRoman Divacky }
4318f22ef01cSRoman Divacky 
43197d523365SDimitry Andric /// Estimate the worst-case number of solutions the solver might have to
43207d523365SDimitry Andric /// consider. It almost never considers this many solutions because it prunes the
43217d523365SDimitry Andric /// search space, but the pruning isn't always sufficient.
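/// Illustrative example: three uses with 3, 4, and 5 formulae give a worst case
/// of 3 * 4 * 5 = 60 candidate solutions. The running product is capped at
/// ComplexityLimit, since anything at or above the limit is treated the same.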
4322f22ef01cSRoman Divacky size_t LSRInstance::EstimateSearchSpaceComplexity() const {
43232754fe60SDimitry Andric   size_t Power = 1;
4324ff0cc061SDimitry Andric   for (const LSRUse &LU : Uses) {
4325ff0cc061SDimitry Andric     size_t FSize = LU.Formulae.size();
4326f22ef01cSRoman Divacky     if (FSize >= ComplexityLimit) {
4327f22ef01cSRoman Divacky       Power = ComplexityLimit;
4328f22ef01cSRoman Divacky       break;
4329f22ef01cSRoman Divacky     }
4330f22ef01cSRoman Divacky     Power *= FSize;
4331f22ef01cSRoman Divacky     if (Power >= ComplexityLimit)
4332f22ef01cSRoman Divacky       break;
4333f22ef01cSRoman Divacky   }
4334f22ef01cSRoman Divacky   return Power;
4335f22ef01cSRoman Divacky }
4336f22ef01cSRoman Divacky 
43377d523365SDimitry Andric /// When one formula uses a superset of the registers of another formula, it
43387d523365SDimitry Andric /// won't help reduce register pressure (though it may not necessarily hurt
43397d523365SDimitry Andric /// register pressure); remove it to simplify the system.
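/// Illustrative example: if one formula uses the registers { reg(a), reg(5) }
/// and the same use already has a formula over { reg(a) } with the 5 folded
/// into its immediate, the former formula can be deleted.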
4340e580952dSDimitry Andric void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4341f22ef01cSRoman Divacky   if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
43424ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
4343f22ef01cSRoman Divacky 
43444ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
4345f22ef01cSRoman Divacky                          "which use a superset of registers used by other "
4346f22ef01cSRoman Divacky                          "formulae.\n");
4347f22ef01cSRoman Divacky 
4348f22ef01cSRoman Divacky     for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4349f22ef01cSRoman Divacky       LSRUse &LU = Uses[LUIdx];
4350f22ef01cSRoman Divacky       bool Any = false;
4351f22ef01cSRoman Divacky       for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4352f22ef01cSRoman Divacky         Formula &F = LU.Formulae[i];
4353f22ef01cSRoman Divacky         // Look for a formula with a constant or GV in a register. If the use
4354f22ef01cSRoman Divacky         // also has a formula with that same value in an immediate field,
4355f22ef01cSRoman Divacky         // delete the one that uses a register.
4356f22ef01cSRoman Divacky         for (SmallVectorImpl<const SCEV *>::const_iterator
4357f22ef01cSRoman Divacky              I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
4358f22ef01cSRoman Divacky           if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
4359f22ef01cSRoman Divacky             Formula NewF = F;
4360139f7f9bSDimitry Andric             NewF.BaseOffset += C->getValue()->getSExtValue();
4361f22ef01cSRoman Divacky             NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4362f22ef01cSRoman Divacky                                 (I - F.BaseRegs.begin()));
4363f22ef01cSRoman Divacky             if (LU.HasFormulaWithSameRegs(NewF)) {
43644ba319b5SDimitry Andric               LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
43654ba319b5SDimitry Andric                          dbgs() << '\n');
4366f22ef01cSRoman Divacky               LU.DeleteFormula(F);
4367f22ef01cSRoman Divacky               --i;
4368f22ef01cSRoman Divacky               --e;
4369f22ef01cSRoman Divacky               Any = true;
4370f22ef01cSRoman Divacky               break;
4371f22ef01cSRoman Divacky             }
4372f22ef01cSRoman Divacky           } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
4373f22ef01cSRoman Divacky             if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
4374139f7f9bSDimitry Andric               if (!F.BaseGV) {
4375f22ef01cSRoman Divacky                 Formula NewF = F;
4376139f7f9bSDimitry Andric                 NewF.BaseGV = GV;
4377f22ef01cSRoman Divacky                 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4378f22ef01cSRoman Divacky                                     (I - F.BaseRegs.begin()));
4379f22ef01cSRoman Divacky                 if (LU.HasFormulaWithSameRegs(NewF)) {
43804ba319b5SDimitry Andric                   LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
4381f22ef01cSRoman Divacky                              dbgs() << '\n');
4382f22ef01cSRoman Divacky                   LU.DeleteFormula(F);
4383f22ef01cSRoman Divacky                   --i;
4384f22ef01cSRoman Divacky                   --e;
4385f22ef01cSRoman Divacky                   Any = true;
4386f22ef01cSRoman Divacky                   break;
4387f22ef01cSRoman Divacky                 }
4388f22ef01cSRoman Divacky               }
4389f22ef01cSRoman Divacky           }
4390f22ef01cSRoman Divacky         }
4391f22ef01cSRoman Divacky       }
4392f22ef01cSRoman Divacky       if (Any)
4393f22ef01cSRoman Divacky         LU.RecomputeRegs(LUIdx, RegUses);
4394f22ef01cSRoman Divacky     }
4395f22ef01cSRoman Divacky 
43964ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4397f22ef01cSRoman Divacky   }
4398e580952dSDimitry Andric }
4399f22ef01cSRoman Divacky 
44007d523365SDimitry Andric /// When there are many registers for expressions like A, A+1, A+2, etc.,
44017d523365SDimitry Andric /// allocate a single register for them.
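/// Illustrative example: after unrolling, separate uses may address A, A+4, and
/// A+8; rather than keeping a register per use, the constant offsets are folded
/// into the fixups so that all of them can share the register for A.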
4402e580952dSDimitry Andric void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4403139f7f9bSDimitry Andric   if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4404139f7f9bSDimitry Andric     return;
4405f22ef01cSRoman Divacky 
44064ba319b5SDimitry Andric   LLVM_DEBUG(
44074ba319b5SDimitry Andric       dbgs() << "The search space is too complex.\n"
4408139f7f9bSDimitry Andric                 "Narrowing the search space by assuming that uses separated "
4409139f7f9bSDimitry Andric                 "by a constant offset will use the same registers.\n");
4410f22ef01cSRoman Divacky 
4411f22ef01cSRoman Divacky   // This is especially useful for unrolled loops.
4412f22ef01cSRoman Divacky 
4413f22ef01cSRoman Divacky   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4414f22ef01cSRoman Divacky     LSRUse &LU = Uses[LUIdx];
4415ff0cc061SDimitry Andric     for (const Formula &F : LU.Formulae) {
441691bc56edSDimitry Andric       if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
4417139f7f9bSDimitry Andric         continue;
4418139f7f9bSDimitry Andric 
4419139f7f9bSDimitry Andric       LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
4420139f7f9bSDimitry Andric       if (!LUThatHas)
4421139f7f9bSDimitry Andric         continue;
4422139f7f9bSDimitry Andric 
4423139f7f9bSDimitry Andric       if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
4424139f7f9bSDimitry Andric                               LU.Kind, LU.AccessTy))
4425139f7f9bSDimitry Andric         continue;
4426139f7f9bSDimitry Andric 
44274ba319b5SDimitry Andric       LLVM_DEBUG(dbgs() << "  Deleting use "; LU.print(dbgs()); dbgs() << '\n');
4428f22ef01cSRoman Divacky 
4429f22ef01cSRoman Divacky       LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4430f22ef01cSRoman Divacky 
4431d88c1a5aSDimitry Andric       // Transfer the fixups of LU to LUThatHas.
4432d88c1a5aSDimitry Andric       for (LSRFixup &Fixup : LU.Fixups) {
4433139f7f9bSDimitry Andric         Fixup.Offset += F.BaseOffset;
4434d88c1a5aSDimitry Andric         LUThatHas->pushFixup(Fixup);
44354ba319b5SDimitry Andric         LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
44362754fe60SDimitry Andric       }
44372754fe60SDimitry Andric 
4438f22ef01cSRoman Divacky       // Delete formulae from the new use which are no longer legal.
4439f22ef01cSRoman Divacky       bool Any = false;
4440f22ef01cSRoman Divacky       for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4441f22ef01cSRoman Divacky         Formula &F = LUThatHas->Formulae[i];
4442139f7f9bSDimitry Andric         if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4443139f7f9bSDimitry Andric                         LUThatHas->Kind, LUThatHas->AccessTy, F)) {
44444ba319b5SDimitry Andric           LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
4445f22ef01cSRoman Divacky           LUThatHas->DeleteFormula(F);
4446f22ef01cSRoman Divacky           --i;
4447f22ef01cSRoman Divacky           --e;
4448f22ef01cSRoman Divacky           Any = true;
4449f22ef01cSRoman Divacky         }
4450f22ef01cSRoman Divacky       }
4451139f7f9bSDimitry Andric 
4452f22ef01cSRoman Divacky       if (Any)
4453f22ef01cSRoman Divacky         LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
4454f22ef01cSRoman Divacky 
4455f22ef01cSRoman Divacky       // Delete the old use.
44562754fe60SDimitry Andric       DeleteUse(LU, LUIdx);
4457f22ef01cSRoman Divacky       --LUIdx;
4458f22ef01cSRoman Divacky       --NumUses;
4459f22ef01cSRoman Divacky       break;
4460f22ef01cSRoman Divacky     }
4461f22ef01cSRoman Divacky   }
4462f22ef01cSRoman Divacky 
44634ba319b5SDimitry Andric   LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4464e580952dSDimitry Andric }
4465f22ef01cSRoman Divacky 
44667d523365SDimitry Andric /// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
4467e580952dSDimitry Andric /// we've done more filtering, as it may be able to find more formulae to
4468e580952dSDimitry Andric /// eliminate.
4469e580952dSDimitry Andric void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
4470e580952dSDimitry Andric   if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
44714ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
4472e580952dSDimitry Andric 
44734ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
4474e580952dSDimitry Andric                          "undesirable dedicated registers.\n");
4475e580952dSDimitry Andric 
4476e580952dSDimitry Andric     FilterOutUndesirableDedicatedRegisters();
4477e580952dSDimitry Andric 
44784ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4479e580952dSDimitry Andric   }
4480e580952dSDimitry Andric }
4481e580952dSDimitry Andric 
4482c4394386SDimitry Andric /// If an LSRUse has multiple formulae with the same ScaledReg and Scale,
4483c4394386SDimitry Andric /// pick the best one and delete the others.
4484c4394386SDimitry Andric /// This narrowing heuristic is to keep as many formulae with different
4485c4394386SDimitry Andric /// Scale and ScaledReg pair as possible while narrowing the search space.
4486c4394386SDimitry Andric /// The benefit is that a formula set with more Scale and ScaledReg variations
4487c4394386SDimitry Andric /// is more likely to yield a better solution than one where they are all the
4488c4394386SDimitry Andric /// same. The winner-reg-picking heuristic tends to keep formulae with the same
44894ba319b5SDimitry Andric /// Scale and ScaledReg and filter out the others, and we want to avoid that
4490c4394386SDimitry Andric /// if possible.
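/// Illustrative example: if a use has both 4*reg(a) + reg(b) and 4*reg(a) +
/// reg(c), only one of them survives (preferring fewer new registers, then the
/// lower cost), while a formula with a different pair such as 2*reg(a) + reg(b)
/// is kept.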
4491c4394386SDimitry Andric void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
4492c4394386SDimitry Andric   if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4493c4394386SDimitry Andric     return;
4494c4394386SDimitry Andric 
44954ba319b5SDimitry Andric   LLVM_DEBUG(
44964ba319b5SDimitry Andric       dbgs() << "The search space is too complex.\n"
4497c4394386SDimitry Andric                 "Narrowing the search space by choosing the best Formula "
4498c4394386SDimitry Andric                 "from the Formulae with the same Scale and ScaledReg.\n");
4499c4394386SDimitry Andric 
4500c4394386SDimitry Andric   // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse.
45012cab237bSDimitry Andric   using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
45022cab237bSDimitry Andric 
4503c4394386SDimitry Andric   BestFormulaeTy BestFormulae;
4504c4394386SDimitry Andric #ifndef NDEBUG
4505c4394386SDimitry Andric   bool ChangedFormulae = false;
4506c4394386SDimitry Andric #endif
4507c4394386SDimitry Andric   DenseSet<const SCEV *> VisitedRegs;
4508c4394386SDimitry Andric   SmallPtrSet<const SCEV *, 16> Regs;
4509c4394386SDimitry Andric 
4510c4394386SDimitry Andric   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4511c4394386SDimitry Andric     LSRUse &LU = Uses[LUIdx];
45124ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
45134ba319b5SDimitry Andric                dbgs() << '\n');
4514c4394386SDimitry Andric 
4515c4394386SDimitry Andric     // Return true if Formula FA is better than Formula FB.
4516c4394386SDimitry Andric     auto IsBetterThan = [&](Formula &FA, Formula &FB) {
4517c4394386SDimitry Andric       // First we will try to choose the Formula with fewer new registers.
4518c4394386SDimitry Andric       // For each register used by the current Formula, the more widely the
4519c4394386SDimitry Andric       // register is shared among LSRUses, the less it increases the formula's
4520c4394386SDimitry Andric       // register counter.
4521c4394386SDimitry Andric       size_t FARegNum = 0;
4522c4394386SDimitry Andric       for (const SCEV *Reg : FA.BaseRegs) {
4523c4394386SDimitry Andric         const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
4524c4394386SDimitry Andric         FARegNum += (NumUses - UsedByIndices.count() + 1);
4525c4394386SDimitry Andric       }
4526c4394386SDimitry Andric       size_t FBRegNum = 0;
4527c4394386SDimitry Andric       for (const SCEV *Reg : FB.BaseRegs) {
4528c4394386SDimitry Andric         const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
4529c4394386SDimitry Andric         FBRegNum += (NumUses - UsedByIndices.count() + 1);
4530c4394386SDimitry Andric       }
4531c4394386SDimitry Andric       if (FARegNum != FBRegNum)
4532c4394386SDimitry Andric         return FARegNum < FBRegNum;
4533c4394386SDimitry Andric 
4534c4394386SDimitry Andric       // If the new register numbers are the same, choose the Formula with
4535c4394386SDimitry Andric       // less Cost.
4536c4394386SDimitry Andric       Cost CostFA, CostFB;
4537c4394386SDimitry Andric       Regs.clear();
4538c4394386SDimitry Andric       CostFA.RateFormula(TTI, FA, Regs, VisitedRegs, L, SE, DT, LU);
4539c4394386SDimitry Andric       Regs.clear();
4540c4394386SDimitry Andric       CostFB.RateFormula(TTI, FB, Regs, VisitedRegs, L, SE, DT, LU);
4541c4394386SDimitry Andric       return CostFA.isLess(CostFB, TTI);
4542c4394386SDimitry Andric     };
4543c4394386SDimitry Andric 
4544c4394386SDimitry Andric     bool Any = false;
4545c4394386SDimitry Andric     for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
4546c4394386SDimitry Andric          ++FIdx) {
4547c4394386SDimitry Andric       Formula &F = LU.Formulae[FIdx];
4548c4394386SDimitry Andric       if (!F.ScaledReg)
4549c4394386SDimitry Andric         continue;
4550c4394386SDimitry Andric       auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
4551c4394386SDimitry Andric       if (P.second)
4552c4394386SDimitry Andric         continue;
4553c4394386SDimitry Andric 
4554c4394386SDimitry Andric       Formula &Best = LU.Formulae[P.first->second];
4555c4394386SDimitry Andric       if (IsBetterThan(F, Best))
4556c4394386SDimitry Andric         std::swap(F, Best);
45574ba319b5SDimitry Andric       LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
4558c4394386SDimitry Andric                  dbgs() << "\n"
4559c4394386SDimitry Andric                            "    in favor of formula ";
4560c4394386SDimitry Andric                  Best.print(dbgs()); dbgs() << '\n');
4561c4394386SDimitry Andric #ifndef NDEBUG
4562c4394386SDimitry Andric       ChangedFormulae = true;
4563c4394386SDimitry Andric #endif
4564c4394386SDimitry Andric       LU.DeleteFormula(F);
4565c4394386SDimitry Andric       --FIdx;
4566c4394386SDimitry Andric       --NumForms;
4567c4394386SDimitry Andric       Any = true;
4568c4394386SDimitry Andric     }
4569c4394386SDimitry Andric     if (Any)
4570c4394386SDimitry Andric       LU.RecomputeRegs(LUIdx, RegUses);
4571c4394386SDimitry Andric 
4572c4394386SDimitry Andric     // Reset this to prepare for the next use.
4573c4394386SDimitry Andric     BestFormulae.clear();
4574c4394386SDimitry Andric   }
4575c4394386SDimitry Andric 
45764ba319b5SDimitry Andric   LLVM_DEBUG(if (ChangedFormulae) {
4577c4394386SDimitry Andric     dbgs() << "\n"
4578c4394386SDimitry Andric               "After filtering out undesirable candidates:\n";
4579c4394386SDimitry Andric     print_uses(dbgs());
4580c4394386SDimitry Andric   });
4581c4394386SDimitry Andric }
4582c4394386SDimitry Andric 
45837a7e6055SDimitry Andric /// This function deletes formulas with a high expected number of registers.
45847a7e6055SDimitry Andric /// Assuming we don't know the value of each formula (all clearly inefficient
45857a7e6055SDimitry Andric /// ones have already been deleted), compute for each register the probability
45867a7e6055SDimitry Andric /// of it not being selected.
45877a7e6055SDimitry Andric /// For example,
45887a7e6055SDimitry Andric /// Use1:
45897a7e6055SDimitry Andric ///  reg(a) + reg({0,+,1})
45907a7e6055SDimitry Andric ///  reg(a) + reg({-1,+,1}) + 1
45917a7e6055SDimitry Andric ///  reg({a,+,1})
45927a7e6055SDimitry Andric /// Use2:
45937a7e6055SDimitry Andric ///  reg(b) + reg({0,+,1})
45947a7e6055SDimitry Andric ///  reg(b) + reg({-1,+,1}) + 1
45957a7e6055SDimitry Andric ///  reg({b,+,1})
45967a7e6055SDimitry Andric /// Use3:
45977a7e6055SDimitry Andric ///  reg(c) + reg(b) + reg({0,+,1})
45987a7e6055SDimitry Andric ///  reg(c) + reg({b,+,1})
45997a7e6055SDimitry Andric ///
46007a7e6055SDimitry Andric /// Probability of not selecting
46017a7e6055SDimitry Andric ///                 Use1   Use2    Use3
46027a7e6055SDimitry Andric /// reg(a)         (1/3) *   1   *   1
46037a7e6055SDimitry Andric /// reg(b)           1   * (1/3) * (1/2)
46047a7e6055SDimitry Andric /// reg({0,+,1})   (2/3) * (2/3) * (1/2)
46057a7e6055SDimitry Andric /// reg({-1,+,1})  (2/3) * (2/3) *   1
46067a7e6055SDimitry Andric /// reg({a,+,1})   (2/3) *   1   *   1
46077a7e6055SDimitry Andric /// reg({b,+,1})     1   * (2/3) * (2/3)
46087a7e6055SDimitry Andric /// reg(c)           1   *   1   *   0
46097a7e6055SDimitry Andric ///
46107a7e6055SDimitry Andric /// Now compute the mathematical expectation of the register count for each
46117a7e6055SDimitry Andric /// formula. Note that for each use we exclude that use's own not-selected
46127a7e6055SDimitry Andric /// probability. For example, for Use1 the value for reg(a) would be just
46137a7e6055SDimitry Andric /// 1 * 1 (excluding the probability 1/3 of not selecting it for Use1).
46147a7e6055SDimitry Andric /// Use1:
46157a7e6055SDimitry Andric ///  reg(a) + reg({0,+,1})          1 + 1/3       -- to be deleted
46167a7e6055SDimitry Andric ///  reg(a) + reg({-1,+,1}) + 1     1 + 4/9       -- to be deleted
46177a7e6055SDimitry Andric ///  reg({a,+,1})                   1
46187a7e6055SDimitry Andric /// Use2:
46197a7e6055SDimitry Andric ///  reg(b) + reg({0,+,1})          1/2 + 1/3     -- to be deleted
46207a7e6055SDimitry Andric ///  reg(b) + reg({-1,+,1}) + 1     1/2 + 2/3     -- to be deleted
46217a7e6055SDimitry Andric ///  reg({b,+,1})                   2/3
46227a7e6055SDimitry Andric /// Use3:
46237a7e6055SDimitry Andric ///  reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
46247a7e6055SDimitry Andric ///  reg(c) + reg({b,+,1})          1 + 2/3
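/// For instance, Use1's "reg(a) + reg({0,+,1})" value above comes from
/// reg(a): (1/3 * 1 * 1) with Use1's own 1/3 factor excluded => 1, plus
/// reg({0,+,1}): (2/3 * 2/3 * 1/2) with Use1's own 2/3 factor excluded => 1/3,
/// giving the listed 1 + 1/3.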
46257a7e6055SDimitry Andric void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
46267a7e6055SDimitry Andric   if (EstimateSearchSpaceComplexity() < ComplexityLimit)
46277a7e6055SDimitry Andric     return;
46287a7e6055SDimitry Andric   // Ok, we have too many formulae on our hands to conveniently handle.
46297a7e6055SDimitry Andric   // Use a rough heuristic to thin out the list.
46307a7e6055SDimitry Andric 
46317a7e6055SDimitry Andric   // Set of Regs which will be 100% used in the final solution, i.e. used in
46327a7e6055SDimitry Andric   // each formula of the solution (in the example above this is reg(c)).
46337a7e6055SDimitry Andric   // We can skip them in calculations.
46347a7e6055SDimitry Andric   SmallPtrSet<const SCEV *, 4> UniqRegs;
46354ba319b5SDimitry Andric   LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
46367a7e6055SDimitry Andric 
46377a7e6055SDimitry Andric   // Map each register to the probability of it not being selected.
46387a7e6055SDimitry Andric   DenseMap <const SCEV *, float> RegNumMap;
46397a7e6055SDimitry Andric   for (const SCEV *Reg : RegUses) {
46407a7e6055SDimitry Andric     if (UniqRegs.count(Reg))
46417a7e6055SDimitry Andric       continue;
46427a7e6055SDimitry Andric     float PNotSel = 1;
46437a7e6055SDimitry Andric     for (const LSRUse &LU : Uses) {
46447a7e6055SDimitry Andric       if (!LU.Regs.count(Reg))
46457a7e6055SDimitry Andric         continue;
46467a7e6055SDimitry Andric       float P = LU.getNotSelectedProbability(Reg);
46477a7e6055SDimitry Andric       if (P != 0.0)
46487a7e6055SDimitry Andric         PNotSel *= P;
46497a7e6055SDimitry Andric       else
46507a7e6055SDimitry Andric         UniqRegs.insert(Reg);
46517a7e6055SDimitry Andric     }
46527a7e6055SDimitry Andric     RegNumMap.insert(std::make_pair(Reg, PNotSel));
46537a7e6055SDimitry Andric   }
46547a7e6055SDimitry Andric 
46554ba319b5SDimitry Andric   LLVM_DEBUG(
46564ba319b5SDimitry Andric       dbgs() << "Narrowing the search space by deleting costly formulas\n");
46577a7e6055SDimitry Andric 
46587a7e6055SDimitry Andric   // Delete formulas whose expected register count is high.
46597a7e6055SDimitry Andric   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
46607a7e6055SDimitry Andric     LSRUse &LU = Uses[LUIdx];
46617a7e6055SDimitry Andric     // If nothing to delete - continue.
46627a7e6055SDimitry Andric     if (LU.Formulae.size() < 2)
46637a7e6055SDimitry Andric       continue;
46647a7e6055SDimitry Andric     // This is a temporary solution to test performance. Float should be
46657a7e6055SDimitry Andric     // replaced with a rounding-independent type (based on integers) to avoid
46667a7e6055SDimitry Andric     // different results for different target builds.
46677a7e6055SDimitry Andric     float FMinRegNum = LU.Formulae[0].getNumRegs();
46687a7e6055SDimitry Andric     float FMinARegNum = LU.Formulae[0].getNumRegs();
46697a7e6055SDimitry Andric     size_t MinIdx = 0;
46707a7e6055SDimitry Andric     for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
46717a7e6055SDimitry Andric       Formula &F = LU.Formulae[i];
46727a7e6055SDimitry Andric       float FRegNum = 0;
46737a7e6055SDimitry Andric       float FARegNum = 0;
46747a7e6055SDimitry Andric       for (const SCEV *BaseReg : F.BaseRegs) {
46757a7e6055SDimitry Andric         if (UniqRegs.count(BaseReg))
46767a7e6055SDimitry Andric           continue;
46777a7e6055SDimitry Andric         FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
46787a7e6055SDimitry Andric         if (isa<SCEVAddRecExpr>(BaseReg))
46797a7e6055SDimitry Andric           FARegNum +=
46807a7e6055SDimitry Andric               RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
46817a7e6055SDimitry Andric       }
46827a7e6055SDimitry Andric       if (const SCEV *ScaledReg = F.ScaledReg) {
46837a7e6055SDimitry Andric         if (!UniqRegs.count(ScaledReg)) {
46847a7e6055SDimitry Andric           FRegNum +=
46857a7e6055SDimitry Andric               RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
46867a7e6055SDimitry Andric           if (isa<SCEVAddRecExpr>(ScaledReg))
46877a7e6055SDimitry Andric             FARegNum +=
46887a7e6055SDimitry Andric                 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
46897a7e6055SDimitry Andric         }
46907a7e6055SDimitry Andric       }
46917a7e6055SDimitry Andric       if (FMinRegNum > FRegNum ||
46927a7e6055SDimitry Andric           (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
46937a7e6055SDimitry Andric         FMinRegNum = FRegNum;
46947a7e6055SDimitry Andric         FMinARegNum = FARegNum;
46957a7e6055SDimitry Andric         MinIdx = i;
46967a7e6055SDimitry Andric       }
46977a7e6055SDimitry Andric     }
46984ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "  The formula "; LU.Formulae[MinIdx].print(dbgs());
46997a7e6055SDimitry Andric                dbgs() << " with min reg num " << FMinRegNum << '\n');
47007a7e6055SDimitry Andric     if (MinIdx != 0)
47017a7e6055SDimitry Andric       std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
47027a7e6055SDimitry Andric     while (LU.Formulae.size() != 1) {
47034ba319b5SDimitry Andric       LLVM_DEBUG(dbgs() << "  Deleting "; LU.Formulae.back().print(dbgs());
47047a7e6055SDimitry Andric                  dbgs() << '\n');
47057a7e6055SDimitry Andric       LU.Formulae.pop_back();
47067a7e6055SDimitry Andric     }
47077a7e6055SDimitry Andric     LU.RecomputeRegs(LUIdx, RegUses);
47087a7e6055SDimitry Andric     assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
47097a7e6055SDimitry Andric     Formula &F = LU.Formulae[0];
47104ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Leaving only "; F.print(dbgs()); dbgs() << '\n');
47117a7e6055SDimitry Andric     // When we choose the formula, the regs become unique.
47127a7e6055SDimitry Andric     UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
47137a7e6055SDimitry Andric     if (F.ScaledReg)
47147a7e6055SDimitry Andric       UniqRegs.insert(F.ScaledReg);
47157a7e6055SDimitry Andric   }
47164ba319b5SDimitry Andric   LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
47177a7e6055SDimitry Andric }
47187a7e6055SDimitry Andric 
47197d523365SDimitry Andric /// Pick a register which seems likely to be profitable, and then in any use
47207d523365SDimitry Andric /// which has any reference to that register, delete all formulae which do not
47217d523365SDimitry Andric /// reference that register.
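/// Illustrative example: if reg({0,+,1}) appears in formulae of most uses, it
/// is taken as a winner, and every formula in those uses that does not
/// reference it is deleted, collapsing the search space around that register.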
4722e580952dSDimitry Andric void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
4723f22ef01cSRoman Divacky   // With all other options exhausted, loop until the system is simple
4724f22ef01cSRoman Divacky   // enough to handle.
4725f22ef01cSRoman Divacky   SmallPtrSet<const SCEV *, 4> Taken;
4726f22ef01cSRoman Divacky   while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4727f22ef01cSRoman Divacky     // Ok, we have too many formulae on our hands to conveniently handle.
4728f22ef01cSRoman Divacky     // Use a rough heuristic to thin out the list.
47294ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
4730f22ef01cSRoman Divacky 
4731f22ef01cSRoman Divacky     // Pick the register which is used by the most LSRUses, which is likely
4732f22ef01cSRoman Divacky     // to be a good reuse register candidate.
473391bc56edSDimitry Andric     const SCEV *Best = nullptr;
4734f22ef01cSRoman Divacky     unsigned BestNum = 0;
4735ff0cc061SDimitry Andric     for (const SCEV *Reg : RegUses) {
4736f22ef01cSRoman Divacky       if (Taken.count(Reg))
4737f22ef01cSRoman Divacky         continue;
4738d88c1a5aSDimitry Andric       if (!Best) {
4739f22ef01cSRoman Divacky         Best = Reg;
4740d88c1a5aSDimitry Andric         BestNum = RegUses.getUsedByIndices(Reg).count();
4741d88c1a5aSDimitry Andric       } else {
4742f22ef01cSRoman Divacky         unsigned Count = RegUses.getUsedByIndices(Reg).count();
4743f22ef01cSRoman Divacky         if (Count > BestNum) {
4744f22ef01cSRoman Divacky           Best = Reg;
4745f22ef01cSRoman Divacky           BestNum = Count;
4746f22ef01cSRoman Divacky         }
4747f22ef01cSRoman Divacky       }
4748f22ef01cSRoman Divacky     }
4749f22ef01cSRoman Divacky 
47504ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
4751f22ef01cSRoman Divacky                       << " will yield profitable reuse.\n");
4752f22ef01cSRoman Divacky     Taken.insert(Best);
4753f22ef01cSRoman Divacky 
4754f22ef01cSRoman Divacky     // In any use with formulae which reference this register, delete formulae
4755f22ef01cSRoman Divacky     // which don't reference it.
4756f22ef01cSRoman Divacky     for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4757f22ef01cSRoman Divacky       LSRUse &LU = Uses[LUIdx];
4758f22ef01cSRoman Divacky       if (!LU.Regs.count(Best)) continue;
4759f22ef01cSRoman Divacky 
4760f22ef01cSRoman Divacky       bool Any = false;
4761f22ef01cSRoman Divacky       for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4762f22ef01cSRoman Divacky         Formula &F = LU.Formulae[i];
4763f22ef01cSRoman Divacky         if (!F.referencesReg(Best)) {
47644ba319b5SDimitry Andric           LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
4765f22ef01cSRoman Divacky           LU.DeleteFormula(F);
4766f22ef01cSRoman Divacky           --e;
4767f22ef01cSRoman Divacky           --i;
4768f22ef01cSRoman Divacky           Any = true;
4769f22ef01cSRoman Divacky           assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
4770f22ef01cSRoman Divacky           continue;
4771f22ef01cSRoman Divacky         }
4772f22ef01cSRoman Divacky       }
4773f22ef01cSRoman Divacky 
4774f22ef01cSRoman Divacky       if (Any)
4775f22ef01cSRoman Divacky         LU.RecomputeRegs(LUIdx, RegUses);
4776f22ef01cSRoman Divacky     }
4777f22ef01cSRoman Divacky 
47784ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4779f22ef01cSRoman Divacky   }
4780f22ef01cSRoman Divacky }
4781f22ef01cSRoman Divacky 
47827d523365SDimitry Andric /// If there are an extraordinary number of formulae to choose from, use some
47837d523365SDimitry Andric /// rough heuristics to prune down the number of formulae. This keeps the main
47847d523365SDimitry Andric /// solver from taking an extraordinary amount of time in some worst-case
47857d523365SDimitry Andric /// scenarios.
4786e580952dSDimitry Andric void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
4787e580952dSDimitry Andric   NarrowSearchSpaceByDetectingSupersets();
4788e580952dSDimitry Andric   NarrowSearchSpaceByCollapsingUnrolledCode();
4789e580952dSDimitry Andric   NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
4790c4394386SDimitry Andric   if (FilterSameScaledReg)
4791c4394386SDimitry Andric     NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
47927a7e6055SDimitry Andric   if (LSRExpNarrow)
47937a7e6055SDimitry Andric     NarrowSearchSpaceByDeletingCostlyFormulas();
47947a7e6055SDimitry Andric   else
4795e580952dSDimitry Andric     NarrowSearchSpaceByPickingWinnerRegs();
4796e580952dSDimitry Andric }
4797e580952dSDimitry Andric 
47987d523365SDimitry Andric /// This is the recursive solver.
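/// It walks the uses depth-first, extending a partial solution one formula at a
/// time and pruning any branch whose running cost is already at least as high
/// as the best complete solution found so far (a simple branch-and-bound).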
4799f22ef01cSRoman Divacky void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
4800f22ef01cSRoman Divacky                                Cost &SolutionCost,
4801f22ef01cSRoman Divacky                                SmallVectorImpl<const Formula *> &Workspace,
4802f22ef01cSRoman Divacky                                const Cost &CurCost,
4803f22ef01cSRoman Divacky                                const SmallPtrSet<const SCEV *, 16> &CurRegs,
4804f22ef01cSRoman Divacky                                DenseSet<const SCEV *> &VisitedRegs) const {
4805f22ef01cSRoman Divacky   // Some ideas:
4806f22ef01cSRoman Divacky   //  - prune more:
4807f22ef01cSRoman Divacky   //    - use more aggressive filtering
4808f22ef01cSRoman Divacky   //    - sort the formula so that the most profitable solutions are found first
4809f22ef01cSRoman Divacky   //    - sort the uses too
4810f22ef01cSRoman Divacky   //  - search faster:
4811f22ef01cSRoman Divacky   //    - don't compute a cost, and then compare. compare while computing a cost
4812f22ef01cSRoman Divacky   //      and bail early.
4813f22ef01cSRoman Divacky   //    - track register sets with SmallBitVector
4814f22ef01cSRoman Divacky 
4815f22ef01cSRoman Divacky   const LSRUse &LU = Uses[Workspace.size()];
4816f22ef01cSRoman Divacky 
4817f22ef01cSRoman Divacky   // If this use references any register that's already a part of the
4818f22ef01cSRoman Divacky   // in-progress solution, consider it a requirement that a formula must
4819f22ef01cSRoman Divacky   // reference that register in order to be considered. This prunes out
4820f22ef01cSRoman Divacky   // unprofitable searching.
4821f22ef01cSRoman Divacky   SmallSetVector<const SCEV *, 4> ReqRegs;
482239d628a0SDimitry Andric   for (const SCEV *S : CurRegs)
482339d628a0SDimitry Andric     if (LU.Regs.count(S))
482439d628a0SDimitry Andric       ReqRegs.insert(S);
4825f22ef01cSRoman Divacky 
4826f22ef01cSRoman Divacky   SmallPtrSet<const SCEV *, 16> NewRegs;
4827f22ef01cSRoman Divacky   Cost NewCost;
4828ff0cc061SDimitry Andric   for (const Formula &F : LU.Formulae) {
482991bc56edSDimitry Andric     // Ignore formulae which may not be ideal in terms of register reuse of
483091bc56edSDimitry Andric     // ReqRegs.  The formula should use all required registers before
483191bc56edSDimitry Andric     // introducing new ones.
483291bc56edSDimitry Andric     int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
4833ff0cc061SDimitry Andric     for (const SCEV *Reg : ReqRegs) {
483491bc56edSDimitry Andric       if ((F.ScaledReg && F.ScaledReg == Reg) ||
4835d88c1a5aSDimitry Andric           is_contained(F.BaseRegs, Reg)) {
483691bc56edSDimitry Andric         --NumReqRegsToFind;
483791bc56edSDimitry Andric         if (NumReqRegsToFind == 0)
4838dff0c46cSDimitry Andric           break;
4839f22ef01cSRoman Divacky       }
4840dff0c46cSDimitry Andric     }
484191bc56edSDimitry Andric     if (NumReqRegsToFind != 0) {
4842dff0c46cSDimitry Andric       // If none of the formulae satisfied the required registers, then we could
4843dff0c46cSDimitry Andric       // clear ReqRegs and try again. Currently, we simply give up in this case.
4844dff0c46cSDimitry Andric       continue;
4845dff0c46cSDimitry Andric     }
4846f22ef01cSRoman Divacky 
4847f22ef01cSRoman Divacky     // Evaluate the cost of the current formula. If it's already worse than
4848f22ef01cSRoman Divacky     // the current best, prune the search at that point.
4849f22ef01cSRoman Divacky     NewCost = CurCost;
4850f22ef01cSRoman Divacky     NewRegs = CurRegs;
4851d88c1a5aSDimitry Andric     NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU);
4852db17bf38SDimitry Andric     if (NewCost.isLess(SolutionCost, TTI)) {
4853f22ef01cSRoman Divacky       Workspace.push_back(&F);
4854f22ef01cSRoman Divacky       if (Workspace.size() != Uses.size()) {
4855f22ef01cSRoman Divacky         SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
4856f22ef01cSRoman Divacky                      NewRegs, VisitedRegs);
4857f22ef01cSRoman Divacky         if (F.getNumRegs() == 1 && Workspace.size() == 1)
4858f22ef01cSRoman Divacky           VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
4859f22ef01cSRoman Divacky       } else {
48604ba319b5SDimitry Andric         LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
48614ba319b5SDimitry Andric                    dbgs() << ".\n Regs:"; for (const SCEV *S
48624ba319b5SDimitry Andric                                                : NewRegs) dbgs()
48634ba319b5SDimitry Andric                                           << ' ' << *S;
4864f22ef01cSRoman Divacky                    dbgs() << '\n');
4865f22ef01cSRoman Divacky 
4866f22ef01cSRoman Divacky         SolutionCost = NewCost;
4867f22ef01cSRoman Divacky         Solution = Workspace;
4868f22ef01cSRoman Divacky       }
4869f22ef01cSRoman Divacky       Workspace.pop_back();
4870f22ef01cSRoman Divacky     }
4871f22ef01cSRoman Divacky   }
4872f22ef01cSRoman Divacky }
4873f22ef01cSRoman Divacky 
48747d523365SDimitry Andric /// Choose one formula from each use. Return the results in the given Solution
48757d523365SDimitry Andric /// vector.
4876f22ef01cSRoman Divacky void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
4877f22ef01cSRoman Divacky   SmallVector<const Formula *, 8> Workspace;
4878f22ef01cSRoman Divacky   Cost SolutionCost;
487991bc56edSDimitry Andric   SolutionCost.Lose();
4880f22ef01cSRoman Divacky   Cost CurCost;
4881f22ef01cSRoman Divacky   SmallPtrSet<const SCEV *, 16> CurRegs;
4882f22ef01cSRoman Divacky   DenseSet<const SCEV *> VisitedRegs;
4883f22ef01cSRoman Divacky   Workspace.reserve(Uses.size());
4884f22ef01cSRoman Divacky 
4885f22ef01cSRoman Divacky   // SolveRecurse does all the work.
4886f22ef01cSRoman Divacky   SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
4887f22ef01cSRoman Divacky                CurRegs, VisitedRegs);
48886122f3e6SDimitry Andric   if (Solution.empty()) {
48894ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
48906122f3e6SDimitry Andric     return;
48916122f3e6SDimitry Andric   }
4892f22ef01cSRoman Divacky 
4893f22ef01cSRoman Divacky   // Ok, we've now made all our decisions.
48944ba319b5SDimitry Andric   LLVM_DEBUG(dbgs() << "\n"
48954ba319b5SDimitry Andric                        "The chosen solution requires ";
48964ba319b5SDimitry Andric              SolutionCost.print(dbgs()); dbgs() << ":\n";
4897f22ef01cSRoman Divacky              for (size_t i = 0, e = Uses.size(); i != e; ++i) {
4898f22ef01cSRoman Divacky                dbgs() << "  ";
4899f22ef01cSRoman Divacky                Uses[i].print(dbgs());
4900f22ef01cSRoman Divacky                dbgs() << "\n"
4901f22ef01cSRoman Divacky                          "    ";
4902f22ef01cSRoman Divacky                Solution[i]->print(dbgs());
4903f22ef01cSRoman Divacky                dbgs() << '\n';
4904f22ef01cSRoman Divacky              });
4905f22ef01cSRoman Divacky 
4906f22ef01cSRoman Divacky   assert(Solution.size() == Uses.size() && "Malformed solution!");
4907f22ef01cSRoman Divacky }
4908f22ef01cSRoman Divacky 
49097d523365SDimitry Andric /// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as far
49107d523365SDimitry Andric /// as we can go while still being dominated by the input positions. This helps
49117d523365SDimitry Andric /// canonicalize the insert position, which encourages sharing.
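/// Each iteration hoists the candidate to the terminator of a dominating block,
/// skipping blocks in more deeply nested loops, and stops once some input
/// instruction no longer dominates the candidate.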
4912f22ef01cSRoman Divacky BasicBlock::iterator
4913f22ef01cSRoman Divacky LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
4914f22ef01cSRoman Divacky                                  const SmallVectorImpl<Instruction *> &Inputs)
4915f22ef01cSRoman Divacky                                                                          const {
49163ca95b02SDimitry Andric   Instruction *Tentative = &*IP;
4917d88c1a5aSDimitry Andric   while (true) {
49183ca95b02SDimitry Andric     bool AllDominate = true;
49193ca95b02SDimitry Andric     Instruction *BetterPos = nullptr;
49203ca95b02SDimitry Andric     // Don't bother attempting to insert before a catchswitch, their basic block
49213ca95b02SDimitry Andric     // cannot have other non-PHI instructions.
49223ca95b02SDimitry Andric     if (isa<CatchSwitchInst>(Tentative))
49233ca95b02SDimitry Andric       return IP;
49243ca95b02SDimitry Andric 
49253ca95b02SDimitry Andric     for (Instruction *Inst : Inputs) {
49263ca95b02SDimitry Andric       if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
49273ca95b02SDimitry Andric         AllDominate = false;
49283ca95b02SDimitry Andric         break;
49293ca95b02SDimitry Andric       }
49303ca95b02SDimitry Andric       // Attempt to find an insert position in the middle of the block,
49313ca95b02SDimitry Andric       // instead of at the end, so that it can be used for other expansions.
49323ca95b02SDimitry Andric       if (Tentative->getParent() == Inst->getParent() &&
49333ca95b02SDimitry Andric           (!BetterPos || !DT.dominates(Inst, BetterPos)))
49343ca95b02SDimitry Andric         BetterPos = &*std::next(BasicBlock::iterator(Inst));
49353ca95b02SDimitry Andric     }
49363ca95b02SDimitry Andric     if (!AllDominate)
49373ca95b02SDimitry Andric       break;
49383ca95b02SDimitry Andric     if (BetterPos)
49393ca95b02SDimitry Andric       IP = BetterPos->getIterator();
49403ca95b02SDimitry Andric     else
49413ca95b02SDimitry Andric       IP = Tentative->getIterator();
49423ca95b02SDimitry Andric 
4943f22ef01cSRoman Divacky     const Loop *IPLoop = LI.getLoopFor(IP->getParent());
4944f22ef01cSRoman Divacky     unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
4945f22ef01cSRoman Divacky 
4946f22ef01cSRoman Divacky     BasicBlock *IDom;
4947f22ef01cSRoman Divacky     for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
4948f22ef01cSRoman Divacky       if (!Rung) return IP;
4949f22ef01cSRoman Divacky       Rung = Rung->getIDom();
4950f22ef01cSRoman Divacky       if (!Rung) return IP;
4951f22ef01cSRoman Divacky       IDom = Rung->getBlock();
4952f22ef01cSRoman Divacky 
4953f22ef01cSRoman Divacky       // Don't climb into a loop though.
4954f22ef01cSRoman Divacky       const Loop *IDomLoop = LI.getLoopFor(IDom);
4955f22ef01cSRoman Divacky       unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
4956f22ef01cSRoman Divacky       if (IDomDepth <= IPLoopDepth &&
4957f22ef01cSRoman Divacky           (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
4958f22ef01cSRoman Divacky         break;
4959f22ef01cSRoman Divacky     }
4960f22ef01cSRoman Divacky 
49613ca95b02SDimitry Andric     Tentative = IDom->getTerminator();
4962f22ef01cSRoman Divacky   }
4963f22ef01cSRoman Divacky 
4964f22ef01cSRoman Divacky   return IP;
4965f22ef01cSRoman Divacky }
4966f22ef01cSRoman Divacky 
49677d523365SDimitry Andric /// Determine an input position which will be dominated by the operands and
49687d523365SDimitry Andric /// which will dominate the result.
4969f22ef01cSRoman Divacky BasicBlock::iterator
4970dff0c46cSDimitry Andric LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
4971f22ef01cSRoman Divacky                                            const LSRFixup &LF,
4972dff0c46cSDimitry Andric                                            const LSRUse &LU,
4973dff0c46cSDimitry Andric                                            SCEVExpander &Rewriter) const {
4974f22ef01cSRoman Divacky   // Collect some instructions which must be dominated by the
4975f22ef01cSRoman Divacky   // expanding replacement. These must be dominated by any operands that
4976f22ef01cSRoman Divacky   // will be required in the expansion.
4977f22ef01cSRoman Divacky   SmallVector<Instruction *, 4> Inputs;
4978f22ef01cSRoman Divacky   if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
4979f22ef01cSRoman Divacky     Inputs.push_back(I);
4980f22ef01cSRoman Divacky   if (LU.Kind == LSRUse::ICmpZero)
4981f22ef01cSRoman Divacky     if (Instruction *I =
4982f22ef01cSRoman Divacky           dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
4983f22ef01cSRoman Divacky       Inputs.push_back(I);
4984f22ef01cSRoman Divacky   if (LF.PostIncLoops.count(L)) {
4985f22ef01cSRoman Divacky     if (LF.isUseFullyOutsideLoop(L))
4986f22ef01cSRoman Divacky       Inputs.push_back(L->getLoopLatch()->getTerminator());
4987f22ef01cSRoman Divacky     else
4988f22ef01cSRoman Divacky       Inputs.push_back(IVIncInsertPos);
4989f22ef01cSRoman Divacky   }
4990f22ef01cSRoman Divacky   // The expansion must also be dominated by the increment positions of any
4991f22ef01cSRoman Divacky   // loops it for which it is using post-inc mode.
4992ff0cc061SDimitry Andric   for (const Loop *PIL : LF.PostIncLoops) {
4993f22ef01cSRoman Divacky     if (PIL == L) continue;
4994f22ef01cSRoman Divacky 
4995f22ef01cSRoman Divacky     // Be dominated by the loop exit.
4996f22ef01cSRoman Divacky     SmallVector<BasicBlock *, 4> ExitingBlocks;
4997f22ef01cSRoman Divacky     PIL->getExitingBlocks(ExitingBlocks);
4998f22ef01cSRoman Divacky     if (!ExitingBlocks.empty()) {
4999f22ef01cSRoman Divacky       BasicBlock *BB = ExitingBlocks[0];
5000f22ef01cSRoman Divacky       for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
5001f22ef01cSRoman Divacky         BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
5002f22ef01cSRoman Divacky       Inputs.push_back(BB->getTerminator());
5003f22ef01cSRoman Divacky     }
5004f22ef01cSRoman Divacky   }
5005f22ef01cSRoman Divacky 
50067d523365SDimitry Andric   assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
5007dff0c46cSDimitry Andric          && !isa<DbgInfoIntrinsic>(LowestIP) &&
5008dff0c46cSDimitry Andric          "Insertion point must be a normal instruction");
5009dff0c46cSDimitry Andric 
5010f22ef01cSRoman Divacky   // Then, climb up the immediate dominator tree as far as we can go while
5011f22ef01cSRoman Divacky   // still being dominated by the input positions.
5012dff0c46cSDimitry Andric   BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
5013f22ef01cSRoman Divacky 
5014f22ef01cSRoman Divacky   // Don't insert instructions before PHI nodes.
5015f22ef01cSRoman Divacky   while (isa<PHINode>(IP)) ++IP;
5016f22ef01cSRoman Divacky 
50176122f3e6SDimitry Andric   // Ignore landingpad instructions.
50183ca95b02SDimitry Andric   while (IP->isEHPad()) ++IP;
50196122f3e6SDimitry Andric 
5020f22ef01cSRoman Divacky   // Ignore debug intrinsics.
5021f22ef01cSRoman Divacky   while (isa<DbgInfoIntrinsic>(IP)) ++IP;
5022f22ef01cSRoman Divacky 
5023dff0c46cSDimitry Andric   // Set IP below instructions recently inserted by SCEVExpander. This keeps the
5024dff0c46cSDimitry Andric   // IP consistent across expansions and allows the previously inserted
5025dff0c46cSDimitry Andric   // instructions to be reused by subsequent expansion.
50267d523365SDimitry Andric   while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
50277d523365SDimitry Andric     ++IP;
5028dff0c46cSDimitry Andric 
5029f22ef01cSRoman Divacky   return IP;
5030f22ef01cSRoman Divacky }
5031f22ef01cSRoman Divacky 
50327d523365SDimitry Andric /// Emit instructions for the leading candidate expression for this LSRUse (this
50337d523365SDimitry Andric /// is called "expanding").
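/// The expansion is assembled roughly as BaseRegs + Scale*ScaledReg + BaseGV +
/// immediate offset, flushing the pending operand list wherever SCEVExpander
/// hoisting must be suppressed so the pieces stay next to their uses.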
5034f37b6182SDimitry Andric Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
5035f37b6182SDimitry Andric                            const Formula &F, BasicBlock::iterator IP,
5036f22ef01cSRoman Divacky                            SCEVExpander &Rewriter,
5037f37b6182SDimitry Andric                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
5038f785676fSDimitry Andric   if (LU.RigidFormula)
5039f785676fSDimitry Andric     return LF.OperandValToReplace;
5040f22ef01cSRoman Divacky 
5041f22ef01cSRoman Divacky   // Determine an input position which will be dominated by the operands and
5042f22ef01cSRoman Divacky   // which will dominate the result.
5043dff0c46cSDimitry Andric   IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
50446c4bc1bdSDimitry Andric   Rewriter.setInsertPoint(&*IP);
5045f22ef01cSRoman Divacky 
5046f22ef01cSRoman Divacky   // Inform the Rewriter if we have a post-increment use, so that it can
5047f22ef01cSRoman Divacky   // perform an advantageous expansion.
5048f22ef01cSRoman Divacky   Rewriter.setPostInc(LF.PostIncLoops);
5049f22ef01cSRoman Divacky 
5050f22ef01cSRoman Divacky   // This is the type that the user actually needs.
50516122f3e6SDimitry Andric   Type *OpTy = LF.OperandValToReplace->getType();
5052f22ef01cSRoman Divacky   // This will be the type that we'll initially expand to.
50536122f3e6SDimitry Andric   Type *Ty = F.getType();
5054f22ef01cSRoman Divacky   if (!Ty)
5055f22ef01cSRoman Divacky     // No type known; just expand directly to the ultimate type.
5056f22ef01cSRoman Divacky     Ty = OpTy;
5057f22ef01cSRoman Divacky   else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
5058f22ef01cSRoman Divacky     // Expand directly to the ultimate type if it's the right size.
5059f22ef01cSRoman Divacky     Ty = OpTy;
5060f22ef01cSRoman Divacky   // This is the type to do integer arithmetic in.
50616122f3e6SDimitry Andric   Type *IntTy = SE.getEffectiveSCEVType(Ty);
5062f22ef01cSRoman Divacky 
5063f22ef01cSRoman Divacky   // Build up a list of operands to add together to form the full base.
5064f22ef01cSRoman Divacky   SmallVector<const SCEV *, 8> Ops;
5065f22ef01cSRoman Divacky 
5066f22ef01cSRoman Divacky   // Expand the BaseRegs portion.
5067ff0cc061SDimitry Andric   for (const SCEV *Reg : F.BaseRegs) {
5068f22ef01cSRoman Divacky     assert(!Reg->isZero() && "Zero allocated in a base register!");
5069f22ef01cSRoman Divacky 
5070f22ef01cSRoman Divacky     // If we're expanding for a post-inc user, make the post-inc adjustment.
50717a7e6055SDimitry Andric     Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE);
50726c4bc1bdSDimitry Andric     Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
5073f22ef01cSRoman Divacky   }
5074f22ef01cSRoman Divacky 
5075f22ef01cSRoman Divacky   // Expand the ScaledReg portion.
507691bc56edSDimitry Andric   Value *ICmpScaledV = nullptr;
5077139f7f9bSDimitry Andric   if (F.Scale != 0) {
5078f22ef01cSRoman Divacky     const SCEV *ScaledS = F.ScaledReg;
5079f22ef01cSRoman Divacky 
5080f22ef01cSRoman Divacky     // If we're expanding for a post-inc user, make the post-inc adjustment.
5081f22ef01cSRoman Divacky     PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
50827a7e6055SDimitry Andric     ScaledS = denormalizeForPostIncUse(ScaledS, Loops, SE);
5083f22ef01cSRoman Divacky 
5084f22ef01cSRoman Divacky     if (LU.Kind == LSRUse::ICmpZero) {
508591bc56edSDimitry Andric       // Expand ScaleReg as if it was part of the base regs.
508691bc56edSDimitry Andric       if (F.Scale == 1)
508791bc56edSDimitry Andric         Ops.push_back(
50886c4bc1bdSDimitry Andric             SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));
508991bc56edSDimitry Andric       else {
5090f22ef01cSRoman Divacky         // An interesting way of "folding" with an icmp is to use a negated
5091f22ef01cSRoman Divacky         // scale, which we'll implement by inserting it into the other operand
5092f22ef01cSRoman Divacky         // of the icmp.
5093139f7f9bSDimitry Andric         assert(F.Scale == -1 &&
5094f22ef01cSRoman Divacky                "The only scale supported by ICmpZero uses is -1!");
50956c4bc1bdSDimitry Andric         ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
509691bc56edSDimitry Andric       }
5097f22ef01cSRoman Divacky     } else {
5098f22ef01cSRoman Divacky       // Otherwise just expand the scaled register and an explicit scale,
5099f22ef01cSRoman Divacky       // which is expected to be matched as part of the address.
51007ae0e2c9SDimitry Andric 
51017ae0e2c9SDimitry Andric       // Flush the operand list to suppress SCEVExpander hoisting address
510291bc56edSDimitry Andric       // modes, unless the addressing mode will not be folded anyway.
510391bc56edSDimitry Andric       if (!Ops.empty() && LU.Kind == LSRUse::Address &&
510491bc56edSDimitry Andric           isAMCompletelyFolded(TTI, LU, F)) {
51054ba319b5SDimitry Andric         Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
51067ae0e2c9SDimitry Andric         Ops.clear();
51077ae0e2c9SDimitry Andric         Ops.push_back(SE.getUnknown(FullV));
51087ae0e2c9SDimitry Andric       }
51096c4bc1bdSDimitry Andric       ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
511091bc56edSDimitry Andric       if (F.Scale != 1)
511191bc56edSDimitry Andric         ScaledS =
511291bc56edSDimitry Andric             SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
5113f22ef01cSRoman Divacky       Ops.push_back(ScaledS);
5114f22ef01cSRoman Divacky     }
5115f22ef01cSRoman Divacky   }
5116f22ef01cSRoman Divacky 
5117f22ef01cSRoman Divacky   // Expand the GV portion.
5118139f7f9bSDimitry Andric   if (F.BaseGV) {
5119f22ef01cSRoman Divacky     // Flush the operand list to suppress SCEVExpander hoisting.
51207ae0e2c9SDimitry Andric     if (!Ops.empty()) {
51216c4bc1bdSDimitry Andric       Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
51227ae0e2c9SDimitry Andric       Ops.clear();
51237ae0e2c9SDimitry Andric       Ops.push_back(SE.getUnknown(FullV));
51247ae0e2c9SDimitry Andric     }
5125139f7f9bSDimitry Andric     Ops.push_back(SE.getUnknown(F.BaseGV));
51267ae0e2c9SDimitry Andric   }
51277ae0e2c9SDimitry Andric 
51287ae0e2c9SDimitry Andric   // Flush the operand list to suppress SCEVExpander hoisting of both folded and
51297ae0e2c9SDimitry Andric   // unfolded offsets. LSR assumes they both live next to their uses.
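  // Expanding the partial sum to a plain Value and wrapping it back up as a
  // SCEVUnknown makes it opaque to SCEVExpander, so the instructions emitted
  // so far cannot be reassociated or hoisted away from the use.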
51307ae0e2c9SDimitry Andric   if (!Ops.empty()) {
51316c4bc1bdSDimitry Andric     Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
5132f22ef01cSRoman Divacky     Ops.clear();
5133f22ef01cSRoman Divacky     Ops.push_back(SE.getUnknown(FullV));
5134f22ef01cSRoman Divacky   }
5135f22ef01cSRoman Divacky 
5136f22ef01cSRoman Divacky   // Expand the immediate portion.
5137139f7f9bSDimitry Andric   int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
5138f22ef01cSRoman Divacky   if (Offset != 0) {
5139f22ef01cSRoman Divacky     if (LU.Kind == LSRUse::ICmpZero) {
5140f22ef01cSRoman Divacky       // The other interesting way of "folding" with an ICmpZero is to use a
5141f22ef01cSRoman Divacky       // negated immediate.
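      // For example, "icmp eq (X + 4), 0" can instead be emitted as
      // "icmp eq X, -4", folding the immediate into the comparison.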
5142f22ef01cSRoman Divacky       if (!ICmpScaledV)
51436122f3e6SDimitry Andric         ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
5144f22ef01cSRoman Divacky       else {
5145f22ef01cSRoman Divacky         Ops.push_back(SE.getUnknown(ICmpScaledV));
5146f22ef01cSRoman Divacky         ICmpScaledV = ConstantInt::get(IntTy, Offset);
5147f22ef01cSRoman Divacky       }
5148f22ef01cSRoman Divacky     } else {
5149f22ef01cSRoman Divacky       // Just add the immediate values. These again are expected to be matched
5150f22ef01cSRoman Divacky       // as part of the address.
5151f22ef01cSRoman Divacky       Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
5152f22ef01cSRoman Divacky     }
5153f22ef01cSRoman Divacky   }
5154f22ef01cSRoman Divacky 
5155bd5abe19SDimitry Andric   // Expand the unfolded offset portion.
5156bd5abe19SDimitry Andric   int64_t UnfoldedOffset = F.UnfoldedOffset;
5157bd5abe19SDimitry Andric   if (UnfoldedOffset != 0) {
5158bd5abe19SDimitry Andric     // Just add the immediate values.
5159bd5abe19SDimitry Andric     Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
5160bd5abe19SDimitry Andric                                                        UnfoldedOffset)));
5161bd5abe19SDimitry Andric   }
5162bd5abe19SDimitry Andric 
5163f22ef01cSRoman Divacky   // Emit instructions summing all the operands.
5164f22ef01cSRoman Divacky   const SCEV *FullS = Ops.empty() ?
5165f22ef01cSRoman Divacky                       SE.getConstant(IntTy, 0) :
5166f22ef01cSRoman Divacky                       SE.getAddExpr(Ops);
51676c4bc1bdSDimitry Andric   Value *FullV = Rewriter.expandCodeFor(FullS, Ty);
5168f22ef01cSRoman Divacky 
5169f22ef01cSRoman Divacky   // We're done expanding now, so reset the rewriter.
5170f22ef01cSRoman Divacky   Rewriter.clearPostInc();
5171f22ef01cSRoman Divacky 
5172f22ef01cSRoman Divacky   // An ICmpZero Formula represents an ICmp which we're handling as a
5173f22ef01cSRoman Divacky   // comparison against zero. Now that we've expanded an expression for that
5174f22ef01cSRoman Divacky   // form, update the ICmp's other operand.
5175f22ef01cSRoman Divacky   if (LU.Kind == LSRUse::ICmpZero) {
5176f22ef01cSRoman Divacky     ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
517797bc6c73SDimitry Andric     DeadInsts.emplace_back(CI->getOperand(1));
5178139f7f9bSDimitry Andric     assert(!F.BaseGV && "ICmp does not support folding a global value and "
5179f22ef01cSRoman Divacky                            "a scale at the same time!");
5180139f7f9bSDimitry Andric     if (F.Scale == -1) {
5181f22ef01cSRoman Divacky       if (ICmpScaledV->getType() != OpTy) {
5182f22ef01cSRoman Divacky         Instruction *Cast =
5183f22ef01cSRoman Divacky           CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
5184f22ef01cSRoman Divacky                                                    OpTy, false),
5185f22ef01cSRoman Divacky                            ICmpScaledV, OpTy, "tmp", CI);
5186f22ef01cSRoman Divacky         ICmpScaledV = Cast;
5187f22ef01cSRoman Divacky       }
5188f22ef01cSRoman Divacky       CI->setOperand(1, ICmpScaledV);
5189f22ef01cSRoman Divacky     } else {
519091bc56edSDimitry Andric       // A scale of 1 means that the scale has been expanded as part of the
519191bc56edSDimitry Andric       // base regs.
519291bc56edSDimitry Andric       assert((F.Scale == 0 || F.Scale == 1) &&
5193f22ef01cSRoman Divacky              "ICmpZero uses only support scales of 0, 1, or -1, "
5194f22ef01cSRoman Divacky              "and the -1 case is handled above!");
5195f22ef01cSRoman Divacky       Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
5196f22ef01cSRoman Divacky                                            -(uint64_t)Offset);
5197f22ef01cSRoman Divacky       if (C->getType() != OpTy)
5198f22ef01cSRoman Divacky         C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
5199f22ef01cSRoman Divacky                                                           OpTy, false),
5200f22ef01cSRoman Divacky                                   C, OpTy);
5201f22ef01cSRoman Divacky 
5202f22ef01cSRoman Divacky       CI->setOperand(1, C);
5203f22ef01cSRoman Divacky     }
5204f22ef01cSRoman Divacky   }
5205f22ef01cSRoman Divacky 
5206f22ef01cSRoman Divacky   return FullV;
5207f22ef01cSRoman Divacky }
5208f22ef01cSRoman Divacky 
52097d523365SDimitry Andric /// Helper for Rewrite. PHI nodes are special because the use of their operands
52107d523365SDimitry Andric /// effectively happens in their predecessor blocks, so the expression may need
52117d523365SDimitry Andric /// to be expanded in multiple places.
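/// The expansion for each rewritten incoming value is emitted before the
/// terminator of the corresponding predecessor block (splitting critical
/// edges where needed), and is memoized per block so that multiple identical
/// incoming entries share a single expansion.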
5212f37b6182SDimitry Andric void LSRInstance::RewriteForPHI(
5213f37b6182SDimitry Andric     PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
5214f37b6182SDimitry Andric     SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
5215f22ef01cSRoman Divacky   DenseMap<BasicBlock *, Value *> Inserted;
5216f22ef01cSRoman Divacky   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
5217f22ef01cSRoman Divacky     if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
5218f22ef01cSRoman Divacky       BasicBlock *BB = PN->getIncomingBlock(i);
5219f22ef01cSRoman Divacky 
5220f22ef01cSRoman Divacky       // If this is a critical edge, split the edge so that we do not insert
5221f22ef01cSRoman Divacky       // the code on all predecessor/successor paths.  We do this unless this
5222f22ef01cSRoman Divacky       // is the canonical backedge for this loop, which complicates post-inc
5223f22ef01cSRoman Divacky       // users.
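      // For example, if BB also branches to blocks other than PN's block,
      // inserting the expansion before BB's terminator would execute it on
      // paths that never reach PN; splitting gives us a dedicated block.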
5224f22ef01cSRoman Divacky       if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
522594c53d40SDimitry Andric           !isa<IndirectBrInst>(BB->getTerminator()) &&
522694c53d40SDimitry Andric           !isa<CatchSwitchInst>(BB->getTerminator())) {
52276122f3e6SDimitry Andric         BasicBlock *Parent = PN->getParent();
52286122f3e6SDimitry Andric         Loop *PNLoop = LI.getLoopFor(Parent);
52296122f3e6SDimitry Andric         if (!PNLoop || Parent != PNLoop->getHeader()) {
5230f22ef01cSRoman Divacky           // Split the critical edge.
523191bc56edSDimitry Andric           BasicBlock *NewBB = nullptr;
52326122f3e6SDimitry Andric           if (!Parent->isLandingPad()) {
5233ff0cc061SDimitry Andric             NewBB = SplitCriticalEdge(BB, Parent,
5234ff0cc061SDimitry Andric                                       CriticalEdgeSplittingOptions(&DT, &LI)
5235ff0cc061SDimitry Andric                                           .setMergeIdenticalEdges()
5236ff0cc061SDimitry Andric                                           .setDontDeleteUselessPHIs());
52376122f3e6SDimitry Andric           } else {
52386122f3e6SDimitry Andric             SmallVector<BasicBlock*, 2> NewBBs;
52397d523365SDimitry Andric             SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI);
52406122f3e6SDimitry Andric             NewBB = NewBBs[0];
52416122f3e6SDimitry Andric           }
52423861d79fSDimitry Andric           // If NewBB==NULL, then SplitCriticalEdge refused to split because all
52433861d79fSDimitry Andric           // phi predecessors are identical. The simple thing to do is skip
52443861d79fSDimitry Andric           // splitting in this case rather than complicate the API.
52453861d79fSDimitry Andric           if (NewBB) {
5246f22ef01cSRoman Divacky             // If PN is outside of the loop and BB is in the loop, we want to
5247f22ef01cSRoman Divacky             // move the block to be immediately before the PHI block, not
5248f22ef01cSRoman Divacky             // immediately after BB.
5249f22ef01cSRoman Divacky             if (L->contains(BB) && !L->contains(PN))
5250f22ef01cSRoman Divacky               NewBB->moveBefore(PN->getParent());
5251f22ef01cSRoman Divacky 
5252f22ef01cSRoman Divacky             // Splitting the edge can reduce the number of PHI entries we have.
5253f22ef01cSRoman Divacky             e = PN->getNumIncomingValues();
5254f22ef01cSRoman Divacky             BB = NewBB;
5255f22ef01cSRoman Divacky             i = PN->getBasicBlockIndex(BB);
5256f22ef01cSRoman Divacky           }
52572754fe60SDimitry Andric         }
52583861d79fSDimitry Andric       }
5259f22ef01cSRoman Divacky 
5260f22ef01cSRoman Divacky       std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
526191bc56edSDimitry Andric         Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
5262f22ef01cSRoman Divacky       if (!Pair.second)
5263f22ef01cSRoman Divacky         PN->setIncomingValue(i, Pair.first->second);
5264f22ef01cSRoman Divacky       else {
5265d88c1a5aSDimitry Andric         Value *FullV = Expand(LU, LF, F, BB->getTerminator()->getIterator(),
52667d523365SDimitry Andric                               Rewriter, DeadInsts);
5267f22ef01cSRoman Divacky 
5268f22ef01cSRoman Divacky         // If this is reuse-by-noop-cast, insert the noop cast.
52696122f3e6SDimitry Andric         Type *OpTy = LF.OperandValToReplace->getType();
5270f22ef01cSRoman Divacky         if (FullV->getType() != OpTy)
5271f22ef01cSRoman Divacky           FullV =
5272f22ef01cSRoman Divacky             CastInst::Create(CastInst::getCastOpcode(FullV, false,
5273f22ef01cSRoman Divacky                                                      OpTy, false),
5274f22ef01cSRoman Divacky                              FullV, LF.OperandValToReplace->getType(),
5275f22ef01cSRoman Divacky                              "tmp", BB->getTerminator());
5276f22ef01cSRoman Divacky 
5277f22ef01cSRoman Divacky         PN->setIncomingValue(i, FullV);
5278f22ef01cSRoman Divacky         Pair.first->second = FullV;
5279f22ef01cSRoman Divacky       }
5280f22ef01cSRoman Divacky     }
5281f22ef01cSRoman Divacky }
5282f22ef01cSRoman Divacky 
52837d523365SDimitry Andric /// Emit instructions for the leading candidate expression for this LSRUse (this
52847d523365SDimitry Andric /// is called "expanding"), and update the UserInst to reference the newly
52857d523365SDimitry Andric /// expanded value.
5286f37b6182SDimitry Andric void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
5287f37b6182SDimitry Andric                           const Formula &F, SCEVExpander &Rewriter,
5288f37b6182SDimitry Andric                           SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
5289f22ef01cSRoman Divacky   // First, find an insertion point that dominates UserInst. For PHI nodes,
5290f22ef01cSRoman Divacky   // find the nearest block which dominates all the relevant uses.
5291f22ef01cSRoman Divacky   if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
5292d88c1a5aSDimitry Andric     RewriteForPHI(PN, LU, LF, F, Rewriter, DeadInsts);
5293f22ef01cSRoman Divacky   } else {
52947d523365SDimitry Andric     Value *FullV =
5295d88c1a5aSDimitry Andric       Expand(LU, LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts);
5296f22ef01cSRoman Divacky 
5297f22ef01cSRoman Divacky     // If this is reuse-by-noop-cast, insert the noop cast.
52986122f3e6SDimitry Andric     Type *OpTy = LF.OperandValToReplace->getType();
5299f22ef01cSRoman Divacky     if (FullV->getType() != OpTy) {
5300f22ef01cSRoman Divacky       Instruction *Cast =
5301f22ef01cSRoman Divacky         CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
5302f22ef01cSRoman Divacky                          FullV, OpTy, "tmp", LF.UserInst);
5303f22ef01cSRoman Divacky       FullV = Cast;
5304f22ef01cSRoman Divacky     }
5305f22ef01cSRoman Divacky 
5306f22ef01cSRoman Divacky     // Update the user. ICmpZero is handled specially here (for now) because
5307f22ef01cSRoman Divacky     // Expand may have updated one of the operands of the icmp already, and
5308f22ef01cSRoman Divacky     // its new value may happen to be equal to LF.OperandValToReplace, in
5309f22ef01cSRoman Divacky     // which case doing replaceUsesOfWith leads to replacing both operands
5310f22ef01cSRoman Divacky     // with the same value. TODO: Reorganize this.
5311d88c1a5aSDimitry Andric     if (LU.Kind == LSRUse::ICmpZero)
5312f22ef01cSRoman Divacky       LF.UserInst->setOperand(0, FullV);
5313f22ef01cSRoman Divacky     else
5314f22ef01cSRoman Divacky       LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
5315f22ef01cSRoman Divacky   }
5316f22ef01cSRoman Divacky 
531797bc6c73SDimitry Andric   DeadInsts.emplace_back(LF.OperandValToReplace);
5318f22ef01cSRoman Divacky }
5319f22ef01cSRoman Divacky 
53207d523365SDimitry Andric /// Rewrite all the fixup locations with new values, following the chosen
53217d523365SDimitry Andric /// solution.
53227d523365SDimitry Andric void LSRInstance::ImplementSolution(
53237d523365SDimitry Andric     const SmallVectorImpl<const Formula *> &Solution) {
5324f22ef01cSRoman Divacky   // Keep track of instructions we may have made dead, so that
5325f22ef01cSRoman Divacky   // we can remove them after we are done working.
5326f37b6182SDimitry Andric   SmallVector<WeakTrackingVH, 16> DeadInsts;
5327f22ef01cSRoman Divacky 
5328ff0cc061SDimitry Andric   SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),
5329ff0cc061SDimitry Andric                         "lsr");
5330dff0c46cSDimitry Andric #ifndef NDEBUG
5331dff0c46cSDimitry Andric   Rewriter.setDebugType(DEBUG_TYPE);
5332dff0c46cSDimitry Andric #endif
5333f22ef01cSRoman Divacky   Rewriter.disableCanonicalMode();
53346122f3e6SDimitry Andric   Rewriter.enableLSRMode();
5335f22ef01cSRoman Divacky   Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
5336f22ef01cSRoman Divacky 
5337dff0c46cSDimitry Andric   // Mark phi nodes that terminate chains so the expander tries to reuse them.
5338ff0cc061SDimitry Andric   for (const IVChain &Chain : IVChainVec) {
5339ff0cc061SDimitry Andric     if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
5340dff0c46cSDimitry Andric       Rewriter.setChainedPhi(PN);
5341dff0c46cSDimitry Andric   }
5342dff0c46cSDimitry Andric 
5343f22ef01cSRoman Divacky   // Expand the new value definitions and update the users.
5344d88c1a5aSDimitry Andric   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
5345d88c1a5aSDimitry Andric     for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
5346d88c1a5aSDimitry Andric       Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], Rewriter, DeadInsts);
5347f22ef01cSRoman Divacky       Changed = true;
5348f22ef01cSRoman Divacky     }
5349f22ef01cSRoman Divacky 
5350ff0cc061SDimitry Andric   for (const IVChain &Chain : IVChainVec) {
5351ff0cc061SDimitry Andric     GenerateIVChain(Chain, Rewriter, DeadInsts);
5352dff0c46cSDimitry Andric     Changed = true;
5353dff0c46cSDimitry Andric   }
5354f22ef01cSRoman Divacky   // Clean up after ourselves. This must be done before deleting any
5355f22ef01cSRoman Divacky   // instructions.
5356f22ef01cSRoman Divacky   Rewriter.clear();
5357f22ef01cSRoman Divacky 
5358f22ef01cSRoman Divacky   Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
5359f22ef01cSRoman Divacky }
5360f22ef01cSRoman Divacky 
53617d523365SDimitry Andric LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
53627d523365SDimitry Andric                          DominatorTree &DT, LoopInfo &LI,
53637d523365SDimitry Andric                          const TargetTransformInfo &TTI)
53642cab237bSDimitry Andric     : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L) {
5365f22ef01cSRoman Divacky   // If LoopSimplify form is not available, stay out of trouble.
5366dff0c46cSDimitry Andric   if (!L->isLoopSimplifyForm())
5367dff0c46cSDimitry Andric     return;
5368f22ef01cSRoman Divacky 
5369f22ef01cSRoman Divacky   // If there's no interesting work to be done, bail early.
5370f22ef01cSRoman Divacky   if (IU.empty()) return;
5371f22ef01cSRoman Divacky 
5372cb4dff85SDimitry Andric   // If there's too much analysis to be done, bail early. We won't be able to
5373cb4dff85SDimitry Andric   // model the problem anyway.
5374cb4dff85SDimitry Andric   unsigned NumUsers = 0;
5375ff0cc061SDimitry Andric   for (const IVStrideUse &U : IU) {
5376cb4dff85SDimitry Andric     if (++NumUsers > MaxIVUsers) {
5377ff0cc061SDimitry Andric       (void)U;
53784ba319b5SDimitry Andric       LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
53794ba319b5SDimitry Andric                         << "\n");
5380cb4dff85SDimitry Andric       return;
5381cb4dff85SDimitry Andric     }
5382ce479d84SDimitry Andric     // Bail out if we have a PHI on an EHPad that gets a value from a
5383ce479d84SDimitry Andric     // CatchSwitchInst.  Because the CatchSwitchInst cannot be split, there is
5384ce479d84SDimitry Andric     // no good place to stick any instructions.
5385ce479d84SDimitry Andric     if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
5386ce479d84SDimitry Andric        auto *FirstNonPHI = PN->getParent()->getFirstNonPHI();
5387ce479d84SDimitry Andric        if (isa<FuncletPadInst>(FirstNonPHI) ||
5388ce479d84SDimitry Andric            isa<CatchSwitchInst>(FirstNonPHI))
5389ce479d84SDimitry Andric          for (BasicBlock *PredBB : PN->blocks())
5390ce479d84SDimitry Andric            if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
5391ce479d84SDimitry Andric              return;
5392ce479d84SDimitry Andric     }
5393cb4dff85SDimitry Andric   }
5394cb4dff85SDimitry Andric 
5395dff0c46cSDimitry Andric #ifndef NDEBUG
5396dff0c46cSDimitry Andric   // All dominating loops must have preheaders, or SCEVExpander may not be able
5397dff0c46cSDimitry Andric   // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
5398dff0c46cSDimitry Andric   //
5399dff0c46cSDimitry Andric   // IVUsers analysis should only create users that are dominated by simple loop
5400dff0c46cSDimitry Andric   // headers. Since this loop should dominate all of its users, its user list
5401dff0c46cSDimitry Andric   // should be empty if this loop itself is not within a simple loop nest.
5402dff0c46cSDimitry Andric   for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
5403dff0c46cSDimitry Andric        Rung; Rung = Rung->getIDom()) {
5404dff0c46cSDimitry Andric     BasicBlock *BB = Rung->getBlock();
5405dff0c46cSDimitry Andric     const Loop *DomLoop = LI.getLoopFor(BB);
5406dff0c46cSDimitry Andric     if (DomLoop && DomLoop->getHeader() == BB) {
5407dff0c46cSDimitry Andric       assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
5408dff0c46cSDimitry Andric     }
5409dff0c46cSDimitry Andric   }
5410dff0c46cSDimitry Andric #endif // NDEBUG
5411dff0c46cSDimitry Andric 
54124ba319b5SDimitry Andric   LLVM_DEBUG(dbgs() << "\nLSR on loop ";
541391bc56edSDimitry Andric              L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
5414f22ef01cSRoman Divacky              dbgs() << ":\n");
5415f22ef01cSRoman Divacky 
5416f22ef01cSRoman Divacky   // First, perform some low-level loop optimizations.
5417f22ef01cSRoman Divacky   OptimizeShadowIV();
5418f22ef01cSRoman Divacky   OptimizeLoopTermCond();
5419f22ef01cSRoman Divacky 
54206122f3e6SDimitry Andric   // If loop preparation eliminates all interesting IV users, bail.
54216122f3e6SDimitry Andric   if (IU.empty()) return;
54226122f3e6SDimitry Andric 
54236122f3e6SDimitry Andric   // Skip outer loops until we can model them better with formulae.
5424dff0c46cSDimitry Andric   if (!L->empty()) {
54254ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
54266122f3e6SDimitry Andric     return;
54276122f3e6SDimitry Andric   }
54286122f3e6SDimitry Andric 
5429f22ef01cSRoman Divacky   // Start collecting data and preparing for the solver.
5430dff0c46cSDimitry Andric   CollectChains();
5431f22ef01cSRoman Divacky   CollectInterestingTypesAndFactors();
5432f22ef01cSRoman Divacky   CollectFixupsAndInitialFormulae();
5433f22ef01cSRoman Divacky   CollectLoopInvariantFixupsAndFormulae();
5434f22ef01cSRoman Divacky 
54354ba319b5SDimitry Andric   if (Uses.empty())
54364ba319b5SDimitry Andric     return;
54374ba319b5SDimitry Andric 
54384ba319b5SDimitry Andric   LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
5439f22ef01cSRoman Divacky              print_uses(dbgs()));
5440f22ef01cSRoman Divacky 
5441f22ef01cSRoman Divacky   // Now use the reuse data to generate a bunch of interesting ways
5442f22ef01cSRoman Divacky   // to formulate the values needed for the uses.
5443f22ef01cSRoman Divacky   GenerateAllReuseFormulae();
5444f22ef01cSRoman Divacky 
5445f22ef01cSRoman Divacky   FilterOutUndesirableDedicatedRegisters();
5446f22ef01cSRoman Divacky   NarrowSearchSpaceUsingHeuristics();
5447f22ef01cSRoman Divacky 
5448f22ef01cSRoman Divacky   SmallVector<const Formula *, 8> Solution;
5449f22ef01cSRoman Divacky   Solve(Solution);
5450f22ef01cSRoman Divacky 
5451f22ef01cSRoman Divacky   // Release memory that is no longer needed.
5452f22ef01cSRoman Divacky   Factors.clear();
5453f22ef01cSRoman Divacky   Types.clear();
5454f22ef01cSRoman Divacky   RegUses.clear();
5455f22ef01cSRoman Divacky 
54566122f3e6SDimitry Andric   if (Solution.empty())
54576122f3e6SDimitry Andric     return;
54586122f3e6SDimitry Andric 
5459f22ef01cSRoman Divacky #ifndef NDEBUG
5460f22ef01cSRoman Divacky   // Formulae should be legal.
5461ff0cc061SDimitry Andric   for (const LSRUse &LU : Uses) {
5462ff0cc061SDimitry Andric     for (const Formula &F : LU.Formulae)
5463139f7f9bSDimitry Andric       assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
5464ff0cc061SDimitry Andric                         F) && "Illegal formula generated!");
5465f22ef01cSRoman Divacky   }
5466f22ef01cSRoman Divacky #endif
5467f22ef01cSRoman Divacky 
5468f22ef01cSRoman Divacky   // Now that we've decided what we want, make it so.
54697d523365SDimitry Andric   ImplementSolution(Solution);
5470f22ef01cSRoman Divacky }
5471f22ef01cSRoman Divacky 
54722cab237bSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5473f22ef01cSRoman Divacky void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
5474f22ef01cSRoman Divacky   if (Factors.empty() && Types.empty()) return;
5475f22ef01cSRoman Divacky 
5476f22ef01cSRoman Divacky   OS << "LSR has identified the following interesting factors and types: ";
5477f22ef01cSRoman Divacky   bool First = true;
5478f22ef01cSRoman Divacky 
5479ff0cc061SDimitry Andric   for (int64_t Factor : Factors) {
5480f22ef01cSRoman Divacky     if (!First) OS << ", ";
5481f22ef01cSRoman Divacky     First = false;
5482ff0cc061SDimitry Andric     OS << '*' << Factor;
5483f22ef01cSRoman Divacky   }
5484f22ef01cSRoman Divacky 
5485ff0cc061SDimitry Andric   for (Type *Ty : Types) {
5486f22ef01cSRoman Divacky     if (!First) OS << ", ";
5487f22ef01cSRoman Divacky     First = false;
5488ff0cc061SDimitry Andric     OS << '(' << *Ty << ')';
5489f22ef01cSRoman Divacky   }
5490f22ef01cSRoman Divacky   OS << '\n';
5491f22ef01cSRoman Divacky }
5492f22ef01cSRoman Divacky 
5493f22ef01cSRoman Divacky void LSRInstance::print_fixups(raw_ostream &OS) const {
5494f22ef01cSRoman Divacky   OS << "LSR is examining the following fixup sites:\n";
5495d88c1a5aSDimitry Andric   for (const LSRUse &LU : Uses)
5496d88c1a5aSDimitry Andric     for (const LSRFixup &LF : LU.Fixups) {
5497f22ef01cSRoman Divacky       OS << "  ";
5498ff0cc061SDimitry Andric       LF.print(OS);
5499f22ef01cSRoman Divacky       OS << '\n';
5500f22ef01cSRoman Divacky     }
5501f22ef01cSRoman Divacky }
5502f22ef01cSRoman Divacky 
5503f22ef01cSRoman Divacky void LSRInstance::print_uses(raw_ostream &OS) const {
5504f22ef01cSRoman Divacky   OS << "LSR is examining the following uses:\n";
5505ff0cc061SDimitry Andric   for (const LSRUse &LU : Uses) {
5506f22ef01cSRoman Divacky     OS << "  ";
5507f22ef01cSRoman Divacky     LU.print(OS);
5508f22ef01cSRoman Divacky     OS << '\n';
5509ff0cc061SDimitry Andric     for (const Formula &F : LU.Formulae) {
5510f22ef01cSRoman Divacky       OS << "    ";
5511ff0cc061SDimitry Andric       F.print(OS);
5512f22ef01cSRoman Divacky       OS << '\n';
5513f22ef01cSRoman Divacky     }
5514f22ef01cSRoman Divacky   }
5515f22ef01cSRoman Divacky }
5516f22ef01cSRoman Divacky 
5517f22ef01cSRoman Divacky void LSRInstance::print(raw_ostream &OS) const {
5518f22ef01cSRoman Divacky   print_factors_and_types(OS);
5519f22ef01cSRoman Divacky   print_fixups(OS);
5520f22ef01cSRoman Divacky   print_uses(OS);
5521f22ef01cSRoman Divacky }
5522f22ef01cSRoman Divacky 
55237a7e6055SDimitry Andric LLVM_DUMP_METHOD void LSRInstance::dump() const {
5524f22ef01cSRoman Divacky   print(errs()); errs() << '\n';
5525f22ef01cSRoman Divacky }
55267a7e6055SDimitry Andric #endif
5527f22ef01cSRoman Divacky 
5528f22ef01cSRoman Divacky namespace {
5529f22ef01cSRoman Divacky 
5530f22ef01cSRoman Divacky class LoopStrengthReduce : public LoopPass {
5531f22ef01cSRoman Divacky public:
5532f22ef01cSRoman Divacky   static char ID; // Pass ID, replacement for typeid
5533d88c1a5aSDimitry Andric 
5534139f7f9bSDimitry Andric   LoopStrengthReduce();
5535f22ef01cSRoman Divacky 
5536f22ef01cSRoman Divacky private:
553791bc56edSDimitry Andric   bool runOnLoop(Loop *L, LPPassManager &LPM) override;
553891bc56edSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
5539f22ef01cSRoman Divacky };
5540f22ef01cSRoman Divacky 
5541d88c1a5aSDimitry Andric } // end anonymous namespace
5542f22ef01cSRoman Divacky 
5543139f7f9bSDimitry Andric LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
55442754fe60SDimitry Andric   initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
55452754fe60SDimitry Andric }
5546f22ef01cSRoman Divacky 
5547f22ef01cSRoman Divacky void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
5548f22ef01cSRoman Divacky   // We split critical edges, so we change the CFG.  However, we do update
5549f22ef01cSRoman Divacky   // many analyses if they are around.
5550f22ef01cSRoman Divacky   AU.addPreservedID(LoopSimplifyID);
5551f22ef01cSRoman Divacky 
5552ff0cc061SDimitry Andric   AU.addRequired<LoopInfoWrapperPass>();
5553ff0cc061SDimitry Andric   AU.addPreserved<LoopInfoWrapperPass>();
5554f22ef01cSRoman Divacky   AU.addRequiredID(LoopSimplifyID);
555591bc56edSDimitry Andric   AU.addRequired<DominatorTreeWrapperPass>();
555691bc56edSDimitry Andric   AU.addPreserved<DominatorTreeWrapperPass>();
55577d523365SDimitry Andric   AU.addRequired<ScalarEvolutionWrapperPass>();
55587d523365SDimitry Andric   AU.addPreserved<ScalarEvolutionWrapperPass>();
55592754fe60SDimitry Andric   // Requiring LoopSimplify a second time here prevents IVUsers from running
55602754fe60SDimitry Andric   // twice, since LoopSimplify was invalidated by running ScalarEvolution.
55612754fe60SDimitry Andric   AU.addRequiredID(LoopSimplifyID);
55623ca95b02SDimitry Andric   AU.addRequired<IVUsersWrapperPass>();
55633ca95b02SDimitry Andric   AU.addPreserved<IVUsersWrapperPass>();
5564ff0cc061SDimitry Andric   AU.addRequired<TargetTransformInfoWrapperPass>();
5565f22ef01cSRoman Divacky }
5566f22ef01cSRoman Divacky 
5567d88c1a5aSDimitry Andric static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
5568d88c1a5aSDimitry Andric                                DominatorTree &DT, LoopInfo &LI,
5569d88c1a5aSDimitry Andric                                const TargetTransformInfo &TTI) {
5570f22ef01cSRoman Divacky   bool Changed = false;
5571f22ef01cSRoman Divacky 
5572f22ef01cSRoman Divacky   // Run the main LSR transformation.
55737d523365SDimitry Andric   Changed |= LSRInstance(L, IU, SE, DT, LI, TTI).getChanged();
5574f22ef01cSRoman Divacky 
5575dff0c46cSDimitry Andric   // Remove any extra phis created by processing inner loops.
5576f22ef01cSRoman Divacky   Changed |= DeleteDeadPHIs(L->getHeader());
5577139f7f9bSDimitry Andric   if (EnablePhiElim && L->isLoopSimplifyForm()) {
5578f37b6182SDimitry Andric     SmallVector<WeakTrackingVH, 16> DeadInsts;
5579ff0cc061SDimitry Andric     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
5580d88c1a5aSDimitry Andric     SCEVExpander Rewriter(SE, DL, "lsr");
5581dff0c46cSDimitry Andric #ifndef NDEBUG
5582dff0c46cSDimitry Andric     Rewriter.setDebugType(DEBUG_TYPE);
5583dff0c46cSDimitry Andric #endif
5584d88c1a5aSDimitry Andric     unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
5585dff0c46cSDimitry Andric     if (numFolded) {
5586dff0c46cSDimitry Andric       Changed = true;
5587dff0c46cSDimitry Andric       DeleteTriviallyDeadInstructions(DeadInsts);
5588dff0c46cSDimitry Andric       DeleteDeadPHIs(L->getHeader());
5589dff0c46cSDimitry Andric     }
5590dff0c46cSDimitry Andric   }
5591f22ef01cSRoman Divacky   return Changed;
5592f22ef01cSRoman Divacky }
5593d88c1a5aSDimitry Andric 
5594d88c1a5aSDimitry Andric bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
5595d88c1a5aSDimitry Andric   if (skipLoop(L))
5596d88c1a5aSDimitry Andric     return false;
5597d88c1a5aSDimitry Andric 
5598d88c1a5aSDimitry Andric   auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
5599d88c1a5aSDimitry Andric   auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
5600d88c1a5aSDimitry Andric   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5601d88c1a5aSDimitry Andric   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
5602d88c1a5aSDimitry Andric   const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
5603d88c1a5aSDimitry Andric       *L->getHeader()->getParent());
5604d88c1a5aSDimitry Andric   return ReduceLoopStrength(L, IU, SE, DT, LI, TTI);
5605d88c1a5aSDimitry Andric }
5606d88c1a5aSDimitry Andric 
5607f1a29dd3SDimitry Andric PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
5608f1a29dd3SDimitry Andric                                               LoopStandardAnalysisResults &AR,
5609f1a29dd3SDimitry Andric                                               LPMUpdater &) {
5610f1a29dd3SDimitry Andric   if (!ReduceLoopStrength(&L, AM.getResult<IVUsersAnalysis>(L, AR), AR.SE,
5611f1a29dd3SDimitry Andric                           AR.DT, AR.LI, AR.TTI))
5612d88c1a5aSDimitry Andric     return PreservedAnalyses::all();
5613d88c1a5aSDimitry Andric 
5614d88c1a5aSDimitry Andric   return getLoopPassPreservedAnalyses();
5615d88c1a5aSDimitry Andric }
5616d88c1a5aSDimitry Andric 
5617d88c1a5aSDimitry Andric char LoopStrengthReduce::ID = 0;
56182cab237bSDimitry Andric 
5619d88c1a5aSDimitry Andric INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
5620d88c1a5aSDimitry Andric                       "Loop Strength Reduction", false, false)
5621d88c1a5aSDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
5622d88c1a5aSDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
5623d88c1a5aSDimitry Andric INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
5624d88c1a5aSDimitry Andric INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
5625d88c1a5aSDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
5626d88c1a5aSDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
5627d88c1a5aSDimitry Andric INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
5628d88c1a5aSDimitry Andric                     "Loop Strength Reduction", false, false)
5629d88c1a5aSDimitry Andric 
5630d88c1a5aSDimitry Andric Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); }