1ff0cc061SDimitry Andric //===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
2ff0cc061SDimitry Andric //
3ff0cc061SDimitry Andric //                     The LLVM Compiler Infrastructure
4ff0cc061SDimitry Andric //
5ff0cc061SDimitry Andric // This file is distributed under the University of Illinois Open Source
6ff0cc061SDimitry Andric // License. See LICENSE.TXT for details.
7ff0cc061SDimitry Andric //
8ff0cc061SDimitry Andric //===----------------------------------------------------------------------===//
9ff0cc061SDimitry Andric //
10ff0cc061SDimitry Andric // This file implements the Loop Distribution Pass.  Its main focus is to
11ff0cc061SDimitry Andric // distribute loops that cannot be vectorized due to dependence cycles.  It
12ff0cc061SDimitry Andric // tries to isolate the offending dependences into a new loop allowing
13ff0cc061SDimitry Andric // vectorization of the remaining parts.
14ff0cc061SDimitry Andric //
15ff0cc061SDimitry Andric // For dependence analysis, the pass uses the LoopVectorizer's
16ff0cc061SDimitry Andric // LoopAccessAnalysis.  Because this analysis presumes no change in the order of
17ff0cc061SDimitry Andric // memory operations, special care is taken to preserve the lexical order of
18ff0cc061SDimitry Andric // these operations.
19ff0cc061SDimitry Andric //
20ff0cc061SDimitry Andric // Similarly to the Vectorizer, the pass also supports loop versioning to
21ff0cc061SDimitry Andric // run-time disambiguate potentially overlapping arrays.
22ff0cc061SDimitry Andric //
23ff0cc061SDimitry Andric //===----------------------------------------------------------------------===//
24ff0cc061SDimitry Andric 
253ca95b02SDimitry Andric #include "llvm/Transforms/Scalar/LoopDistribute.h"
262cab237bSDimitry Andric #include "llvm/ADT/DenseMap.h"
27ff0cc061SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h"
28ff0cc061SDimitry Andric #include "llvm/ADT/EquivalenceClasses.h"
292cab237bSDimitry Andric #include "llvm/ADT/Optional.h"
30ff0cc061SDimitry Andric #include "llvm/ADT/STLExtras.h"
312cab237bSDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
322cab237bSDimitry Andric #include "llvm/ADT/SmallVector.h"
33ff0cc061SDimitry Andric #include "llvm/ADT/Statistic.h"
342cab237bSDimitry Andric #include "llvm/ADT/StringRef.h"
352cab237bSDimitry Andric #include "llvm/ADT/Twine.h"
362cab237bSDimitry Andric #include "llvm/ADT/iterator_range.h"
372cab237bSDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
382cab237bSDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
39d88c1a5aSDimitry Andric #include "llvm/Analysis/GlobalsModRef.h"
40ff0cc061SDimitry Andric #include "llvm/Analysis/LoopAccessAnalysis.h"
412cab237bSDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h"
42ff0cc061SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
432cab237bSDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
442cab237bSDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
452cab237bSDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
462cab237bSDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
472cab237bSDimitry Andric #include "llvm/IR/BasicBlock.h"
482cab237bSDimitry Andric #include "llvm/IR/Constants.h"
493ca95b02SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
50ff0cc061SDimitry Andric #include "llvm/IR/Dominators.h"
512cab237bSDimitry Andric #include "llvm/IR/Function.h"
522cab237bSDimitry Andric #include "llvm/IR/InstrTypes.h"
532cab237bSDimitry Andric #include "llvm/IR/Instruction.h"
542cab237bSDimitry Andric #include "llvm/IR/Instructions.h"
552cab237bSDimitry Andric #include "llvm/IR/LLVMContext.h"
562cab237bSDimitry Andric #include "llvm/IR/Metadata.h"
572cab237bSDimitry Andric #include "llvm/IR/PassManager.h"
582cab237bSDimitry Andric #include "llvm/IR/Value.h"
59ff0cc061SDimitry Andric #include "llvm/Pass.h"
602cab237bSDimitry Andric #include "llvm/Support/Casting.h"
61ff0cc061SDimitry Andric #include "llvm/Support/CommandLine.h"
62ff0cc061SDimitry Andric #include "llvm/Support/Debug.h"
632cab237bSDimitry Andric #include "llvm/Support/raw_ostream.h"
642cab237bSDimitry Andric #include "llvm/Transforms/Scalar.h"
65ff0cc061SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
66ff0cc061SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h"
677d523365SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
68875ed548SDimitry Andric #include "llvm/Transforms/Utils/LoopVersioning.h"
692cab237bSDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h"
702cab237bSDimitry Andric #include <cassert>
712cab237bSDimitry Andric #include <functional>
72ff0cc061SDimitry Andric #include <list>
732cab237bSDimitry Andric #include <tuple>
742cab237bSDimitry Andric #include <utility>
752cab237bSDimitry Andric 
762cab237bSDimitry Andric using namespace llvm;
77ff0cc061SDimitry Andric 
78ff0cc061SDimitry Andric #define LDIST_NAME "loop-distribute"
79ff0cc061SDimitry Andric #define DEBUG_TYPE LDIST_NAME
80ff0cc061SDimitry Andric 
81*b5893f02SDimitry Andric /// @{
82*b5893f02SDimitry Andric /// Metadata attribute names
83*b5893f02SDimitry Andric static const char *const LLVMLoopDistributeFollowupAll =
84*b5893f02SDimitry Andric     "llvm.loop.distribute.followup_all";
85*b5893f02SDimitry Andric static const char *const LLVMLoopDistributeFollowupCoincident =
86*b5893f02SDimitry Andric     "llvm.loop.distribute.followup_coincident";
87*b5893f02SDimitry Andric static const char *const LLVMLoopDistributeFollowupSequential =
88*b5893f02SDimitry Andric     "llvm.loop.distribute.followup_sequential";
89*b5893f02SDimitry Andric static const char *const LLVMLoopDistributeFollowupFallback =
90*b5893f02SDimitry Andric     "llvm.loop.distribute.followup_fallback";
91*b5893f02SDimitry Andric /// @}
92*b5893f02SDimitry Andric 
93ff0cc061SDimitry Andric static cl::opt<bool>
94ff0cc061SDimitry Andric     LDistVerify("loop-distribute-verify", cl::Hidden,
95ff0cc061SDimitry Andric                 cl::desc("Turn on DominatorTree and LoopInfo verification "
96ff0cc061SDimitry Andric                          "after Loop Distribution"),
97ff0cc061SDimitry Andric                 cl::init(false));
98ff0cc061SDimitry Andric 
99ff0cc061SDimitry Andric static cl::opt<bool> DistributeNonIfConvertible(
100ff0cc061SDimitry Andric     "loop-distribute-non-if-convertible", cl::Hidden,
101ff0cc061SDimitry Andric     cl::desc("Whether to distribute into a loop that may not be "
102ff0cc061SDimitry Andric              "if-convertible by the loop vectorizer"),
103ff0cc061SDimitry Andric     cl::init(false));
104ff0cc061SDimitry Andric 
1057d523365SDimitry Andric static cl::opt<unsigned> DistributeSCEVCheckThreshold(
1067d523365SDimitry Andric     "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
1077d523365SDimitry Andric     cl::desc("The maximum number of SCEV checks allowed for Loop "
1087d523365SDimitry Andric              "Distribution"));
1097d523365SDimitry Andric 
1103ca95b02SDimitry Andric static cl::opt<unsigned> PragmaDistributeSCEVCheckThreshold(
1113ca95b02SDimitry Andric     "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
1123ca95b02SDimitry Andric     cl::Hidden,
1133ca95b02SDimitry Andric     cl::desc(
1143ca95b02SDimitry Andric         "The maximum number of SCEV checks allowed for Loop "
1153ca95b02SDimitry Andric         "Distribution for loop marked with #pragma loop distribute(enable)"));
1163ca95b02SDimitry Andric 
1173ca95b02SDimitry Andric static cl::opt<bool> EnableLoopDistribute(
1183ca95b02SDimitry Andric     "enable-loop-distribute", cl::Hidden,
119d88c1a5aSDimitry Andric     cl::desc("Enable the new, experimental LoopDistribution Pass"),
120d88c1a5aSDimitry Andric     cl::init(false));
1213ca95b02SDimitry Andric 
122ff0cc061SDimitry Andric STATISTIC(NumLoopsDistributed, "Number of loops distributed");
123ff0cc061SDimitry Andric 
124ff0cc061SDimitry Andric namespace {
1252cab237bSDimitry Andric 
1264ba319b5SDimitry Andric /// Maintains the set of instructions of the loop for a partition before
127ff0cc061SDimitry Andric /// cloning.  After cloning, it hosts the new loop.
128ff0cc061SDimitry Andric class InstPartition {
1292cab237bSDimitry Andric   using InstructionSet = SmallPtrSet<Instruction *, 8>;
130ff0cc061SDimitry Andric 
131ff0cc061SDimitry Andric public:
InstPartition(Instruction * I,Loop * L,bool DepCycle=false)132ff0cc061SDimitry Andric   InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
1332cab237bSDimitry Andric       : DepCycle(DepCycle), OrigLoop(L) {
134ff0cc061SDimitry Andric     Set.insert(I);
135ff0cc061SDimitry Andric   }
136ff0cc061SDimitry Andric 
1374ba319b5SDimitry Andric   /// Returns whether this partition contains a dependence cycle.
hasDepCycle() const138ff0cc061SDimitry Andric   bool hasDepCycle() const { return DepCycle; }
139ff0cc061SDimitry Andric 
1404ba319b5SDimitry Andric   /// Adds an instruction to this partition.
add(Instruction * I)141ff0cc061SDimitry Andric   void add(Instruction *I) { Set.insert(I); }
142ff0cc061SDimitry Andric 
1434ba319b5SDimitry Andric   /// Collection accessors.
begin()144ff0cc061SDimitry Andric   InstructionSet::iterator begin() { return Set.begin(); }
end()145ff0cc061SDimitry Andric   InstructionSet::iterator end() { return Set.end(); }
begin() const146ff0cc061SDimitry Andric   InstructionSet::const_iterator begin() const { return Set.begin(); }
end() const147ff0cc061SDimitry Andric   InstructionSet::const_iterator end() const { return Set.end(); }
empty() const148ff0cc061SDimitry Andric   bool empty() const { return Set.empty(); }
149ff0cc061SDimitry Andric 
1504ba319b5SDimitry Andric   /// Moves this partition into \p Other.  This partition becomes empty
151ff0cc061SDimitry Andric   /// after this.
moveTo(InstPartition & Other)152ff0cc061SDimitry Andric   void moveTo(InstPartition &Other) {
153ff0cc061SDimitry Andric     Other.Set.insert(Set.begin(), Set.end());
154ff0cc061SDimitry Andric     Set.clear();
155ff0cc061SDimitry Andric     Other.DepCycle |= DepCycle;
156ff0cc061SDimitry Andric   }
157ff0cc061SDimitry Andric 
1584ba319b5SDimitry Andric   /// Populates the partition with a transitive closure of all the
159ff0cc061SDimitry Andric   /// instructions that the seeded instructions dependent on.
populateUsedSet()160ff0cc061SDimitry Andric   void populateUsedSet() {
161ff0cc061SDimitry Andric     // FIXME: We currently don't use control-dependence but simply include all
162ff0cc061SDimitry Andric     // blocks (possibly empty at the end) and let simplifycfg mostly clean this
163ff0cc061SDimitry Andric     // up.
164ff0cc061SDimitry Andric     for (auto *B : OrigLoop->getBlocks())
165ff0cc061SDimitry Andric       Set.insert(B->getTerminator());
166ff0cc061SDimitry Andric 
167ff0cc061SDimitry Andric     // Follow the use-def chains to form a transitive closure of all the
168ff0cc061SDimitry Andric     // instructions that the originally seeded instructions depend on.
169ff0cc061SDimitry Andric     SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
170ff0cc061SDimitry Andric     while (!Worklist.empty()) {
171ff0cc061SDimitry Andric       Instruction *I = Worklist.pop_back_val();
172ff0cc061SDimitry Andric       // Insert instructions from the loop that we depend on.
173ff0cc061SDimitry Andric       for (Value *V : I->operand_values()) {
174ff0cc061SDimitry Andric         auto *I = dyn_cast<Instruction>(V);
175ff0cc061SDimitry Andric         if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
176ff0cc061SDimitry Andric           Worklist.push_back(I);
177ff0cc061SDimitry Andric       }
178ff0cc061SDimitry Andric     }
179ff0cc061SDimitry Andric   }
180ff0cc061SDimitry Andric 
1814ba319b5SDimitry Andric   /// Clones the original loop.
182ff0cc061SDimitry Andric   ///
183ff0cc061SDimitry Andric   /// Updates LoopInfo and DominatorTree using the information that block \p
184ff0cc061SDimitry Andric   /// LoopDomBB dominates the loop.
cloneLoopWithPreheader(BasicBlock * InsertBefore,BasicBlock * LoopDomBB,unsigned Index,LoopInfo * LI,DominatorTree * DT)185ff0cc061SDimitry Andric   Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
186ff0cc061SDimitry Andric                                unsigned Index, LoopInfo *LI,
187ff0cc061SDimitry Andric                                DominatorTree *DT) {
188ff0cc061SDimitry Andric     ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
189ff0cc061SDimitry Andric                                           VMap, Twine(".ldist") + Twine(Index),
190ff0cc061SDimitry Andric                                           LI, DT, ClonedLoopBlocks);
191ff0cc061SDimitry Andric     return ClonedLoop;
192ff0cc061SDimitry Andric   }
193ff0cc061SDimitry Andric 
1944ba319b5SDimitry Andric   /// The cloned loop.  If this partition is mapped to the original loop,
195ff0cc061SDimitry Andric   /// this is null.
getClonedLoop() const196ff0cc061SDimitry Andric   const Loop *getClonedLoop() const { return ClonedLoop; }
197ff0cc061SDimitry Andric 
1984ba319b5SDimitry Andric   /// Returns the loop where this partition ends up after distribution.
199ff0cc061SDimitry Andric   /// If this partition is mapped to the original loop then use the block from
200ff0cc061SDimitry Andric   /// the loop.
getDistributedLoop() const201*b5893f02SDimitry Andric   Loop *getDistributedLoop() const {
202ff0cc061SDimitry Andric     return ClonedLoop ? ClonedLoop : OrigLoop;
203ff0cc061SDimitry Andric   }
204ff0cc061SDimitry Andric 
2054ba319b5SDimitry Andric   /// The VMap that is populated by cloning and then used in
206ff0cc061SDimitry Andric   /// remapinstruction to remap the cloned instructions.
getVMap()207ff0cc061SDimitry Andric   ValueToValueMapTy &getVMap() { return VMap; }
208ff0cc061SDimitry Andric 
2094ba319b5SDimitry Andric   /// Remaps the cloned instructions using VMap.
remapInstructions()210875ed548SDimitry Andric   void remapInstructions() {
211875ed548SDimitry Andric     remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
212875ed548SDimitry Andric   }
213ff0cc061SDimitry Andric 
2144ba319b5SDimitry Andric   /// Based on the set of instructions selected for this partition,
215ff0cc061SDimitry Andric   /// removes the unnecessary ones.
removeUnusedInsts()216ff0cc061SDimitry Andric   void removeUnusedInsts() {
217ff0cc061SDimitry Andric     SmallVector<Instruction *, 8> Unused;
218ff0cc061SDimitry Andric 
219ff0cc061SDimitry Andric     for (auto *Block : OrigLoop->getBlocks())
220ff0cc061SDimitry Andric       for (auto &Inst : *Block)
221ff0cc061SDimitry Andric         if (!Set.count(&Inst)) {
222ff0cc061SDimitry Andric           Instruction *NewInst = &Inst;
223ff0cc061SDimitry Andric           if (!VMap.empty())
224ff0cc061SDimitry Andric             NewInst = cast<Instruction>(VMap[NewInst]);
225ff0cc061SDimitry Andric 
226ff0cc061SDimitry Andric           assert(!isa<BranchInst>(NewInst) &&
227ff0cc061SDimitry Andric                  "Branches are marked used early on");
228ff0cc061SDimitry Andric           Unused.push_back(NewInst);
229ff0cc061SDimitry Andric         }
230ff0cc061SDimitry Andric 
231ff0cc061SDimitry Andric     // Delete the instructions backwards, as it has a reduced likelihood of
232ff0cc061SDimitry Andric     // having to update as many def-use and use-def chains.
2333ca95b02SDimitry Andric     for (auto *Inst : reverse(Unused)) {
234ff0cc061SDimitry Andric       if (!Inst->use_empty())
235ff0cc061SDimitry Andric         Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
236ff0cc061SDimitry Andric       Inst->eraseFromParent();
237ff0cc061SDimitry Andric     }
238ff0cc061SDimitry Andric   }
239ff0cc061SDimitry Andric 
print() const240ff0cc061SDimitry Andric   void print() const {
241ff0cc061SDimitry Andric     if (DepCycle)
242ff0cc061SDimitry Andric       dbgs() << "  (cycle)\n";
243ff0cc061SDimitry Andric     for (auto *I : Set)
244ff0cc061SDimitry Andric       // Prefix with the block name.
245ff0cc061SDimitry Andric       dbgs() << "  " << I->getParent()->getName() << ":" << *I << "\n";
246ff0cc061SDimitry Andric   }
247ff0cc061SDimitry Andric 
printBlocks() const248ff0cc061SDimitry Andric   void printBlocks() const {
249ff0cc061SDimitry Andric     for (auto *BB : getDistributedLoop()->getBlocks())
250ff0cc061SDimitry Andric       dbgs() << *BB;
251ff0cc061SDimitry Andric   }
252ff0cc061SDimitry Andric 
253ff0cc061SDimitry Andric private:
2544ba319b5SDimitry Andric   /// Instructions from OrigLoop selected for this partition.
255ff0cc061SDimitry Andric   InstructionSet Set;
256ff0cc061SDimitry Andric 
2574ba319b5SDimitry Andric   /// Whether this partition contains a dependence cycle.
258ff0cc061SDimitry Andric   bool DepCycle;
259ff0cc061SDimitry Andric 
2604ba319b5SDimitry Andric   /// The original loop.
261ff0cc061SDimitry Andric   Loop *OrigLoop;
262ff0cc061SDimitry Andric 
2634ba319b5SDimitry Andric   /// The cloned loop.  If this partition is mapped to the original loop,
264ff0cc061SDimitry Andric   /// this is null.
2652cab237bSDimitry Andric   Loop *ClonedLoop = nullptr;
266ff0cc061SDimitry Andric 
2674ba319b5SDimitry Andric   /// The blocks of ClonedLoop including the preheader.  If this
268ff0cc061SDimitry Andric   /// partition is mapped to the original loop, this is empty.
269ff0cc061SDimitry Andric   SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
270ff0cc061SDimitry Andric 
2714ba319b5SDimitry Andric   /// These gets populated once the set of instructions have been
272ff0cc061SDimitry Andric   /// finalized. If this partition is mapped to the original loop, these are not
273ff0cc061SDimitry Andric   /// set.
274ff0cc061SDimitry Andric   ValueToValueMapTy VMap;
275ff0cc061SDimitry Andric };
276ff0cc061SDimitry Andric 
2774ba319b5SDimitry Andric /// Holds the set of Partitions.  It populates them, merges them and then
278ff0cc061SDimitry Andric /// clones the loops.
279ff0cc061SDimitry Andric class InstPartitionContainer {
2802cab237bSDimitry Andric   using InstToPartitionIdT = DenseMap<Instruction *, int>;
281ff0cc061SDimitry Andric 
282ff0cc061SDimitry Andric public:
InstPartitionContainer(Loop * L,LoopInfo * LI,DominatorTree * DT)283ff0cc061SDimitry Andric   InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
284ff0cc061SDimitry Andric       : L(L), LI(LI), DT(DT) {}
285ff0cc061SDimitry Andric 
2864ba319b5SDimitry Andric   /// Returns the number of partitions.
getSize() const287ff0cc061SDimitry Andric   unsigned getSize() const { return PartitionContainer.size(); }
288ff0cc061SDimitry Andric 
2894ba319b5SDimitry Andric   /// Adds \p Inst into the current partition if that is marked to
290ff0cc061SDimitry Andric   /// contain cycles.  Otherwise start a new partition for it.
addToCyclicPartition(Instruction * Inst)291ff0cc061SDimitry Andric   void addToCyclicPartition(Instruction *Inst) {
292ff0cc061SDimitry Andric     // If the current partition is non-cyclic.  Start a new one.
293ff0cc061SDimitry Andric     if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
294ff0cc061SDimitry Andric       PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
295ff0cc061SDimitry Andric     else
296ff0cc061SDimitry Andric       PartitionContainer.back().add(Inst);
297ff0cc061SDimitry Andric   }
298ff0cc061SDimitry Andric 
2994ba319b5SDimitry Andric   /// Adds \p Inst into a partition that is not marked to contain
300ff0cc061SDimitry Andric   /// dependence cycles.
301ff0cc061SDimitry Andric   ///
302ff0cc061SDimitry Andric   //  Initially we isolate memory instructions into as many partitions as
303ff0cc061SDimitry Andric   //  possible, then later we may merge them back together.
addToNewNonCyclicPartition(Instruction * Inst)304ff0cc061SDimitry Andric   void addToNewNonCyclicPartition(Instruction *Inst) {
305ff0cc061SDimitry Andric     PartitionContainer.emplace_back(Inst, L);
306ff0cc061SDimitry Andric   }
307ff0cc061SDimitry Andric 
3084ba319b5SDimitry Andric   /// Merges adjacent non-cyclic partitions.
309ff0cc061SDimitry Andric   ///
310ff0cc061SDimitry Andric   /// The idea is that we currently only want to isolate the non-vectorizable
311ff0cc061SDimitry Andric   /// partition.  We could later allow more distribution among these partition
312ff0cc061SDimitry Andric   /// too.
mergeAdjacentNonCyclic()313ff0cc061SDimitry Andric   void mergeAdjacentNonCyclic() {
314ff0cc061SDimitry Andric     mergeAdjacentPartitionsIf(
315ff0cc061SDimitry Andric         [](const InstPartition *P) { return !P->hasDepCycle(); });
316ff0cc061SDimitry Andric   }
317ff0cc061SDimitry Andric 
3184ba319b5SDimitry Andric   /// If a partition contains only conditional stores, we won't vectorize
319ff0cc061SDimitry Andric   /// it.  Try to merge it with a previous cyclic partition.
mergeNonIfConvertible()320ff0cc061SDimitry Andric   void mergeNonIfConvertible() {
321ff0cc061SDimitry Andric     mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
322ff0cc061SDimitry Andric       if (Partition->hasDepCycle())
323ff0cc061SDimitry Andric         return true;
324ff0cc061SDimitry Andric 
325ff0cc061SDimitry Andric       // Now, check if all stores are conditional in this partition.
326ff0cc061SDimitry Andric       bool seenStore = false;
327ff0cc061SDimitry Andric 
328ff0cc061SDimitry Andric       for (auto *Inst : *Partition)
329ff0cc061SDimitry Andric         if (isa<StoreInst>(Inst)) {
330ff0cc061SDimitry Andric           seenStore = true;
331ff0cc061SDimitry Andric           if (!LoopAccessInfo::blockNeedsPredication(Inst->getParent(), L, DT))
332ff0cc061SDimitry Andric             return false;
333ff0cc061SDimitry Andric         }
334ff0cc061SDimitry Andric       return seenStore;
335ff0cc061SDimitry Andric     });
336ff0cc061SDimitry Andric   }
337ff0cc061SDimitry Andric 
3384ba319b5SDimitry Andric   /// Merges the partitions according to various heuristics.
mergeBeforePopulating()339ff0cc061SDimitry Andric   void mergeBeforePopulating() {
340ff0cc061SDimitry Andric     mergeAdjacentNonCyclic();
341ff0cc061SDimitry Andric     if (!DistributeNonIfConvertible)
342ff0cc061SDimitry Andric       mergeNonIfConvertible();
343ff0cc061SDimitry Andric   }
344ff0cc061SDimitry Andric 
3454ba319b5SDimitry Andric   /// Merges partitions in order to ensure that no loads are duplicated.
346ff0cc061SDimitry Andric   ///
347ff0cc061SDimitry Andric   /// We can't duplicate loads because that could potentially reorder them.
348ff0cc061SDimitry Andric   /// LoopAccessAnalysis provides dependency information with the context that
349ff0cc061SDimitry Andric   /// the order of memory operation is preserved.
350ff0cc061SDimitry Andric   ///
351ff0cc061SDimitry Andric   /// Return if any partitions were merged.
mergeToAvoidDuplicatedLoads()352ff0cc061SDimitry Andric   bool mergeToAvoidDuplicatedLoads() {
3532cab237bSDimitry Andric     using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
3542cab237bSDimitry Andric     using ToBeMergedT = EquivalenceClasses<InstPartition *>;
355ff0cc061SDimitry Andric 
356ff0cc061SDimitry Andric     LoadToPartitionT LoadToPartition;
357ff0cc061SDimitry Andric     ToBeMergedT ToBeMerged;
358ff0cc061SDimitry Andric 
359ff0cc061SDimitry Andric     // Step through the partitions and create equivalence between partitions
360ff0cc061SDimitry Andric     // that contain the same load.  Also put partitions in between them in the
361ff0cc061SDimitry Andric     // same equivalence class to avoid reordering of memory operations.
362ff0cc061SDimitry Andric     for (PartitionContainerT::iterator I = PartitionContainer.begin(),
363ff0cc061SDimitry Andric                                        E = PartitionContainer.end();
364ff0cc061SDimitry Andric          I != E; ++I) {
365ff0cc061SDimitry Andric       auto *PartI = &*I;
366ff0cc061SDimitry Andric 
367ff0cc061SDimitry Andric       // If a load occurs in two partitions PartI and PartJ, merge all
368ff0cc061SDimitry Andric       // partitions (PartI, PartJ] into PartI.
369ff0cc061SDimitry Andric       for (Instruction *Inst : *PartI)
370ff0cc061SDimitry Andric         if (isa<LoadInst>(Inst)) {
371ff0cc061SDimitry Andric           bool NewElt;
372ff0cc061SDimitry Andric           LoadToPartitionT::iterator LoadToPart;
373ff0cc061SDimitry Andric 
374ff0cc061SDimitry Andric           std::tie(LoadToPart, NewElt) =
375ff0cc061SDimitry Andric               LoadToPartition.insert(std::make_pair(Inst, PartI));
376ff0cc061SDimitry Andric           if (!NewElt) {
3774ba319b5SDimitry Andric             LLVM_DEBUG(dbgs()
3784ba319b5SDimitry Andric                        << "Merging partitions due to this load in multiple "
3794ba319b5SDimitry Andric                        << "partitions: " << PartI << ", " << LoadToPart->second
3804ba319b5SDimitry Andric                        << "\n"
3814ba319b5SDimitry Andric                        << *Inst << "\n");
382ff0cc061SDimitry Andric 
383ff0cc061SDimitry Andric             auto PartJ = I;
384ff0cc061SDimitry Andric             do {
385ff0cc061SDimitry Andric               --PartJ;
386ff0cc061SDimitry Andric               ToBeMerged.unionSets(PartI, &*PartJ);
387ff0cc061SDimitry Andric             } while (&*PartJ != LoadToPart->second);
388ff0cc061SDimitry Andric           }
389ff0cc061SDimitry Andric         }
390ff0cc061SDimitry Andric     }
391ff0cc061SDimitry Andric     if (ToBeMerged.empty())
392ff0cc061SDimitry Andric       return false;
393ff0cc061SDimitry Andric 
394ff0cc061SDimitry Andric     // Merge the member of an equivalence class into its class leader.  This
395ff0cc061SDimitry Andric     // makes the members empty.
396ff0cc061SDimitry Andric     for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
397ff0cc061SDimitry Andric          I != E; ++I) {
398ff0cc061SDimitry Andric       if (!I->isLeader())
399ff0cc061SDimitry Andric         continue;
400ff0cc061SDimitry Andric 
401ff0cc061SDimitry Andric       auto PartI = I->getData();
402ff0cc061SDimitry Andric       for (auto PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
403ff0cc061SDimitry Andric                                    ToBeMerged.member_end())) {
404ff0cc061SDimitry Andric         PartJ->moveTo(*PartI);
405ff0cc061SDimitry Andric       }
406ff0cc061SDimitry Andric     }
407ff0cc061SDimitry Andric 
408ff0cc061SDimitry Andric     // Remove the empty partitions.
409ff0cc061SDimitry Andric     PartitionContainer.remove_if(
410ff0cc061SDimitry Andric         [](const InstPartition &P) { return P.empty(); });
411ff0cc061SDimitry Andric 
412ff0cc061SDimitry Andric     return true;
413ff0cc061SDimitry Andric   }
414ff0cc061SDimitry Andric 
4154ba319b5SDimitry Andric   /// Sets up the mapping between instructions to partitions.  If the
416ff0cc061SDimitry Andric   /// instruction is duplicated across multiple partitions, set the entry to -1.
setupPartitionIdOnInstructions()417ff0cc061SDimitry Andric   void setupPartitionIdOnInstructions() {
418ff0cc061SDimitry Andric     int PartitionID = 0;
419ff0cc061SDimitry Andric     for (const auto &Partition : PartitionContainer) {
420ff0cc061SDimitry Andric       for (Instruction *Inst : Partition) {
421ff0cc061SDimitry Andric         bool NewElt;
422ff0cc061SDimitry Andric         InstToPartitionIdT::iterator Iter;
423ff0cc061SDimitry Andric 
424ff0cc061SDimitry Andric         std::tie(Iter, NewElt) =
425ff0cc061SDimitry Andric             InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
426ff0cc061SDimitry Andric         if (!NewElt)
427ff0cc061SDimitry Andric           Iter->second = -1;
428ff0cc061SDimitry Andric       }
429ff0cc061SDimitry Andric       ++PartitionID;
430ff0cc061SDimitry Andric     }
431ff0cc061SDimitry Andric   }
432ff0cc061SDimitry Andric 
4334ba319b5SDimitry Andric   /// Populates the partition with everything that the seeding
434ff0cc061SDimitry Andric   /// instructions require.
populateUsedSet()435ff0cc061SDimitry Andric   void populateUsedSet() {
436ff0cc061SDimitry Andric     for (auto &P : PartitionContainer)
437ff0cc061SDimitry Andric       P.populateUsedSet();
438ff0cc061SDimitry Andric   }
439ff0cc061SDimitry Andric 
4404ba319b5SDimitry Andric   /// This performs the main chunk of the work of cloning the loops for
441ff0cc061SDimitry Andric   /// the partitions.
cloneLoops()4427d523365SDimitry Andric   void cloneLoops() {
443ff0cc061SDimitry Andric     BasicBlock *OrigPH = L->getLoopPreheader();
444ff0cc061SDimitry Andric     // At this point the predecessor of the preheader is either the memcheck
445ff0cc061SDimitry Andric     // block or the top part of the original preheader.
446ff0cc061SDimitry Andric     BasicBlock *Pred = OrigPH->getSinglePredecessor();
447ff0cc061SDimitry Andric     assert(Pred && "Preheader does not have a single predecessor");
448ff0cc061SDimitry Andric     BasicBlock *ExitBlock = L->getExitBlock();
449ff0cc061SDimitry Andric     assert(ExitBlock && "No single exit block");
450ff0cc061SDimitry Andric     Loop *NewLoop;
451ff0cc061SDimitry Andric 
452ff0cc061SDimitry Andric     assert(!PartitionContainer.empty() && "at least two partitions expected");
453ff0cc061SDimitry Andric     // We're cloning the preheader along with the loop so we already made sure
454ff0cc061SDimitry Andric     // it was empty.
455ff0cc061SDimitry Andric     assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
456ff0cc061SDimitry Andric            "preheader not empty");
457ff0cc061SDimitry Andric 
458*b5893f02SDimitry Andric     // Preserve the original loop ID for use after the transformation.
459*b5893f02SDimitry Andric     MDNode *OrigLoopID = L->getLoopID();
460*b5893f02SDimitry Andric 
461ff0cc061SDimitry Andric     // Create a loop for each partition except the last.  Clone the original
462ff0cc061SDimitry Andric     // loop before PH along with adding a preheader for the cloned loop.  Then
463ff0cc061SDimitry Andric     // update PH to point to the newly added preheader.
464ff0cc061SDimitry Andric     BasicBlock *TopPH = OrigPH;
465ff0cc061SDimitry Andric     unsigned Index = getSize() - 1;
466ff0cc061SDimitry Andric     for (auto I = std::next(PartitionContainer.rbegin()),
467ff0cc061SDimitry Andric               E = PartitionContainer.rend();
468ff0cc061SDimitry Andric          I != E; ++I, --Index, TopPH = NewLoop->getLoopPreheader()) {
469ff0cc061SDimitry Andric       auto *Part = &*I;
470ff0cc061SDimitry Andric 
471ff0cc061SDimitry Andric       NewLoop = Part->cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
472ff0cc061SDimitry Andric 
473ff0cc061SDimitry Andric       Part->getVMap()[ExitBlock] = TopPH;
474ff0cc061SDimitry Andric       Part->remapInstructions();
475*b5893f02SDimitry Andric       setNewLoopID(OrigLoopID, Part);
476ff0cc061SDimitry Andric     }
477ff0cc061SDimitry Andric     Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
478ff0cc061SDimitry Andric 
479*b5893f02SDimitry Andric     // Also set a new loop ID for the last loop.
480*b5893f02SDimitry Andric     setNewLoopID(OrigLoopID, &PartitionContainer.back());
481*b5893f02SDimitry Andric 
482ff0cc061SDimitry Andric     // Now go in forward order and update the immediate dominator for the
483ff0cc061SDimitry Andric     // preheaders with the exiting block of the previous loop.  Dominance
484ff0cc061SDimitry Andric     // within the loop is updated in cloneLoopWithPreheader.
485ff0cc061SDimitry Andric     for (auto Curr = PartitionContainer.cbegin(),
486ff0cc061SDimitry Andric               Next = std::next(PartitionContainer.cbegin()),
487ff0cc061SDimitry Andric               E = PartitionContainer.cend();
488ff0cc061SDimitry Andric          Next != E; ++Curr, ++Next)
489ff0cc061SDimitry Andric       DT->changeImmediateDominator(
490ff0cc061SDimitry Andric           Next->getDistributedLoop()->getLoopPreheader(),
491ff0cc061SDimitry Andric           Curr->getDistributedLoop()->getExitingBlock());
492ff0cc061SDimitry Andric   }
493ff0cc061SDimitry Andric 
4944ba319b5SDimitry Andric   /// Removes the dead instructions from the cloned loops.
removeUnusedInsts()495ff0cc061SDimitry Andric   void removeUnusedInsts() {
496ff0cc061SDimitry Andric     for (auto &Partition : PartitionContainer)
497ff0cc061SDimitry Andric       Partition.removeUnusedInsts();
498ff0cc061SDimitry Andric   }
499ff0cc061SDimitry Andric 
5004ba319b5SDimitry Andric   /// For each memory pointer, it computes the partitionId the pointer is
501ff0cc061SDimitry Andric   /// used in.
502ff0cc061SDimitry Andric   ///
503ff0cc061SDimitry Andric   /// This returns an array of int where the I-th entry corresponds to I-th
504ff0cc061SDimitry Andric   /// entry in LAI.getRuntimePointerCheck().  If the pointer is used in multiple
505ff0cc061SDimitry Andric   /// partitions its entry is set to -1.
506ff0cc061SDimitry Andric   SmallVector<int, 8>
computePartitionSetForPointers(const LoopAccessInfo & LAI)507ff0cc061SDimitry Andric   computePartitionSetForPointers(const LoopAccessInfo &LAI) {
508875ed548SDimitry Andric     const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
509ff0cc061SDimitry Andric 
510ff0cc061SDimitry Andric     unsigned N = RtPtrCheck->Pointers.size();
511ff0cc061SDimitry Andric     SmallVector<int, 8> PtrToPartitions(N);
512ff0cc061SDimitry Andric     for (unsigned I = 0; I < N; ++I) {
513875ed548SDimitry Andric       Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
514ff0cc061SDimitry Andric       auto Instructions =
515875ed548SDimitry Andric           LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
516ff0cc061SDimitry Andric 
517ff0cc061SDimitry Andric       int &Partition = PtrToPartitions[I];
518ff0cc061SDimitry Andric       // First set it to uninitialized.
519ff0cc061SDimitry Andric       Partition = -2;
520ff0cc061SDimitry Andric       for (Instruction *Inst : Instructions) {
521ff0cc061SDimitry Andric         // Note that this could be -1 if Inst is duplicated across multiple
522ff0cc061SDimitry Andric         // partitions.
523ff0cc061SDimitry Andric         int ThisPartition = this->InstToPartitionId[Inst];
524ff0cc061SDimitry Andric         if (Partition == -2)
525ff0cc061SDimitry Andric           Partition = ThisPartition;
526ff0cc061SDimitry Andric         // -1 means belonging to multiple partitions.
527ff0cc061SDimitry Andric         else if (Partition == -1)
528ff0cc061SDimitry Andric           break;
529ff0cc061SDimitry Andric         else if (Partition != (int)ThisPartition)
530ff0cc061SDimitry Andric           Partition = -1;
531ff0cc061SDimitry Andric       }
532ff0cc061SDimitry Andric       assert(Partition != -2 && "Pointer not belonging to any partition");
533ff0cc061SDimitry Andric     }
534ff0cc061SDimitry Andric 
535ff0cc061SDimitry Andric     return PtrToPartitions;
536ff0cc061SDimitry Andric   }
537ff0cc061SDimitry Andric 
print(raw_ostream & OS) const538ff0cc061SDimitry Andric   void print(raw_ostream &OS) const {
539ff0cc061SDimitry Andric     unsigned Index = 0;
540ff0cc061SDimitry Andric     for (const auto &P : PartitionContainer) {
541ff0cc061SDimitry Andric       OS << "Partition " << Index++ << " (" << &P << "):\n";
542ff0cc061SDimitry Andric       P.print();
543ff0cc061SDimitry Andric     }
544ff0cc061SDimitry Andric   }
545ff0cc061SDimitry Andric 
dump() const546ff0cc061SDimitry Andric   void dump() const { print(dbgs()); }
547ff0cc061SDimitry Andric 
548ff0cc061SDimitry Andric #ifndef NDEBUG
operator <<(raw_ostream & OS,const InstPartitionContainer & Partitions)549ff0cc061SDimitry Andric   friend raw_ostream &operator<<(raw_ostream &OS,
550ff0cc061SDimitry Andric                                  const InstPartitionContainer &Partitions) {
551ff0cc061SDimitry Andric     Partitions.print(OS);
552ff0cc061SDimitry Andric     return OS;
553ff0cc061SDimitry Andric   }
554ff0cc061SDimitry Andric #endif
555ff0cc061SDimitry Andric 
printBlocks() const556ff0cc061SDimitry Andric   void printBlocks() const {
557ff0cc061SDimitry Andric     unsigned Index = 0;
558ff0cc061SDimitry Andric     for (const auto &P : PartitionContainer) {
559ff0cc061SDimitry Andric       dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
560ff0cc061SDimitry Andric       P.printBlocks();
561ff0cc061SDimitry Andric     }
562ff0cc061SDimitry Andric   }
563ff0cc061SDimitry Andric 
564ff0cc061SDimitry Andric private:
5652cab237bSDimitry Andric   using PartitionContainerT = std::list<InstPartition>;
566ff0cc061SDimitry Andric 
5674ba319b5SDimitry Andric   /// List of partitions.
568ff0cc061SDimitry Andric   PartitionContainerT PartitionContainer;
569ff0cc061SDimitry Andric 
5704ba319b5SDimitry Andric   /// Mapping from Instruction to partition Id.  If the instruction
571ff0cc061SDimitry Andric   /// belongs to multiple partitions the entry contains -1.
572ff0cc061SDimitry Andric   InstToPartitionIdT InstToPartitionId;
573ff0cc061SDimitry Andric 
574ff0cc061SDimitry Andric   Loop *L;
575ff0cc061SDimitry Andric   LoopInfo *LI;
576ff0cc061SDimitry Andric   DominatorTree *DT;
577ff0cc061SDimitry Andric 
5784ba319b5SDimitry Andric   /// The control structure to merge adjacent partitions if both satisfy
579ff0cc061SDimitry Andric   /// the \p Predicate.
580ff0cc061SDimitry Andric   template <class UnaryPredicate>
mergeAdjacentPartitionsIf(UnaryPredicate Predicate)581ff0cc061SDimitry Andric   void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
582ff0cc061SDimitry Andric     InstPartition *PrevMatch = nullptr;
583ff0cc061SDimitry Andric     for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
584ff0cc061SDimitry Andric       auto DoesMatch = Predicate(&*I);
585ff0cc061SDimitry Andric       if (PrevMatch == nullptr && DoesMatch) {
586ff0cc061SDimitry Andric         PrevMatch = &*I;
587ff0cc061SDimitry Andric         ++I;
588ff0cc061SDimitry Andric       } else if (PrevMatch != nullptr && DoesMatch) {
589ff0cc061SDimitry Andric         I->moveTo(*PrevMatch);
590ff0cc061SDimitry Andric         I = PartitionContainer.erase(I);
591ff0cc061SDimitry Andric       } else {
592ff0cc061SDimitry Andric         PrevMatch = nullptr;
593ff0cc061SDimitry Andric         ++I;
594ff0cc061SDimitry Andric       }
595ff0cc061SDimitry Andric     }
596ff0cc061SDimitry Andric   }
597*b5893f02SDimitry Andric 
598*b5893f02SDimitry Andric   /// Assign new LoopIDs for the partition's cloned loop.
setNewLoopID(MDNode * OrigLoopID,InstPartition * Part)599*b5893f02SDimitry Andric   void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
600*b5893f02SDimitry Andric     Optional<MDNode *> PartitionID = makeFollowupLoopID(
601*b5893f02SDimitry Andric         OrigLoopID,
602*b5893f02SDimitry Andric         {LLVMLoopDistributeFollowupAll,
603*b5893f02SDimitry Andric          Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
604*b5893f02SDimitry Andric                              : LLVMLoopDistributeFollowupCoincident});
605*b5893f02SDimitry Andric     if (PartitionID.hasValue()) {
606*b5893f02SDimitry Andric       Loop *NewLoop = Part->getDistributedLoop();
607*b5893f02SDimitry Andric       NewLoop->setLoopID(PartitionID.getValue());
608*b5893f02SDimitry Andric     }
609*b5893f02SDimitry Andric   }
610ff0cc061SDimitry Andric };
611ff0cc061SDimitry Andric 
6124ba319b5SDimitry Andric /// For each memory instruction, this class maintains difference of the
613ff0cc061SDimitry Andric /// number of unsafe dependences that start out from this instruction minus
614ff0cc061SDimitry Andric /// those that end here.
615ff0cc061SDimitry Andric ///
616ff0cc061SDimitry Andric /// By traversing the memory instructions in program order and accumulating this
617ff0cc061SDimitry Andric /// number, we know whether any unsafe dependence crosses over a program point.
618ff0cc061SDimitry Andric class MemoryInstructionDependences {
6192cab237bSDimitry Andric   using Dependence = MemoryDepChecker::Dependence;
620ff0cc061SDimitry Andric 
621ff0cc061SDimitry Andric public:
622ff0cc061SDimitry Andric   struct Entry {
623ff0cc061SDimitry Andric     Instruction *Inst;
6242cab237bSDimitry Andric     unsigned NumUnsafeDependencesStartOrEnd = 0;
625ff0cc061SDimitry Andric 
Entry__anonfcfe86310111::MemoryInstructionDependences::Entry6262cab237bSDimitry Andric     Entry(Instruction *Inst) : Inst(Inst) {}
627ff0cc061SDimitry Andric   };
628ff0cc061SDimitry Andric 
6292cab237bSDimitry Andric   using AccessesType = SmallVector<Entry, 8>;
630ff0cc061SDimitry Andric 
begin() const631ff0cc061SDimitry Andric   AccessesType::const_iterator begin() const { return Accesses.begin(); }
end() const632ff0cc061SDimitry Andric   AccessesType::const_iterator end() const { return Accesses.end(); }
633ff0cc061SDimitry Andric 
MemoryInstructionDependences(const SmallVectorImpl<Instruction * > & Instructions,const SmallVectorImpl<Dependence> & Dependences)634ff0cc061SDimitry Andric   MemoryInstructionDependences(
635ff0cc061SDimitry Andric       const SmallVectorImpl<Instruction *> &Instructions,
6367d523365SDimitry Andric       const SmallVectorImpl<Dependence> &Dependences) {
637ff0cc061SDimitry Andric     Accesses.append(Instructions.begin(), Instructions.end());
638ff0cc061SDimitry Andric 
6394ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "Backward dependences:\n");
6407d523365SDimitry Andric     for (auto &Dep : Dependences)
641ff0cc061SDimitry Andric       if (Dep.isPossiblyBackward()) {
642ff0cc061SDimitry Andric         // Note that the designations source and destination follow the program
643ff0cc061SDimitry Andric         // order, i.e. source is always first.  (The direction is given by the
644ff0cc061SDimitry Andric         // DepType.)
645ff0cc061SDimitry Andric         ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
646ff0cc061SDimitry Andric         --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
647ff0cc061SDimitry Andric 
6484ba319b5SDimitry Andric         LLVM_DEBUG(Dep.print(dbgs(), 2, Instructions));
649ff0cc061SDimitry Andric       }
650ff0cc061SDimitry Andric   }
651ff0cc061SDimitry Andric 
652ff0cc061SDimitry Andric private:
653ff0cc061SDimitry Andric   AccessesType Accesses;
654ff0cc061SDimitry Andric };
655ff0cc061SDimitry Andric 
6564ba319b5SDimitry Andric /// The actual class performing the per-loop work.
6573ca95b02SDimitry Andric class LoopDistributeForLoop {
658ff0cc061SDimitry Andric public:
LoopDistributeForLoop(Loop * L,Function * F,LoopInfo * LI,DominatorTree * DT,ScalarEvolution * SE,OptimizationRemarkEmitter * ORE)6593ca95b02SDimitry Andric   LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
6603ca95b02SDimitry Andric                         ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
6612cab237bSDimitry Andric       : L(L), F(F), LI(LI), DT(DT), SE(SE), ORE(ORE) {
6623ca95b02SDimitry Andric     setForced();
663ff0cc061SDimitry Andric   }
664ff0cc061SDimitry Andric 
6654ba319b5SDimitry Andric   /// Try to distribute an inner-most loop.
processLoop(std::function<const LoopAccessInfo & (Loop &)> & GetLAA)6663ca95b02SDimitry Andric   bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
6673ca95b02SDimitry Andric     assert(L->empty() && "Only process inner loops.");
668ff0cc061SDimitry Andric 
6694ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "\nLDist: In \""
6704ba319b5SDimitry Andric                       << L->getHeader()->getParent()->getName()
6713ca95b02SDimitry Andric                       << "\" checking " << *L << "\n");
672ff0cc061SDimitry Andric 
6733ca95b02SDimitry Andric     if (!L->getExitBlock())
674d88c1a5aSDimitry Andric       return fail("MultipleExitBlocks", "multiple exit blocks");
675d88c1a5aSDimitry Andric     if (!L->isLoopSimplifyForm())
676d88c1a5aSDimitry Andric       return fail("NotLoopSimplifyForm",
677d88c1a5aSDimitry Andric                   "loop is not in loop-simplify form");
678d88c1a5aSDimitry Andric 
679d88c1a5aSDimitry Andric     BasicBlock *PH = L->getLoopPreheader();
680ff0cc061SDimitry Andric 
6813ca95b02SDimitry Andric     // LAA will check that we only have a single exiting block.
6823ca95b02SDimitry Andric     LAI = &GetLAA(*L);
683ff0cc061SDimitry Andric 
6843ca95b02SDimitry Andric     // Currently, we only distribute to isolate the part of the loop with
6853ca95b02SDimitry Andric     // dependence cycles to enable partial vectorization.
6863ca95b02SDimitry Andric     if (LAI->canVectorizeMemory())
687d88c1a5aSDimitry Andric       return fail("MemOpsCanBeVectorized",
688d88c1a5aSDimitry Andric                   "memory operations are safe for vectorization");
6893ca95b02SDimitry Andric 
6903ca95b02SDimitry Andric     auto *Dependences = LAI->getDepChecker().getDependences();
6913ca95b02SDimitry Andric     if (!Dependences || Dependences->empty())
692d88c1a5aSDimitry Andric       return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
6933ca95b02SDimitry Andric 
6943ca95b02SDimitry Andric     InstPartitionContainer Partitions(L, LI, DT);
6953ca95b02SDimitry Andric 
6963ca95b02SDimitry Andric     // First, go through each memory operation and assign them to consecutive
6973ca95b02SDimitry Andric     // partitions (the order of partitions follows program order).  Put those
6983ca95b02SDimitry Andric     // with unsafe dependences into "cyclic" partition otherwise put each store
6993ca95b02SDimitry Andric     // in its own "non-cyclic" partition (we'll merge these later).
7003ca95b02SDimitry Andric     //
7013ca95b02SDimitry Andric     // Note that a memory operation (e.g. Load2 below) at a program point that
7023ca95b02SDimitry Andric     // has an unsafe dependence (Store3->Load1) spanning over it must be
7033ca95b02SDimitry Andric     // included in the same cyclic partition as the dependent operations.  This
7043ca95b02SDimitry Andric     // is to preserve the original program order after distribution.  E.g.:
7053ca95b02SDimitry Andric     //
7063ca95b02SDimitry Andric     //                NumUnsafeDependencesStartOrEnd  NumUnsafeDependencesActive
7073ca95b02SDimitry Andric     //  Load1   -.                     1                       0->1
7083ca95b02SDimitry Andric     //  Load2    | /Unsafe/            0                       1
7093ca95b02SDimitry Andric     //  Store3  -'                    -1                       1->0
7103ca95b02SDimitry Andric     //  Load4                          0                       0
7113ca95b02SDimitry Andric     //
7123ca95b02SDimitry Andric     // NumUnsafeDependencesActive > 0 indicates this situation and in this case
7133ca95b02SDimitry Andric     // we just keep assigning to the same cyclic partition until
7143ca95b02SDimitry Andric     // NumUnsafeDependencesActive reaches 0.
7153ca95b02SDimitry Andric     const MemoryDepChecker &DepChecker = LAI->getDepChecker();
7163ca95b02SDimitry Andric     MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
7173ca95b02SDimitry Andric                                      *Dependences);
7183ca95b02SDimitry Andric 
7193ca95b02SDimitry Andric     int NumUnsafeDependencesActive = 0;
7203ca95b02SDimitry Andric     for (auto &InstDep : MID) {
7213ca95b02SDimitry Andric       Instruction *I = InstDep.Inst;
7223ca95b02SDimitry Andric       // We update NumUnsafeDependencesActive post-instruction, catch the
7233ca95b02SDimitry Andric       // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
7243ca95b02SDimitry Andric       if (NumUnsafeDependencesActive ||
7253ca95b02SDimitry Andric           InstDep.NumUnsafeDependencesStartOrEnd > 0)
7263ca95b02SDimitry Andric         Partitions.addToCyclicPartition(I);
7273ca95b02SDimitry Andric       else
7283ca95b02SDimitry Andric         Partitions.addToNewNonCyclicPartition(I);
7293ca95b02SDimitry Andric       NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
7303ca95b02SDimitry Andric       assert(NumUnsafeDependencesActive >= 0 &&
7313ca95b02SDimitry Andric              "Negative number of dependences active");
732ff0cc061SDimitry Andric     }
733ff0cc061SDimitry Andric 
7343ca95b02SDimitry Andric     // Add partitions for values used outside.  These partitions can be out of
7353ca95b02SDimitry Andric     // order from the original program order.  This is OK because if the
7363ca95b02SDimitry Andric     // partition uses a load we will merge this partition with the original
7373ca95b02SDimitry Andric     // partition of the load that we set up in the previous loop (see
7383ca95b02SDimitry Andric     // mergeToAvoidDuplicatedLoads).
7393ca95b02SDimitry Andric     auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
7403ca95b02SDimitry Andric     for (auto *Inst : DefsUsedOutside)
7413ca95b02SDimitry Andric       Partitions.addToNewNonCyclicPartition(Inst);
7423ca95b02SDimitry Andric 
7434ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
7443ca95b02SDimitry Andric     if (Partitions.getSize() < 2)
745d88c1a5aSDimitry Andric       return fail("CantIsolateUnsafeDeps",
746d88c1a5aSDimitry Andric                   "cannot isolate unsafe dependencies");
7473ca95b02SDimitry Andric 
7483ca95b02SDimitry Andric     // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
7493ca95b02SDimitry Andric     // should be able to vectorize these together.
7503ca95b02SDimitry Andric     Partitions.mergeBeforePopulating();
7514ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
7523ca95b02SDimitry Andric     if (Partitions.getSize() < 2)
753d88c1a5aSDimitry Andric       return fail("CantIsolateUnsafeDeps",
754d88c1a5aSDimitry Andric                   "cannot isolate unsafe dependencies");
7553ca95b02SDimitry Andric 
7563ca95b02SDimitry Andric     // Now, populate the partitions with non-memory operations.
7573ca95b02SDimitry Andric     Partitions.populateUsedSet();
7584ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
7593ca95b02SDimitry Andric 
7603ca95b02SDimitry Andric     // In order to preserve original lexical order for loads, keep them in the
7613ca95b02SDimitry Andric     // partition that we set up in the MemoryInstructionDependences loop.
7623ca95b02SDimitry Andric     if (Partitions.mergeToAvoidDuplicatedLoads()) {
7634ba319b5SDimitry Andric       LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
7643ca95b02SDimitry Andric                         << Partitions);
7653ca95b02SDimitry Andric       if (Partitions.getSize() < 2)
766d88c1a5aSDimitry Andric         return fail("CantIsolateUnsafeDeps",
767d88c1a5aSDimitry Andric                     "cannot isolate unsafe dependencies");
768ff0cc061SDimitry Andric     }
769ff0cc061SDimitry Andric 
7703ca95b02SDimitry Andric     // Don't distribute the loop if we need too many SCEV run-time checks.
7713ca95b02SDimitry Andric     const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate();
7723ca95b02SDimitry Andric     if (Pred.getComplexity() > (IsForced.getValueOr(false)
7733ca95b02SDimitry Andric                                     ? PragmaDistributeSCEVCheckThreshold
7743ca95b02SDimitry Andric                                     : DistributeSCEVCheckThreshold))
775d88c1a5aSDimitry Andric       return fail("TooManySCEVRuntimeChecks",
776d88c1a5aSDimitry Andric                   "too many SCEV run-time checks needed.\n");
7773ca95b02SDimitry Andric 
778*b5893f02SDimitry Andric     if (!IsForced.getValueOr(false) && hasDisableAllTransformsHint(L))
779*b5893f02SDimitry Andric       return fail("HeuristicDisabled", "distribution heuristic disabled");
780*b5893f02SDimitry Andric 
7814ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
7823ca95b02SDimitry Andric     // We're done forming the partitions set up the reverse mapping from
7833ca95b02SDimitry Andric     // instructions to partitions.
7843ca95b02SDimitry Andric     Partitions.setupPartitionIdOnInstructions();
7853ca95b02SDimitry Andric 
7863ca95b02SDimitry Andric     // To keep things simple have an empty preheader before we version or clone
7873ca95b02SDimitry Andric     // the loop.  (Also split if this has no predecessor, i.e. entry, because we
7883ca95b02SDimitry Andric     // rely on PH having a predecessor.)
7893ca95b02SDimitry Andric     if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
7903ca95b02SDimitry Andric       SplitBlock(PH, PH->getTerminator(), DT, LI);
7913ca95b02SDimitry Andric 
7923ca95b02SDimitry Andric     // If we need run-time checks, version the loop now.
7933ca95b02SDimitry Andric     auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
7943ca95b02SDimitry Andric     const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
7953ca95b02SDimitry Andric     const auto &AllChecks = RtPtrChecking->getChecks();
7963ca95b02SDimitry Andric     auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
7973ca95b02SDimitry Andric                                                   RtPtrChecking);
7983ca95b02SDimitry Andric 
7993ca95b02SDimitry Andric     if (!Pred.isAlwaysTrue() || !Checks.empty()) {
800*b5893f02SDimitry Andric       MDNode *OrigLoopID = L->getLoopID();
801*b5893f02SDimitry Andric 
8024ba319b5SDimitry Andric       LLVM_DEBUG(dbgs() << "\nPointers:\n");
8034ba319b5SDimitry Andric       LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
8043ca95b02SDimitry Andric       LoopVersioning LVer(*LAI, L, LI, DT, SE, false);
8053ca95b02SDimitry Andric       LVer.setAliasChecks(std::move(Checks));
8063ca95b02SDimitry Andric       LVer.setSCEVChecks(LAI->getPSE().getUnionPredicate());
8073ca95b02SDimitry Andric       LVer.versionLoop(DefsUsedOutside);
8083ca95b02SDimitry Andric       LVer.annotateLoopWithNoAlias();
809*b5893f02SDimitry Andric 
810*b5893f02SDimitry Andric       // The unversioned loop will not be changed, so we inherit all attributes
811*b5893f02SDimitry Andric       // from the original loop, but remove the loop distribution metadata to
812*b5893f02SDimitry Andric       // avoid to distribute it again.
813*b5893f02SDimitry Andric       MDNode *UnversionedLoopID =
814*b5893f02SDimitry Andric           makeFollowupLoopID(OrigLoopID,
815*b5893f02SDimitry Andric                              {LLVMLoopDistributeFollowupAll,
816*b5893f02SDimitry Andric                               LLVMLoopDistributeFollowupFallback},
817*b5893f02SDimitry Andric                              "llvm.loop.distribute.", true)
818*b5893f02SDimitry Andric               .getValue();
819*b5893f02SDimitry Andric       LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
8203ca95b02SDimitry Andric     }
8213ca95b02SDimitry Andric 
8223ca95b02SDimitry Andric     // Create identical copies of the original loop for each partition and hook
8233ca95b02SDimitry Andric     // them up sequentially.
8243ca95b02SDimitry Andric     Partitions.cloneLoops();
8253ca95b02SDimitry Andric 
8263ca95b02SDimitry Andric     // Now, we remove the instruction from each loop that don't belong to that
8273ca95b02SDimitry Andric     // partition.
8283ca95b02SDimitry Andric     Partitions.removeUnusedInsts();
8294ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
8304ba319b5SDimitry Andric     LLVM_DEBUG(Partitions.printBlocks());
8313ca95b02SDimitry Andric 
8323ca95b02SDimitry Andric     if (LDistVerify) {
833d88c1a5aSDimitry Andric       LI->verify(*DT);
8344ba319b5SDimitry Andric       assert(DT->verify(DominatorTree::VerificationLevel::Fast));
8353ca95b02SDimitry Andric     }
8363ca95b02SDimitry Andric 
8373ca95b02SDimitry Andric     ++NumLoopsDistributed;
8383ca95b02SDimitry Andric     // Report the success.
8392cab237bSDimitry Andric     ORE->emit([&]() {
8402cab237bSDimitry Andric       return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
841d88c1a5aSDimitry Andric                                 L->getHeader())
8422cab237bSDimitry Andric              << "distributed loop";
8432cab237bSDimitry Andric     });
8443ca95b02SDimitry Andric     return true;
8453ca95b02SDimitry Andric   }
8463ca95b02SDimitry Andric 
8474ba319b5SDimitry Andric   /// Provide diagnostics then \return with false.
fail(StringRef RemarkName,StringRef Message)848d88c1a5aSDimitry Andric   bool fail(StringRef RemarkName, StringRef Message) {
8493ca95b02SDimitry Andric     LLVMContext &Ctx = F->getContext();
8503ca95b02SDimitry Andric     bool Forced = isForced().getValueOr(false);
8513ca95b02SDimitry Andric 
8524ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
8533ca95b02SDimitry Andric 
8543ca95b02SDimitry Andric     // With Rpass-missed report that distribution failed.
8552cab237bSDimitry Andric     ORE->emit([&]() {
8562cab237bSDimitry Andric       return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
8572cab237bSDimitry Andric                                       L->getStartLoc(), L->getHeader())
8582cab237bSDimitry Andric              << "loop not distributed: use -Rpass-analysis=loop-distribute for "
8592cab237bSDimitry Andric                 "more "
8602cab237bSDimitry Andric                 "info";
8612cab237bSDimitry Andric     });
8623ca95b02SDimitry Andric 
8633ca95b02SDimitry Andric     // With Rpass-analysis report why.  This is on by default if distribution
8643ca95b02SDimitry Andric     // was requested explicitly.
865d88c1a5aSDimitry Andric     ORE->emit(OptimizationRemarkAnalysis(
866d88c1a5aSDimitry Andric                   Forced ? OptimizationRemarkAnalysis::AlwaysPrint : LDIST_NAME,
867d88c1a5aSDimitry Andric                   RemarkName, L->getStartLoc(), L->getHeader())
868d88c1a5aSDimitry Andric               << "loop not distributed: " << Message);
8693ca95b02SDimitry Andric 
8703ca95b02SDimitry Andric     // Also issue a warning if distribution was requested explicitly but it
8713ca95b02SDimitry Andric     // failed.
8723ca95b02SDimitry Andric     if (Forced)
8733ca95b02SDimitry Andric       Ctx.diagnose(DiagnosticInfoOptimizationFailure(
8743ca95b02SDimitry Andric           *F, L->getStartLoc(), "loop not distributed: failed "
8753ca95b02SDimitry Andric                                 "explicitly specified loop distribution"));
8763ca95b02SDimitry Andric 
8773ca95b02SDimitry Andric     return false;
8783ca95b02SDimitry Andric   }
8793ca95b02SDimitry Andric 
8804ba319b5SDimitry Andric   /// Return if distribution forced to be enabled/disabled for the loop.
8813ca95b02SDimitry Andric   ///
8823ca95b02SDimitry Andric   /// If the optional has a value, it indicates whether distribution was forced
8833ca95b02SDimitry Andric   /// to be enabled (true) or disabled (false).  If the optional has no value
8843ca95b02SDimitry Andric   /// distribution was not forced either way.
isForced() const8853ca95b02SDimitry Andric   const Optional<bool> &isForced() const { return IsForced; }
886ff0cc061SDimitry Andric 
887ff0cc061SDimitry Andric private:
8884ba319b5SDimitry Andric   /// Filter out checks between pointers from the same partition.
8897d523365SDimitry Andric   ///
8907d523365SDimitry Andric   /// \p PtrToPartition contains the partition number for pointers.  Partition
8917d523365SDimitry Andric   /// number -1 means that the pointer is used in multiple partitions.  In this
8927d523365SDimitry Andric   /// case we can't safely omit the check.
8937d523365SDimitry Andric   SmallVector<RuntimePointerChecking::PointerCheck, 4>
includeOnlyCrossPartitionChecks(const SmallVectorImpl<RuntimePointerChecking::PointerCheck> & AllChecks,const SmallVectorImpl<int> & PtrToPartition,const RuntimePointerChecking * RtPtrChecking)8947d523365SDimitry Andric   includeOnlyCrossPartitionChecks(
8957d523365SDimitry Andric       const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &AllChecks,
8967d523365SDimitry Andric       const SmallVectorImpl<int> &PtrToPartition,
8977d523365SDimitry Andric       const RuntimePointerChecking *RtPtrChecking) {
8987d523365SDimitry Andric     SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
8997d523365SDimitry Andric 
9007a7e6055SDimitry Andric     copy_if(AllChecks, std::back_inserter(Checks),
9017d523365SDimitry Andric             [&](const RuntimePointerChecking::PointerCheck &Check) {
9027d523365SDimitry Andric               for (unsigned PtrIdx1 : Check.first->Members)
9037d523365SDimitry Andric                 for (unsigned PtrIdx2 : Check.second->Members)
9047d523365SDimitry Andric                   // Only include this check if there is a pair of pointers
9057d523365SDimitry Andric                   // that require checking and the pointers fall into
9067d523365SDimitry Andric                   // separate partitions.
9077d523365SDimitry Andric                   //
9087d523365SDimitry Andric                   // (Note that we already know at this point that the two
9097d523365SDimitry Andric                   // pointer groups need checking but it doesn't follow
9107d523365SDimitry Andric                   // that each pair of pointers within the two groups need
9117d523365SDimitry Andric                   // checking as well.
9127d523365SDimitry Andric                   //
9137d523365SDimitry Andric                   // In other words we don't want to include a check just
9147d523365SDimitry Andric                   // because there is a pair of pointers between the two
9157d523365SDimitry Andric                   // pointer groups that require checks and a different
9167d523365SDimitry Andric                   // pair whose pointers fall into different partitions.)
9177d523365SDimitry Andric                   if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
9187d523365SDimitry Andric                       !RuntimePointerChecking::arePointersInSamePartition(
9197d523365SDimitry Andric                           PtrToPartition, PtrIdx1, PtrIdx2))
9207d523365SDimitry Andric                     return true;
9217d523365SDimitry Andric               return false;
9227d523365SDimitry Andric             });
9237d523365SDimitry Andric 
9247d523365SDimitry Andric     return Checks;
9257d523365SDimitry Andric   }
9267d523365SDimitry Andric 
9274ba319b5SDimitry Andric   /// Check whether the loop metadata is forcing distribution to be
9283ca95b02SDimitry Andric   /// enabled/disabled.
setForced()9293ca95b02SDimitry Andric   void setForced() {
9303ca95b02SDimitry Andric     Optional<const MDOperand *> Value =
9313ca95b02SDimitry Andric         findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
9323ca95b02SDimitry Andric     if (!Value)
9333ca95b02SDimitry Andric       return;
934ff0cc061SDimitry Andric 
9353ca95b02SDimitry Andric     const MDOperand *Op = *Value;
9363ca95b02SDimitry Andric     assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
9373ca95b02SDimitry Andric     IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
938ff0cc061SDimitry Andric   }
939ff0cc061SDimitry Andric 
9403ca95b02SDimitry Andric   Loop *L;
9413ca95b02SDimitry Andric   Function *F;
942ff0cc061SDimitry Andric 
943ff0cc061SDimitry Andric   // Analyses used.
944ff0cc061SDimitry Andric   LoopInfo *LI;
9452cab237bSDimitry Andric   const LoopAccessInfo *LAI = nullptr;
946ff0cc061SDimitry Andric   DominatorTree *DT;
9477d523365SDimitry Andric   ScalarEvolution *SE;
9483ca95b02SDimitry Andric   OptimizationRemarkEmitter *ORE;
9493ca95b02SDimitry Andric 
9504ba319b5SDimitry Andric   /// Indicates whether distribution is forced to be enabled/disabled for
9513ca95b02SDimitry Andric   /// the loop.
9523ca95b02SDimitry Andric   ///
9533ca95b02SDimitry Andric   /// If the optional has a value, it indicates whether distribution was forced
9543ca95b02SDimitry Andric   /// to be enabled (true) or disabled (false).  If the optional has no value
9553ca95b02SDimitry Andric   /// distribution was not forced either way.
9563ca95b02SDimitry Andric   Optional<bool> IsForced;
9573ca95b02SDimitry Andric };
9583ca95b02SDimitry Andric 
9592cab237bSDimitry Andric } // end anonymous namespace
9602cab237bSDimitry Andric 
9613ca95b02SDimitry Andric /// Shared implementation between new and old PMs.
runImpl(Function & F,LoopInfo * LI,DominatorTree * DT,ScalarEvolution * SE,OptimizationRemarkEmitter * ORE,std::function<const LoopAccessInfo & (Loop &)> & GetLAA)9623ca95b02SDimitry Andric static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
9633ca95b02SDimitry Andric                     ScalarEvolution *SE, OptimizationRemarkEmitter *ORE,
964d88c1a5aSDimitry Andric                     std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
9653ca95b02SDimitry Andric   // Build up a worklist of inner-loops to vectorize. This is necessary as the
9663ca95b02SDimitry Andric   // act of distributing a loop creates new loops and can invalidate iterators
9673ca95b02SDimitry Andric   // across the loops.
9683ca95b02SDimitry Andric   SmallVector<Loop *, 8> Worklist;
9693ca95b02SDimitry Andric 
9703ca95b02SDimitry Andric   for (Loop *TopLevelLoop : *LI)
9713ca95b02SDimitry Andric     for (Loop *L : depth_first(TopLevelLoop))
9723ca95b02SDimitry Andric       // We only handle inner-most loops.
9733ca95b02SDimitry Andric       if (L->empty())
9743ca95b02SDimitry Andric         Worklist.push_back(L);
9753ca95b02SDimitry Andric 
9763ca95b02SDimitry Andric   // Now walk the identified inner loops.
9773ca95b02SDimitry Andric   bool Changed = false;
9783ca95b02SDimitry Andric   for (Loop *L : Worklist) {
9793ca95b02SDimitry Andric     LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);
9803ca95b02SDimitry Andric 
9813ca95b02SDimitry Andric     // If distribution was forced for the specific loop to be
9823ca95b02SDimitry Andric     // enabled/disabled, follow that.  Otherwise use the global flag.
983d88c1a5aSDimitry Andric     if (LDL.isForced().getValueOr(EnableLoopDistribute))
9843ca95b02SDimitry Andric       Changed |= LDL.processLoop(GetLAA);
9853ca95b02SDimitry Andric   }
9863ca95b02SDimitry Andric 
9873ca95b02SDimitry Andric   // Process each loop nest in the function.
9883ca95b02SDimitry Andric   return Changed;
9893ca95b02SDimitry Andric }
9903ca95b02SDimitry Andric 
9912cab237bSDimitry Andric namespace {
9922cab237bSDimitry Andric 
9934ba319b5SDimitry Andric /// The pass class.
9943ca95b02SDimitry Andric class LoopDistributeLegacy : public FunctionPass {
9953ca95b02SDimitry Andric public:
9962cab237bSDimitry Andric   static char ID;
9972cab237bSDimitry Andric 
LoopDistributeLegacy()998d88c1a5aSDimitry Andric   LoopDistributeLegacy() : FunctionPass(ID) {
9993ca95b02SDimitry Andric     // The default is set by the caller.
10003ca95b02SDimitry Andric     initializeLoopDistributeLegacyPass(*PassRegistry::getPassRegistry());
10013ca95b02SDimitry Andric   }
10023ca95b02SDimitry Andric 
runOnFunction(Function & F)10033ca95b02SDimitry Andric   bool runOnFunction(Function &F) override {
10043ca95b02SDimitry Andric     if (skipFunction(F))
10053ca95b02SDimitry Andric       return false;
10063ca95b02SDimitry Andric 
10073ca95b02SDimitry Andric     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
10083ca95b02SDimitry Andric     auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
10093ca95b02SDimitry Andric     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
10103ca95b02SDimitry Andric     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
10113ca95b02SDimitry Andric     auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
10123ca95b02SDimitry Andric     std::function<const LoopAccessInfo &(Loop &)> GetLAA =
10133ca95b02SDimitry Andric         [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
10143ca95b02SDimitry Andric 
1015d88c1a5aSDimitry Andric     return runImpl(F, LI, DT, SE, ORE, GetLAA);
10163ca95b02SDimitry Andric   }
10173ca95b02SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const10183ca95b02SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
10193ca95b02SDimitry Andric     AU.addRequired<ScalarEvolutionWrapperPass>();
10203ca95b02SDimitry Andric     AU.addRequired<LoopInfoWrapperPass>();
10213ca95b02SDimitry Andric     AU.addPreserved<LoopInfoWrapperPass>();
10223ca95b02SDimitry Andric     AU.addRequired<LoopAccessLegacyAnalysis>();
10233ca95b02SDimitry Andric     AU.addRequired<DominatorTreeWrapperPass>();
10243ca95b02SDimitry Andric     AU.addPreserved<DominatorTreeWrapperPass>();
10253ca95b02SDimitry Andric     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1026d88c1a5aSDimitry Andric     AU.addPreserved<GlobalsAAWrapperPass>();
10273ca95b02SDimitry Andric   }
1028ff0cc061SDimitry Andric };
10292cab237bSDimitry Andric 
10302cab237bSDimitry Andric } // end anonymous namespace
1031ff0cc061SDimitry Andric 
run(Function & F,FunctionAnalysisManager & AM)10323ca95b02SDimitry Andric PreservedAnalyses LoopDistributePass::run(Function &F,
10333ca95b02SDimitry Andric                                           FunctionAnalysisManager &AM) {
10343ca95b02SDimitry Andric   auto &LI = AM.getResult<LoopAnalysis>(F);
10353ca95b02SDimitry Andric   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
10363ca95b02SDimitry Andric   auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
10373ca95b02SDimitry Andric   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
10383ca95b02SDimitry Andric 
1039f1a29dd3SDimitry Andric   // We don't directly need these analyses but they're required for loop
1040f1a29dd3SDimitry Andric   // analyses so provide them below.
1041f1a29dd3SDimitry Andric   auto &AA = AM.getResult<AAManager>(F);
1042f1a29dd3SDimitry Andric   auto &AC = AM.getResult<AssumptionAnalysis>(F);
1043f1a29dd3SDimitry Andric   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1044f1a29dd3SDimitry Andric   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1045f1a29dd3SDimitry Andric 
10463ca95b02SDimitry Andric   auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
10473ca95b02SDimitry Andric   std::function<const LoopAccessInfo &(Loop &)> GetLAA =
10483ca95b02SDimitry Andric       [&](Loop &L) -> const LoopAccessInfo & {
10492cab237bSDimitry Andric     LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, nullptr};
1050f1a29dd3SDimitry Andric     return LAM.getResult<LoopAccessAnalysis>(L, AR);
10513ca95b02SDimitry Andric   };
10523ca95b02SDimitry Andric 
1053d88c1a5aSDimitry Andric   bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA);
10543ca95b02SDimitry Andric   if (!Changed)
10553ca95b02SDimitry Andric     return PreservedAnalyses::all();
10563ca95b02SDimitry Andric   PreservedAnalyses PA;
10573ca95b02SDimitry Andric   PA.preserve<LoopAnalysis>();
10583ca95b02SDimitry Andric   PA.preserve<DominatorTreeAnalysis>();
1059d88c1a5aSDimitry Andric   PA.preserve<GlobalsAA>();
10603ca95b02SDimitry Andric   return PA;
10613ca95b02SDimitry Andric }
10623ca95b02SDimitry Andric 
10633ca95b02SDimitry Andric char LoopDistributeLegacy::ID;
10642cab237bSDimitry Andric 
1065d88c1a5aSDimitry Andric static const char ldist_name[] = "Loop Distribution";
1066ff0cc061SDimitry Andric 
INITIALIZE_PASS_BEGIN(LoopDistributeLegacy,LDIST_NAME,ldist_name,false,false)10673ca95b02SDimitry Andric INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
10683ca95b02SDimitry Andric                       false)
1069ff0cc061SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
10703ca95b02SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
1071ff0cc061SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
10727d523365SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
10733ca95b02SDimitry Andric INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
10743ca95b02SDimitry Andric INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
1075ff0cc061SDimitry Andric 
10762cab237bSDimitry Andric FunctionPass *llvm::createLoopDistributePass() { return new LoopDistributeLegacy(); }
1077