1ff0cc061SDimitry Andric //===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
2ff0cc061SDimitry Andric //
3ff0cc061SDimitry Andric // The LLVM Compiler Infrastructure
4ff0cc061SDimitry Andric //
5ff0cc061SDimitry Andric // This file is distributed under the University of Illinois Open Source
6ff0cc061SDimitry Andric // License. See LICENSE.TXT for details.
7ff0cc061SDimitry Andric //
8ff0cc061SDimitry Andric //===----------------------------------------------------------------------===//
9ff0cc061SDimitry Andric //
10ff0cc061SDimitry Andric // This file implements the Loop Distribution Pass. Its main focus is to
11ff0cc061SDimitry Andric // distribute loops that cannot be vectorized due to dependence cycles. It
12ff0cc061SDimitry Andric // tries to isolate the offending dependences into a new loop allowing
13ff0cc061SDimitry Andric // vectorization of the remaining parts.
14ff0cc061SDimitry Andric //
15ff0cc061SDimitry Andric // For dependence analysis, the pass uses the LoopVectorizer's
16ff0cc061SDimitry Andric // LoopAccessAnalysis. Because this analysis presumes no change in the order of
17ff0cc061SDimitry Andric // memory operations, special care is taken to preserve the lexical order of
18ff0cc061SDimitry Andric // these operations.
19ff0cc061SDimitry Andric //
20ff0cc061SDimitry Andric // Similarly to the Vectorizer, the pass also supports loop versioning to
21ff0cc061SDimitry Andric // run-time disambiguate potentially overlapping arrays.
22ff0cc061SDimitry Andric //
23ff0cc061SDimitry Andric //===----------------------------------------------------------------------===//
24ff0cc061SDimitry Andric
253ca95b02SDimitry Andric #include "llvm/Transforms/Scalar/LoopDistribute.h"
262cab237bSDimitry Andric #include "llvm/ADT/DenseMap.h"
27ff0cc061SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h"
28ff0cc061SDimitry Andric #include "llvm/ADT/EquivalenceClasses.h"
292cab237bSDimitry Andric #include "llvm/ADT/Optional.h"
30ff0cc061SDimitry Andric #include "llvm/ADT/STLExtras.h"
312cab237bSDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
322cab237bSDimitry Andric #include "llvm/ADT/SmallVector.h"
33ff0cc061SDimitry Andric #include "llvm/ADT/Statistic.h"
342cab237bSDimitry Andric #include "llvm/ADT/StringRef.h"
352cab237bSDimitry Andric #include "llvm/ADT/Twine.h"
362cab237bSDimitry Andric #include "llvm/ADT/iterator_range.h"
372cab237bSDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
382cab237bSDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
39d88c1a5aSDimitry Andric #include "llvm/Analysis/GlobalsModRef.h"
40ff0cc061SDimitry Andric #include "llvm/Analysis/LoopAccessAnalysis.h"
412cab237bSDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h"
42ff0cc061SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
432cab237bSDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
442cab237bSDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
452cab237bSDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
462cab237bSDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
472cab237bSDimitry Andric #include "llvm/IR/BasicBlock.h"
482cab237bSDimitry Andric #include "llvm/IR/Constants.h"
493ca95b02SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
50ff0cc061SDimitry Andric #include "llvm/IR/Dominators.h"
512cab237bSDimitry Andric #include "llvm/IR/Function.h"
522cab237bSDimitry Andric #include "llvm/IR/InstrTypes.h"
532cab237bSDimitry Andric #include "llvm/IR/Instruction.h"
542cab237bSDimitry Andric #include "llvm/IR/Instructions.h"
552cab237bSDimitry Andric #include "llvm/IR/LLVMContext.h"
562cab237bSDimitry Andric #include "llvm/IR/Metadata.h"
572cab237bSDimitry Andric #include "llvm/IR/PassManager.h"
582cab237bSDimitry Andric #include "llvm/IR/Value.h"
59ff0cc061SDimitry Andric #include "llvm/Pass.h"
602cab237bSDimitry Andric #include "llvm/Support/Casting.h"
61ff0cc061SDimitry Andric #include "llvm/Support/CommandLine.h"
62ff0cc061SDimitry Andric #include "llvm/Support/Debug.h"
632cab237bSDimitry Andric #include "llvm/Support/raw_ostream.h"
642cab237bSDimitry Andric #include "llvm/Transforms/Scalar.h"
65ff0cc061SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
66ff0cc061SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h"
677d523365SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
68875ed548SDimitry Andric #include "llvm/Transforms/Utils/LoopVersioning.h"
692cab237bSDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h"
702cab237bSDimitry Andric #include <cassert>
712cab237bSDimitry Andric #include <functional>
72ff0cc061SDimitry Andric #include <list>
732cab237bSDimitry Andric #include <tuple>
742cab237bSDimitry Andric #include <utility>
752cab237bSDimitry Andric
762cab237bSDimitry Andric using namespace llvm;
77ff0cc061SDimitry Andric
78ff0cc061SDimitry Andric #define LDIST_NAME "loop-distribute"
79ff0cc061SDimitry Andric #define DEBUG_TYPE LDIST_NAME
80ff0cc061SDimitry Andric
81*b5893f02SDimitry Andric /// @{
82*b5893f02SDimitry Andric /// Metadata attribute names
83*b5893f02SDimitry Andric static const char *const LLVMLoopDistributeFollowupAll =
84*b5893f02SDimitry Andric "llvm.loop.distribute.followup_all";
85*b5893f02SDimitry Andric static const char *const LLVMLoopDistributeFollowupCoincident =
86*b5893f02SDimitry Andric "llvm.loop.distribute.followup_coincident";
87*b5893f02SDimitry Andric static const char *const LLVMLoopDistributeFollowupSequential =
88*b5893f02SDimitry Andric "llvm.loop.distribute.followup_sequential";
89*b5893f02SDimitry Andric static const char *const LLVMLoopDistributeFollowupFallback =
90*b5893f02SDimitry Andric "llvm.loop.distribute.followup_fallback";
91*b5893f02SDimitry Andric /// @}
92*b5893f02SDimitry Andric
93ff0cc061SDimitry Andric static cl::opt<bool>
94ff0cc061SDimitry Andric LDistVerify("loop-distribute-verify", cl::Hidden,
95ff0cc061SDimitry Andric cl::desc("Turn on DominatorTree and LoopInfo verification "
96ff0cc061SDimitry Andric "after Loop Distribution"),
97ff0cc061SDimitry Andric cl::init(false));
98ff0cc061SDimitry Andric
99ff0cc061SDimitry Andric static cl::opt<bool> DistributeNonIfConvertible(
100ff0cc061SDimitry Andric "loop-distribute-non-if-convertible", cl::Hidden,
101ff0cc061SDimitry Andric cl::desc("Whether to distribute into a loop that may not be "
102ff0cc061SDimitry Andric "if-convertible by the loop vectorizer"),
103ff0cc061SDimitry Andric cl::init(false));
104ff0cc061SDimitry Andric
1057d523365SDimitry Andric static cl::opt<unsigned> DistributeSCEVCheckThreshold(
1067d523365SDimitry Andric "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
1077d523365SDimitry Andric cl::desc("The maximum number of SCEV checks allowed for Loop "
1087d523365SDimitry Andric "Distribution"));
1097d523365SDimitry Andric
1103ca95b02SDimitry Andric static cl::opt<unsigned> PragmaDistributeSCEVCheckThreshold(
1113ca95b02SDimitry Andric "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
1123ca95b02SDimitry Andric cl::Hidden,
1133ca95b02SDimitry Andric cl::desc(
1143ca95b02SDimitry Andric "The maximum number of SCEV checks allowed for Loop "
1153ca95b02SDimitry Andric "Distribution for loop marked with #pragma loop distribute(enable)"));
1163ca95b02SDimitry Andric
1173ca95b02SDimitry Andric static cl::opt<bool> EnableLoopDistribute(
1183ca95b02SDimitry Andric "enable-loop-distribute", cl::Hidden,
119d88c1a5aSDimitry Andric cl::desc("Enable the new, experimental LoopDistribution Pass"),
120d88c1a5aSDimitry Andric cl::init(false));
1213ca95b02SDimitry Andric
122ff0cc061SDimitry Andric STATISTIC(NumLoopsDistributed, "Number of loops distributed");
123ff0cc061SDimitry Andric
124ff0cc061SDimitry Andric namespace {
1252cab237bSDimitry Andric
1264ba319b5SDimitry Andric /// Maintains the set of instructions of the loop for a partition before
127ff0cc061SDimitry Andric /// cloning. After cloning, it hosts the new loop.
128ff0cc061SDimitry Andric class InstPartition {
1292cab237bSDimitry Andric using InstructionSet = SmallPtrSet<Instruction *, 8>;
130ff0cc061SDimitry Andric
131ff0cc061SDimitry Andric public:
InstPartition(Instruction * I,Loop * L,bool DepCycle=false)132ff0cc061SDimitry Andric InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
1332cab237bSDimitry Andric : DepCycle(DepCycle), OrigLoop(L) {
134ff0cc061SDimitry Andric Set.insert(I);
135ff0cc061SDimitry Andric }
136ff0cc061SDimitry Andric
1374ba319b5SDimitry Andric /// Returns whether this partition contains a dependence cycle.
hasDepCycle() const138ff0cc061SDimitry Andric bool hasDepCycle() const { return DepCycle; }
139ff0cc061SDimitry Andric
1404ba319b5SDimitry Andric /// Adds an instruction to this partition.
add(Instruction * I)141ff0cc061SDimitry Andric void add(Instruction *I) { Set.insert(I); }
142ff0cc061SDimitry Andric
1434ba319b5SDimitry Andric /// Collection accessors.
begin()144ff0cc061SDimitry Andric InstructionSet::iterator begin() { return Set.begin(); }
end()145ff0cc061SDimitry Andric InstructionSet::iterator end() { return Set.end(); }
begin() const146ff0cc061SDimitry Andric InstructionSet::const_iterator begin() const { return Set.begin(); }
end() const147ff0cc061SDimitry Andric InstructionSet::const_iterator end() const { return Set.end(); }
empty() const148ff0cc061SDimitry Andric bool empty() const { return Set.empty(); }
149ff0cc061SDimitry Andric
1504ba319b5SDimitry Andric /// Moves this partition into \p Other. This partition becomes empty
151ff0cc061SDimitry Andric /// after this.
moveTo(InstPartition & Other)152ff0cc061SDimitry Andric void moveTo(InstPartition &Other) {
153ff0cc061SDimitry Andric Other.Set.insert(Set.begin(), Set.end());
154ff0cc061SDimitry Andric Set.clear();
155ff0cc061SDimitry Andric Other.DepCycle |= DepCycle;
156ff0cc061SDimitry Andric }
157ff0cc061SDimitry Andric
1584ba319b5SDimitry Andric /// Populates the partition with a transitive closure of all the
159ff0cc061SDimitry Andric /// instructions that the seeded instructions dependent on.
populateUsedSet()160ff0cc061SDimitry Andric void populateUsedSet() {
161ff0cc061SDimitry Andric // FIXME: We currently don't use control-dependence but simply include all
162ff0cc061SDimitry Andric // blocks (possibly empty at the end) and let simplifycfg mostly clean this
163ff0cc061SDimitry Andric // up.
164ff0cc061SDimitry Andric for (auto *B : OrigLoop->getBlocks())
165ff0cc061SDimitry Andric Set.insert(B->getTerminator());
166ff0cc061SDimitry Andric
167ff0cc061SDimitry Andric // Follow the use-def chains to form a transitive closure of all the
168ff0cc061SDimitry Andric // instructions that the originally seeded instructions depend on.
169ff0cc061SDimitry Andric SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
170ff0cc061SDimitry Andric while (!Worklist.empty()) {
171ff0cc061SDimitry Andric Instruction *I = Worklist.pop_back_val();
172ff0cc061SDimitry Andric // Insert instructions from the loop that we depend on.
173ff0cc061SDimitry Andric for (Value *V : I->operand_values()) {
174ff0cc061SDimitry Andric auto *I = dyn_cast<Instruction>(V);
175ff0cc061SDimitry Andric if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
176ff0cc061SDimitry Andric Worklist.push_back(I);
177ff0cc061SDimitry Andric }
178ff0cc061SDimitry Andric }
179ff0cc061SDimitry Andric }
180ff0cc061SDimitry Andric
1814ba319b5SDimitry Andric /// Clones the original loop.
182ff0cc061SDimitry Andric ///
183ff0cc061SDimitry Andric /// Updates LoopInfo and DominatorTree using the information that block \p
184ff0cc061SDimitry Andric /// LoopDomBB dominates the loop.
cloneLoopWithPreheader(BasicBlock * InsertBefore,BasicBlock * LoopDomBB,unsigned Index,LoopInfo * LI,DominatorTree * DT)185ff0cc061SDimitry Andric Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
186ff0cc061SDimitry Andric unsigned Index, LoopInfo *LI,
187ff0cc061SDimitry Andric DominatorTree *DT) {
188ff0cc061SDimitry Andric ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
189ff0cc061SDimitry Andric VMap, Twine(".ldist") + Twine(Index),
190ff0cc061SDimitry Andric LI, DT, ClonedLoopBlocks);
191ff0cc061SDimitry Andric return ClonedLoop;
192ff0cc061SDimitry Andric }
193ff0cc061SDimitry Andric
1944ba319b5SDimitry Andric /// The cloned loop. If this partition is mapped to the original loop,
195ff0cc061SDimitry Andric /// this is null.
getClonedLoop() const196ff0cc061SDimitry Andric const Loop *getClonedLoop() const { return ClonedLoop; }
197ff0cc061SDimitry Andric
1984ba319b5SDimitry Andric /// Returns the loop where this partition ends up after distribution.
199ff0cc061SDimitry Andric /// If this partition is mapped to the original loop then use the block from
200ff0cc061SDimitry Andric /// the loop.
getDistributedLoop() const201*b5893f02SDimitry Andric Loop *getDistributedLoop() const {
202ff0cc061SDimitry Andric return ClonedLoop ? ClonedLoop : OrigLoop;
203ff0cc061SDimitry Andric }
204ff0cc061SDimitry Andric
2054ba319b5SDimitry Andric /// The VMap that is populated by cloning and then used in
206ff0cc061SDimitry Andric /// remapinstruction to remap the cloned instructions.
getVMap()207ff0cc061SDimitry Andric ValueToValueMapTy &getVMap() { return VMap; }
208ff0cc061SDimitry Andric
2094ba319b5SDimitry Andric /// Remaps the cloned instructions using VMap.
remapInstructions()210875ed548SDimitry Andric void remapInstructions() {
211875ed548SDimitry Andric remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
212875ed548SDimitry Andric }
213ff0cc061SDimitry Andric
2144ba319b5SDimitry Andric /// Based on the set of instructions selected for this partition,
215ff0cc061SDimitry Andric /// removes the unnecessary ones.
removeUnusedInsts()216ff0cc061SDimitry Andric void removeUnusedInsts() {
217ff0cc061SDimitry Andric SmallVector<Instruction *, 8> Unused;
218ff0cc061SDimitry Andric
219ff0cc061SDimitry Andric for (auto *Block : OrigLoop->getBlocks())
220ff0cc061SDimitry Andric for (auto &Inst : *Block)
221ff0cc061SDimitry Andric if (!Set.count(&Inst)) {
222ff0cc061SDimitry Andric Instruction *NewInst = &Inst;
223ff0cc061SDimitry Andric if (!VMap.empty())
224ff0cc061SDimitry Andric NewInst = cast<Instruction>(VMap[NewInst]);
225ff0cc061SDimitry Andric
226ff0cc061SDimitry Andric assert(!isa<BranchInst>(NewInst) &&
227ff0cc061SDimitry Andric "Branches are marked used early on");
228ff0cc061SDimitry Andric Unused.push_back(NewInst);
229ff0cc061SDimitry Andric }
230ff0cc061SDimitry Andric
231ff0cc061SDimitry Andric // Delete the instructions backwards, as it has a reduced likelihood of
232ff0cc061SDimitry Andric // having to update as many def-use and use-def chains.
2333ca95b02SDimitry Andric for (auto *Inst : reverse(Unused)) {
234ff0cc061SDimitry Andric if (!Inst->use_empty())
235ff0cc061SDimitry Andric Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
236ff0cc061SDimitry Andric Inst->eraseFromParent();
237ff0cc061SDimitry Andric }
238ff0cc061SDimitry Andric }
239ff0cc061SDimitry Andric
print() const240ff0cc061SDimitry Andric void print() const {
241ff0cc061SDimitry Andric if (DepCycle)
242ff0cc061SDimitry Andric dbgs() << " (cycle)\n";
243ff0cc061SDimitry Andric for (auto *I : Set)
244ff0cc061SDimitry Andric // Prefix with the block name.
245ff0cc061SDimitry Andric dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
246ff0cc061SDimitry Andric }
247ff0cc061SDimitry Andric
printBlocks() const248ff0cc061SDimitry Andric void printBlocks() const {
249ff0cc061SDimitry Andric for (auto *BB : getDistributedLoop()->getBlocks())
250ff0cc061SDimitry Andric dbgs() << *BB;
251ff0cc061SDimitry Andric }
252ff0cc061SDimitry Andric
253ff0cc061SDimitry Andric private:
2544ba319b5SDimitry Andric /// Instructions from OrigLoop selected for this partition.
255ff0cc061SDimitry Andric InstructionSet Set;
256ff0cc061SDimitry Andric
2574ba319b5SDimitry Andric /// Whether this partition contains a dependence cycle.
258ff0cc061SDimitry Andric bool DepCycle;
259ff0cc061SDimitry Andric
2604ba319b5SDimitry Andric /// The original loop.
261ff0cc061SDimitry Andric Loop *OrigLoop;
262ff0cc061SDimitry Andric
2634ba319b5SDimitry Andric /// The cloned loop. If this partition is mapped to the original loop,
264ff0cc061SDimitry Andric /// this is null.
2652cab237bSDimitry Andric Loop *ClonedLoop = nullptr;
266ff0cc061SDimitry Andric
2674ba319b5SDimitry Andric /// The blocks of ClonedLoop including the preheader. If this
268ff0cc061SDimitry Andric /// partition is mapped to the original loop, this is empty.
269ff0cc061SDimitry Andric SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
270ff0cc061SDimitry Andric
2714ba319b5SDimitry Andric /// These gets populated once the set of instructions have been
272ff0cc061SDimitry Andric /// finalized. If this partition is mapped to the original loop, these are not
273ff0cc061SDimitry Andric /// set.
274ff0cc061SDimitry Andric ValueToValueMapTy VMap;
275ff0cc061SDimitry Andric };
276ff0cc061SDimitry Andric
2774ba319b5SDimitry Andric /// Holds the set of Partitions. It populates them, merges them and then
278ff0cc061SDimitry Andric /// clones the loops.
279ff0cc061SDimitry Andric class InstPartitionContainer {
2802cab237bSDimitry Andric using InstToPartitionIdT = DenseMap<Instruction *, int>;
281ff0cc061SDimitry Andric
282ff0cc061SDimitry Andric public:
InstPartitionContainer(Loop * L,LoopInfo * LI,DominatorTree * DT)283ff0cc061SDimitry Andric InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
284ff0cc061SDimitry Andric : L(L), LI(LI), DT(DT) {}
285ff0cc061SDimitry Andric
2864ba319b5SDimitry Andric /// Returns the number of partitions.
getSize() const287ff0cc061SDimitry Andric unsigned getSize() const { return PartitionContainer.size(); }
288ff0cc061SDimitry Andric
2894ba319b5SDimitry Andric /// Adds \p Inst into the current partition if that is marked to
290ff0cc061SDimitry Andric /// contain cycles. Otherwise start a new partition for it.
addToCyclicPartition(Instruction * Inst)291ff0cc061SDimitry Andric void addToCyclicPartition(Instruction *Inst) {
292ff0cc061SDimitry Andric // If the current partition is non-cyclic. Start a new one.
293ff0cc061SDimitry Andric if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
294ff0cc061SDimitry Andric PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
295ff0cc061SDimitry Andric else
296ff0cc061SDimitry Andric PartitionContainer.back().add(Inst);
297ff0cc061SDimitry Andric }
298ff0cc061SDimitry Andric
2994ba319b5SDimitry Andric /// Adds \p Inst into a partition that is not marked to contain
300ff0cc061SDimitry Andric /// dependence cycles.
301ff0cc061SDimitry Andric ///
302ff0cc061SDimitry Andric // Initially we isolate memory instructions into as many partitions as
303ff0cc061SDimitry Andric // possible, then later we may merge them back together.
addToNewNonCyclicPartition(Instruction * Inst)304ff0cc061SDimitry Andric void addToNewNonCyclicPartition(Instruction *Inst) {
305ff0cc061SDimitry Andric PartitionContainer.emplace_back(Inst, L);
306ff0cc061SDimitry Andric }
307ff0cc061SDimitry Andric
3084ba319b5SDimitry Andric /// Merges adjacent non-cyclic partitions.
309ff0cc061SDimitry Andric ///
310ff0cc061SDimitry Andric /// The idea is that we currently only want to isolate the non-vectorizable
311ff0cc061SDimitry Andric /// partition. We could later allow more distribution among these partition
312ff0cc061SDimitry Andric /// too.
mergeAdjacentNonCyclic()313ff0cc061SDimitry Andric void mergeAdjacentNonCyclic() {
314ff0cc061SDimitry Andric mergeAdjacentPartitionsIf(
315ff0cc061SDimitry Andric [](const InstPartition *P) { return !P->hasDepCycle(); });
316ff0cc061SDimitry Andric }
317ff0cc061SDimitry Andric
3184ba319b5SDimitry Andric /// If a partition contains only conditional stores, we won't vectorize
319ff0cc061SDimitry Andric /// it. Try to merge it with a previous cyclic partition.
mergeNonIfConvertible()320ff0cc061SDimitry Andric void mergeNonIfConvertible() {
321ff0cc061SDimitry Andric mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
322ff0cc061SDimitry Andric if (Partition->hasDepCycle())
323ff0cc061SDimitry Andric return true;
324ff0cc061SDimitry Andric
325ff0cc061SDimitry Andric // Now, check if all stores are conditional in this partition.
326ff0cc061SDimitry Andric bool seenStore = false;
327ff0cc061SDimitry Andric
328ff0cc061SDimitry Andric for (auto *Inst : *Partition)
329ff0cc061SDimitry Andric if (isa<StoreInst>(Inst)) {
330ff0cc061SDimitry Andric seenStore = true;
331ff0cc061SDimitry Andric if (!LoopAccessInfo::blockNeedsPredication(Inst->getParent(), L, DT))
332ff0cc061SDimitry Andric return false;
333ff0cc061SDimitry Andric }
334ff0cc061SDimitry Andric return seenStore;
335ff0cc061SDimitry Andric });
336ff0cc061SDimitry Andric }
337ff0cc061SDimitry Andric
3384ba319b5SDimitry Andric /// Merges the partitions according to various heuristics.
mergeBeforePopulating()339ff0cc061SDimitry Andric void mergeBeforePopulating() {
340ff0cc061SDimitry Andric mergeAdjacentNonCyclic();
341ff0cc061SDimitry Andric if (!DistributeNonIfConvertible)
342ff0cc061SDimitry Andric mergeNonIfConvertible();
343ff0cc061SDimitry Andric }
344ff0cc061SDimitry Andric
3454ba319b5SDimitry Andric /// Merges partitions in order to ensure that no loads are duplicated.
346ff0cc061SDimitry Andric ///
347ff0cc061SDimitry Andric /// We can't duplicate loads because that could potentially reorder them.
348ff0cc061SDimitry Andric /// LoopAccessAnalysis provides dependency information with the context that
349ff0cc061SDimitry Andric /// the order of memory operation is preserved.
350ff0cc061SDimitry Andric ///
351ff0cc061SDimitry Andric /// Return if any partitions were merged.
mergeToAvoidDuplicatedLoads()352ff0cc061SDimitry Andric bool mergeToAvoidDuplicatedLoads() {
3532cab237bSDimitry Andric using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
3542cab237bSDimitry Andric using ToBeMergedT = EquivalenceClasses<InstPartition *>;
355ff0cc061SDimitry Andric
356ff0cc061SDimitry Andric LoadToPartitionT LoadToPartition;
357ff0cc061SDimitry Andric ToBeMergedT ToBeMerged;
358ff0cc061SDimitry Andric
359ff0cc061SDimitry Andric // Step through the partitions and create equivalence between partitions
360ff0cc061SDimitry Andric // that contain the same load. Also put partitions in between them in the
361ff0cc061SDimitry Andric // same equivalence class to avoid reordering of memory operations.
362ff0cc061SDimitry Andric for (PartitionContainerT::iterator I = PartitionContainer.begin(),
363ff0cc061SDimitry Andric E = PartitionContainer.end();
364ff0cc061SDimitry Andric I != E; ++I) {
365ff0cc061SDimitry Andric auto *PartI = &*I;
366ff0cc061SDimitry Andric
367ff0cc061SDimitry Andric // If a load occurs in two partitions PartI and PartJ, merge all
368ff0cc061SDimitry Andric // partitions (PartI, PartJ] into PartI.
369ff0cc061SDimitry Andric for (Instruction *Inst : *PartI)
370ff0cc061SDimitry Andric if (isa<LoadInst>(Inst)) {
371ff0cc061SDimitry Andric bool NewElt;
372ff0cc061SDimitry Andric LoadToPartitionT::iterator LoadToPart;
373ff0cc061SDimitry Andric
374ff0cc061SDimitry Andric std::tie(LoadToPart, NewElt) =
375ff0cc061SDimitry Andric LoadToPartition.insert(std::make_pair(Inst, PartI));
376ff0cc061SDimitry Andric if (!NewElt) {
3774ba319b5SDimitry Andric LLVM_DEBUG(dbgs()
3784ba319b5SDimitry Andric << "Merging partitions due to this load in multiple "
3794ba319b5SDimitry Andric << "partitions: " << PartI << ", " << LoadToPart->second
3804ba319b5SDimitry Andric << "\n"
3814ba319b5SDimitry Andric << *Inst << "\n");
382ff0cc061SDimitry Andric
383ff0cc061SDimitry Andric auto PartJ = I;
384ff0cc061SDimitry Andric do {
385ff0cc061SDimitry Andric --PartJ;
386ff0cc061SDimitry Andric ToBeMerged.unionSets(PartI, &*PartJ);
387ff0cc061SDimitry Andric } while (&*PartJ != LoadToPart->second);
388ff0cc061SDimitry Andric }
389ff0cc061SDimitry Andric }
390ff0cc061SDimitry Andric }
391ff0cc061SDimitry Andric if (ToBeMerged.empty())
392ff0cc061SDimitry Andric return false;
393ff0cc061SDimitry Andric
394ff0cc061SDimitry Andric // Merge the member of an equivalence class into its class leader. This
395ff0cc061SDimitry Andric // makes the members empty.
396ff0cc061SDimitry Andric for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
397ff0cc061SDimitry Andric I != E; ++I) {
398ff0cc061SDimitry Andric if (!I->isLeader())
399ff0cc061SDimitry Andric continue;
400ff0cc061SDimitry Andric
401ff0cc061SDimitry Andric auto PartI = I->getData();
402ff0cc061SDimitry Andric for (auto PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
403ff0cc061SDimitry Andric ToBeMerged.member_end())) {
404ff0cc061SDimitry Andric PartJ->moveTo(*PartI);
405ff0cc061SDimitry Andric }
406ff0cc061SDimitry Andric }
407ff0cc061SDimitry Andric
408ff0cc061SDimitry Andric // Remove the empty partitions.
409ff0cc061SDimitry Andric PartitionContainer.remove_if(
410ff0cc061SDimitry Andric [](const InstPartition &P) { return P.empty(); });
411ff0cc061SDimitry Andric
412ff0cc061SDimitry Andric return true;
413ff0cc061SDimitry Andric }
414ff0cc061SDimitry Andric
4154ba319b5SDimitry Andric /// Sets up the mapping between instructions to partitions. If the
416ff0cc061SDimitry Andric /// instruction is duplicated across multiple partitions, set the entry to -1.
setupPartitionIdOnInstructions()417ff0cc061SDimitry Andric void setupPartitionIdOnInstructions() {
418ff0cc061SDimitry Andric int PartitionID = 0;
419ff0cc061SDimitry Andric for (const auto &Partition : PartitionContainer) {
420ff0cc061SDimitry Andric for (Instruction *Inst : Partition) {
421ff0cc061SDimitry Andric bool NewElt;
422ff0cc061SDimitry Andric InstToPartitionIdT::iterator Iter;
423ff0cc061SDimitry Andric
424ff0cc061SDimitry Andric std::tie(Iter, NewElt) =
425ff0cc061SDimitry Andric InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
426ff0cc061SDimitry Andric if (!NewElt)
427ff0cc061SDimitry Andric Iter->second = -1;
428ff0cc061SDimitry Andric }
429ff0cc061SDimitry Andric ++PartitionID;
430ff0cc061SDimitry Andric }
431ff0cc061SDimitry Andric }
432ff0cc061SDimitry Andric
4334ba319b5SDimitry Andric /// Populates the partition with everything that the seeding
434ff0cc061SDimitry Andric /// instructions require.
populateUsedSet()435ff0cc061SDimitry Andric void populateUsedSet() {
436ff0cc061SDimitry Andric for (auto &P : PartitionContainer)
437ff0cc061SDimitry Andric P.populateUsedSet();
438ff0cc061SDimitry Andric }
439ff0cc061SDimitry Andric
4404ba319b5SDimitry Andric /// This performs the main chunk of the work of cloning the loops for
441ff0cc061SDimitry Andric /// the partitions.
cloneLoops()4427d523365SDimitry Andric void cloneLoops() {
443ff0cc061SDimitry Andric BasicBlock *OrigPH = L->getLoopPreheader();
444ff0cc061SDimitry Andric // At this point the predecessor of the preheader is either the memcheck
445ff0cc061SDimitry Andric // block or the top part of the original preheader.
446ff0cc061SDimitry Andric BasicBlock *Pred = OrigPH->getSinglePredecessor();
447ff0cc061SDimitry Andric assert(Pred && "Preheader does not have a single predecessor");
448ff0cc061SDimitry Andric BasicBlock *ExitBlock = L->getExitBlock();
449ff0cc061SDimitry Andric assert(ExitBlock && "No single exit block");
450ff0cc061SDimitry Andric Loop *NewLoop;
451ff0cc061SDimitry Andric
452ff0cc061SDimitry Andric assert(!PartitionContainer.empty() && "at least two partitions expected");
453ff0cc061SDimitry Andric // We're cloning the preheader along with the loop so we already made sure
454ff0cc061SDimitry Andric // it was empty.
455ff0cc061SDimitry Andric assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
456ff0cc061SDimitry Andric "preheader not empty");
457ff0cc061SDimitry Andric
458*b5893f02SDimitry Andric // Preserve the original loop ID for use after the transformation.
459*b5893f02SDimitry Andric MDNode *OrigLoopID = L->getLoopID();
460*b5893f02SDimitry Andric
461ff0cc061SDimitry Andric // Create a loop for each partition except the last. Clone the original
462ff0cc061SDimitry Andric // loop before PH along with adding a preheader for the cloned loop. Then
463ff0cc061SDimitry Andric // update PH to point to the newly added preheader.
464ff0cc061SDimitry Andric BasicBlock *TopPH = OrigPH;
465ff0cc061SDimitry Andric unsigned Index = getSize() - 1;
466ff0cc061SDimitry Andric for (auto I = std::next(PartitionContainer.rbegin()),
467ff0cc061SDimitry Andric E = PartitionContainer.rend();
468ff0cc061SDimitry Andric I != E; ++I, --Index, TopPH = NewLoop->getLoopPreheader()) {
469ff0cc061SDimitry Andric auto *Part = &*I;
470ff0cc061SDimitry Andric
471ff0cc061SDimitry Andric NewLoop = Part->cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
472ff0cc061SDimitry Andric
473ff0cc061SDimitry Andric Part->getVMap()[ExitBlock] = TopPH;
474ff0cc061SDimitry Andric Part->remapInstructions();
475*b5893f02SDimitry Andric setNewLoopID(OrigLoopID, Part);
476ff0cc061SDimitry Andric }
477ff0cc061SDimitry Andric Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
478ff0cc061SDimitry Andric
479*b5893f02SDimitry Andric // Also set a new loop ID for the last loop.
480*b5893f02SDimitry Andric setNewLoopID(OrigLoopID, &PartitionContainer.back());
481*b5893f02SDimitry Andric
482ff0cc061SDimitry Andric // Now go in forward order and update the immediate dominator for the
483ff0cc061SDimitry Andric // preheaders with the exiting block of the previous loop. Dominance
484ff0cc061SDimitry Andric // within the loop is updated in cloneLoopWithPreheader.
485ff0cc061SDimitry Andric for (auto Curr = PartitionContainer.cbegin(),
486ff0cc061SDimitry Andric Next = std::next(PartitionContainer.cbegin()),
487ff0cc061SDimitry Andric E = PartitionContainer.cend();
488ff0cc061SDimitry Andric Next != E; ++Curr, ++Next)
489ff0cc061SDimitry Andric DT->changeImmediateDominator(
490ff0cc061SDimitry Andric Next->getDistributedLoop()->getLoopPreheader(),
491ff0cc061SDimitry Andric Curr->getDistributedLoop()->getExitingBlock());
492ff0cc061SDimitry Andric }
493ff0cc061SDimitry Andric
4944ba319b5SDimitry Andric /// Removes the dead instructions from the cloned loops.
removeUnusedInsts()495ff0cc061SDimitry Andric void removeUnusedInsts() {
496ff0cc061SDimitry Andric for (auto &Partition : PartitionContainer)
497ff0cc061SDimitry Andric Partition.removeUnusedInsts();
498ff0cc061SDimitry Andric }
499ff0cc061SDimitry Andric
5004ba319b5SDimitry Andric /// For each memory pointer, it computes the partitionId the pointer is
501ff0cc061SDimitry Andric /// used in.
502ff0cc061SDimitry Andric ///
503ff0cc061SDimitry Andric /// This returns an array of int where the I-th entry corresponds to I-th
504ff0cc061SDimitry Andric /// entry in LAI.getRuntimePointerCheck(). If the pointer is used in multiple
505ff0cc061SDimitry Andric /// partitions its entry is set to -1.
506ff0cc061SDimitry Andric SmallVector<int, 8>
computePartitionSetForPointers(const LoopAccessInfo & LAI)507ff0cc061SDimitry Andric computePartitionSetForPointers(const LoopAccessInfo &LAI) {
508875ed548SDimitry Andric const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
509ff0cc061SDimitry Andric
510ff0cc061SDimitry Andric unsigned N = RtPtrCheck->Pointers.size();
511ff0cc061SDimitry Andric SmallVector<int, 8> PtrToPartitions(N);
512ff0cc061SDimitry Andric for (unsigned I = 0; I < N; ++I) {
513875ed548SDimitry Andric Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
514ff0cc061SDimitry Andric auto Instructions =
515875ed548SDimitry Andric LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
516ff0cc061SDimitry Andric
517ff0cc061SDimitry Andric int &Partition = PtrToPartitions[I];
518ff0cc061SDimitry Andric // First set it to uninitialized.
519ff0cc061SDimitry Andric Partition = -2;
520ff0cc061SDimitry Andric for (Instruction *Inst : Instructions) {
521ff0cc061SDimitry Andric // Note that this could be -1 if Inst is duplicated across multiple
522ff0cc061SDimitry Andric // partitions.
523ff0cc061SDimitry Andric int ThisPartition = this->InstToPartitionId[Inst];
524ff0cc061SDimitry Andric if (Partition == -2)
525ff0cc061SDimitry Andric Partition = ThisPartition;
526ff0cc061SDimitry Andric // -1 means belonging to multiple partitions.
527ff0cc061SDimitry Andric else if (Partition == -1)
528ff0cc061SDimitry Andric break;
529ff0cc061SDimitry Andric else if (Partition != (int)ThisPartition)
530ff0cc061SDimitry Andric Partition = -1;
531ff0cc061SDimitry Andric }
532ff0cc061SDimitry Andric assert(Partition != -2 && "Pointer not belonging to any partition");
533ff0cc061SDimitry Andric }
534ff0cc061SDimitry Andric
535ff0cc061SDimitry Andric return PtrToPartitions;
536ff0cc061SDimitry Andric }
537ff0cc061SDimitry Andric
print(raw_ostream & OS) const538ff0cc061SDimitry Andric void print(raw_ostream &OS) const {
539ff0cc061SDimitry Andric unsigned Index = 0;
540ff0cc061SDimitry Andric for (const auto &P : PartitionContainer) {
541ff0cc061SDimitry Andric OS << "Partition " << Index++ << " (" << &P << "):\n";
542ff0cc061SDimitry Andric P.print();
543ff0cc061SDimitry Andric }
544ff0cc061SDimitry Andric }
545ff0cc061SDimitry Andric
dump() const546ff0cc061SDimitry Andric void dump() const { print(dbgs()); }
547ff0cc061SDimitry Andric
548ff0cc061SDimitry Andric #ifndef NDEBUG
operator <<(raw_ostream & OS,const InstPartitionContainer & Partitions)549ff0cc061SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS,
550ff0cc061SDimitry Andric const InstPartitionContainer &Partitions) {
551ff0cc061SDimitry Andric Partitions.print(OS);
552ff0cc061SDimitry Andric return OS;
553ff0cc061SDimitry Andric }
554ff0cc061SDimitry Andric #endif
555ff0cc061SDimitry Andric
printBlocks() const556ff0cc061SDimitry Andric void printBlocks() const {
557ff0cc061SDimitry Andric unsigned Index = 0;
558ff0cc061SDimitry Andric for (const auto &P : PartitionContainer) {
559ff0cc061SDimitry Andric dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
560ff0cc061SDimitry Andric P.printBlocks();
561ff0cc061SDimitry Andric }
562ff0cc061SDimitry Andric }
563ff0cc061SDimitry Andric
564ff0cc061SDimitry Andric private:
5652cab237bSDimitry Andric using PartitionContainerT = std::list<InstPartition>;
566ff0cc061SDimitry Andric
5674ba319b5SDimitry Andric /// List of partitions.
568ff0cc061SDimitry Andric PartitionContainerT PartitionContainer;
569ff0cc061SDimitry Andric
5704ba319b5SDimitry Andric /// Mapping from Instruction to partition Id. If the instruction
571ff0cc061SDimitry Andric /// belongs to multiple partitions the entry contains -1.
572ff0cc061SDimitry Andric InstToPartitionIdT InstToPartitionId;
573ff0cc061SDimitry Andric
574ff0cc061SDimitry Andric Loop *L;
575ff0cc061SDimitry Andric LoopInfo *LI;
576ff0cc061SDimitry Andric DominatorTree *DT;
577ff0cc061SDimitry Andric
5784ba319b5SDimitry Andric /// The control structure to merge adjacent partitions if both satisfy
579ff0cc061SDimitry Andric /// the \p Predicate.
580ff0cc061SDimitry Andric template <class UnaryPredicate>
mergeAdjacentPartitionsIf(UnaryPredicate Predicate)581ff0cc061SDimitry Andric void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
582ff0cc061SDimitry Andric InstPartition *PrevMatch = nullptr;
583ff0cc061SDimitry Andric for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
584ff0cc061SDimitry Andric auto DoesMatch = Predicate(&*I);
585ff0cc061SDimitry Andric if (PrevMatch == nullptr && DoesMatch) {
586ff0cc061SDimitry Andric PrevMatch = &*I;
587ff0cc061SDimitry Andric ++I;
588ff0cc061SDimitry Andric } else if (PrevMatch != nullptr && DoesMatch) {
589ff0cc061SDimitry Andric I->moveTo(*PrevMatch);
590ff0cc061SDimitry Andric I = PartitionContainer.erase(I);
591ff0cc061SDimitry Andric } else {
592ff0cc061SDimitry Andric PrevMatch = nullptr;
593ff0cc061SDimitry Andric ++I;
594ff0cc061SDimitry Andric }
595ff0cc061SDimitry Andric }
596ff0cc061SDimitry Andric }
597*b5893f02SDimitry Andric
598*b5893f02SDimitry Andric /// Assign new LoopIDs for the partition's cloned loop.
setNewLoopID(MDNode * OrigLoopID,InstPartition * Part)599*b5893f02SDimitry Andric void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
600*b5893f02SDimitry Andric Optional<MDNode *> PartitionID = makeFollowupLoopID(
601*b5893f02SDimitry Andric OrigLoopID,
602*b5893f02SDimitry Andric {LLVMLoopDistributeFollowupAll,
603*b5893f02SDimitry Andric Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
604*b5893f02SDimitry Andric : LLVMLoopDistributeFollowupCoincident});
605*b5893f02SDimitry Andric if (PartitionID.hasValue()) {
606*b5893f02SDimitry Andric Loop *NewLoop = Part->getDistributedLoop();
607*b5893f02SDimitry Andric NewLoop->setLoopID(PartitionID.getValue());
608*b5893f02SDimitry Andric }
609*b5893f02SDimitry Andric }
610ff0cc061SDimitry Andric };
611ff0cc061SDimitry Andric
6124ba319b5SDimitry Andric /// For each memory instruction, this class maintains difference of the
613ff0cc061SDimitry Andric /// number of unsafe dependences that start out from this instruction minus
614ff0cc061SDimitry Andric /// those that end here.
615ff0cc061SDimitry Andric ///
616ff0cc061SDimitry Andric /// By traversing the memory instructions in program order and accumulating this
617ff0cc061SDimitry Andric /// number, we know whether any unsafe dependence crosses over a program point.
618ff0cc061SDimitry Andric class MemoryInstructionDependences {
6192cab237bSDimitry Andric using Dependence = MemoryDepChecker::Dependence;
620ff0cc061SDimitry Andric
621ff0cc061SDimitry Andric public:
622ff0cc061SDimitry Andric struct Entry {
623ff0cc061SDimitry Andric Instruction *Inst;
6242cab237bSDimitry Andric unsigned NumUnsafeDependencesStartOrEnd = 0;
625ff0cc061SDimitry Andric
Entry__anonfcfe86310111::MemoryInstructionDependences::Entry6262cab237bSDimitry Andric Entry(Instruction *Inst) : Inst(Inst) {}
627ff0cc061SDimitry Andric };
628ff0cc061SDimitry Andric
6292cab237bSDimitry Andric using AccessesType = SmallVector<Entry, 8>;
630ff0cc061SDimitry Andric
begin() const631ff0cc061SDimitry Andric AccessesType::const_iterator begin() const { return Accesses.begin(); }
end() const632ff0cc061SDimitry Andric AccessesType::const_iterator end() const { return Accesses.end(); }
633ff0cc061SDimitry Andric
MemoryInstructionDependences(const SmallVectorImpl<Instruction * > & Instructions,const SmallVectorImpl<Dependence> & Dependences)634ff0cc061SDimitry Andric MemoryInstructionDependences(
635ff0cc061SDimitry Andric const SmallVectorImpl<Instruction *> &Instructions,
6367d523365SDimitry Andric const SmallVectorImpl<Dependence> &Dependences) {
637ff0cc061SDimitry Andric Accesses.append(Instructions.begin(), Instructions.end());
638ff0cc061SDimitry Andric
6394ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Backward dependences:\n");
6407d523365SDimitry Andric for (auto &Dep : Dependences)
641ff0cc061SDimitry Andric if (Dep.isPossiblyBackward()) {
642ff0cc061SDimitry Andric // Note that the designations source and destination follow the program
643ff0cc061SDimitry Andric // order, i.e. source is always first. (The direction is given by the
644ff0cc061SDimitry Andric // DepType.)
645ff0cc061SDimitry Andric ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
646ff0cc061SDimitry Andric --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
647ff0cc061SDimitry Andric
6484ba319b5SDimitry Andric LLVM_DEBUG(Dep.print(dbgs(), 2, Instructions));
649ff0cc061SDimitry Andric }
650ff0cc061SDimitry Andric }
651ff0cc061SDimitry Andric
652ff0cc061SDimitry Andric private:
653ff0cc061SDimitry Andric AccessesType Accesses;
654ff0cc061SDimitry Andric };
655ff0cc061SDimitry Andric
6564ba319b5SDimitry Andric /// The actual class performing the per-loop work.
6573ca95b02SDimitry Andric class LoopDistributeForLoop {
658ff0cc061SDimitry Andric public:
LoopDistributeForLoop(Loop * L,Function * F,LoopInfo * LI,DominatorTree * DT,ScalarEvolution * SE,OptimizationRemarkEmitter * ORE)6593ca95b02SDimitry Andric LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
6603ca95b02SDimitry Andric ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
6612cab237bSDimitry Andric : L(L), F(F), LI(LI), DT(DT), SE(SE), ORE(ORE) {
6623ca95b02SDimitry Andric setForced();
663ff0cc061SDimitry Andric }
664ff0cc061SDimitry Andric
6654ba319b5SDimitry Andric /// Try to distribute an inner-most loop.
processLoop(std::function<const LoopAccessInfo & (Loop &)> & GetLAA)6663ca95b02SDimitry Andric bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
6673ca95b02SDimitry Andric assert(L->empty() && "Only process inner loops.");
668ff0cc061SDimitry Andric
6694ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "\nLDist: In \""
6704ba319b5SDimitry Andric << L->getHeader()->getParent()->getName()
6713ca95b02SDimitry Andric << "\" checking " << *L << "\n");
672ff0cc061SDimitry Andric
6733ca95b02SDimitry Andric if (!L->getExitBlock())
674d88c1a5aSDimitry Andric return fail("MultipleExitBlocks", "multiple exit blocks");
675d88c1a5aSDimitry Andric if (!L->isLoopSimplifyForm())
676d88c1a5aSDimitry Andric return fail("NotLoopSimplifyForm",
677d88c1a5aSDimitry Andric "loop is not in loop-simplify form");
678d88c1a5aSDimitry Andric
679d88c1a5aSDimitry Andric BasicBlock *PH = L->getLoopPreheader();
680ff0cc061SDimitry Andric
6813ca95b02SDimitry Andric // LAA will check that we only have a single exiting block.
6823ca95b02SDimitry Andric LAI = &GetLAA(*L);
683ff0cc061SDimitry Andric
6843ca95b02SDimitry Andric // Currently, we only distribute to isolate the part of the loop with
6853ca95b02SDimitry Andric // dependence cycles to enable partial vectorization.
6863ca95b02SDimitry Andric if (LAI->canVectorizeMemory())
687d88c1a5aSDimitry Andric return fail("MemOpsCanBeVectorized",
688d88c1a5aSDimitry Andric "memory operations are safe for vectorization");
6893ca95b02SDimitry Andric
6903ca95b02SDimitry Andric auto *Dependences = LAI->getDepChecker().getDependences();
6913ca95b02SDimitry Andric if (!Dependences || Dependences->empty())
692d88c1a5aSDimitry Andric return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
6933ca95b02SDimitry Andric
6943ca95b02SDimitry Andric InstPartitionContainer Partitions(L, LI, DT);
6953ca95b02SDimitry Andric
6963ca95b02SDimitry Andric // First, go through each memory operation and assign them to consecutive
6973ca95b02SDimitry Andric // partitions (the order of partitions follows program order). Put those
6983ca95b02SDimitry Andric // with unsafe dependences into "cyclic" partition otherwise put each store
6993ca95b02SDimitry Andric // in its own "non-cyclic" partition (we'll merge these later).
7003ca95b02SDimitry Andric //
7013ca95b02SDimitry Andric // Note that a memory operation (e.g. Load2 below) at a program point that
7023ca95b02SDimitry Andric // has an unsafe dependence (Store3->Load1) spanning over it must be
7033ca95b02SDimitry Andric // included in the same cyclic partition as the dependent operations. This
7043ca95b02SDimitry Andric // is to preserve the original program order after distribution. E.g.:
7053ca95b02SDimitry Andric //
7063ca95b02SDimitry Andric // NumUnsafeDependencesStartOrEnd NumUnsafeDependencesActive
7073ca95b02SDimitry Andric // Load1 -. 1 0->1
7083ca95b02SDimitry Andric // Load2 | /Unsafe/ 0 1
7093ca95b02SDimitry Andric // Store3 -' -1 1->0
7103ca95b02SDimitry Andric // Load4 0 0
7113ca95b02SDimitry Andric //
7123ca95b02SDimitry Andric // NumUnsafeDependencesActive > 0 indicates this situation and in this case
7133ca95b02SDimitry Andric // we just keep assigning to the same cyclic partition until
7143ca95b02SDimitry Andric // NumUnsafeDependencesActive reaches 0.
7153ca95b02SDimitry Andric const MemoryDepChecker &DepChecker = LAI->getDepChecker();
7163ca95b02SDimitry Andric MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
7173ca95b02SDimitry Andric *Dependences);
7183ca95b02SDimitry Andric
7193ca95b02SDimitry Andric int NumUnsafeDependencesActive = 0;
7203ca95b02SDimitry Andric for (auto &InstDep : MID) {
7213ca95b02SDimitry Andric Instruction *I = InstDep.Inst;
7223ca95b02SDimitry Andric // We update NumUnsafeDependencesActive post-instruction, catch the
7233ca95b02SDimitry Andric // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
7243ca95b02SDimitry Andric if (NumUnsafeDependencesActive ||
7253ca95b02SDimitry Andric InstDep.NumUnsafeDependencesStartOrEnd > 0)
7263ca95b02SDimitry Andric Partitions.addToCyclicPartition(I);
7273ca95b02SDimitry Andric else
7283ca95b02SDimitry Andric Partitions.addToNewNonCyclicPartition(I);
7293ca95b02SDimitry Andric NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
7303ca95b02SDimitry Andric assert(NumUnsafeDependencesActive >= 0 &&
7313ca95b02SDimitry Andric "Negative number of dependences active");
732ff0cc061SDimitry Andric }
733ff0cc061SDimitry Andric
7343ca95b02SDimitry Andric // Add partitions for values used outside. These partitions can be out of
7353ca95b02SDimitry Andric // order from the original program order. This is OK because if the
7363ca95b02SDimitry Andric // partition uses a load we will merge this partition with the original
7373ca95b02SDimitry Andric // partition of the load that we set up in the previous loop (see
7383ca95b02SDimitry Andric // mergeToAvoidDuplicatedLoads).
7393ca95b02SDimitry Andric auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
7403ca95b02SDimitry Andric for (auto *Inst : DefsUsedOutside)
7413ca95b02SDimitry Andric Partitions.addToNewNonCyclicPartition(Inst);
7423ca95b02SDimitry Andric
7434ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
7443ca95b02SDimitry Andric if (Partitions.getSize() < 2)
745d88c1a5aSDimitry Andric return fail("CantIsolateUnsafeDeps",
746d88c1a5aSDimitry Andric "cannot isolate unsafe dependencies");
7473ca95b02SDimitry Andric
7483ca95b02SDimitry Andric // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
7493ca95b02SDimitry Andric // should be able to vectorize these together.
7503ca95b02SDimitry Andric Partitions.mergeBeforePopulating();
7514ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
7523ca95b02SDimitry Andric if (Partitions.getSize() < 2)
753d88c1a5aSDimitry Andric return fail("CantIsolateUnsafeDeps",
754d88c1a5aSDimitry Andric "cannot isolate unsafe dependencies");
7553ca95b02SDimitry Andric
7563ca95b02SDimitry Andric // Now, populate the partitions with non-memory operations.
7573ca95b02SDimitry Andric Partitions.populateUsedSet();
7584ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
7593ca95b02SDimitry Andric
7603ca95b02SDimitry Andric // In order to preserve original lexical order for loads, keep them in the
7613ca95b02SDimitry Andric // partition that we set up in the MemoryInstructionDependences loop.
7623ca95b02SDimitry Andric if (Partitions.mergeToAvoidDuplicatedLoads()) {
7634ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
7643ca95b02SDimitry Andric << Partitions);
7653ca95b02SDimitry Andric if (Partitions.getSize() < 2)
766d88c1a5aSDimitry Andric return fail("CantIsolateUnsafeDeps",
767d88c1a5aSDimitry Andric "cannot isolate unsafe dependencies");
768ff0cc061SDimitry Andric }
769ff0cc061SDimitry Andric
7703ca95b02SDimitry Andric // Don't distribute the loop if we need too many SCEV run-time checks.
7713ca95b02SDimitry Andric const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate();
7723ca95b02SDimitry Andric if (Pred.getComplexity() > (IsForced.getValueOr(false)
7733ca95b02SDimitry Andric ? PragmaDistributeSCEVCheckThreshold
7743ca95b02SDimitry Andric : DistributeSCEVCheckThreshold))
775d88c1a5aSDimitry Andric return fail("TooManySCEVRuntimeChecks",
776d88c1a5aSDimitry Andric "too many SCEV run-time checks needed.\n");
7773ca95b02SDimitry Andric
778*b5893f02SDimitry Andric if (!IsForced.getValueOr(false) && hasDisableAllTransformsHint(L))
779*b5893f02SDimitry Andric return fail("HeuristicDisabled", "distribution heuristic disabled");
780*b5893f02SDimitry Andric
7814ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
7823ca95b02SDimitry Andric // We're done forming the partitions set up the reverse mapping from
7833ca95b02SDimitry Andric // instructions to partitions.
7843ca95b02SDimitry Andric Partitions.setupPartitionIdOnInstructions();
7853ca95b02SDimitry Andric
7863ca95b02SDimitry Andric // To keep things simple have an empty preheader before we version or clone
7873ca95b02SDimitry Andric // the loop. (Also split if this has no predecessor, i.e. entry, because we
7883ca95b02SDimitry Andric // rely on PH having a predecessor.)
7893ca95b02SDimitry Andric if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
7903ca95b02SDimitry Andric SplitBlock(PH, PH->getTerminator(), DT, LI);
7913ca95b02SDimitry Andric
7923ca95b02SDimitry Andric // If we need run-time checks, version the loop now.
7933ca95b02SDimitry Andric auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
7943ca95b02SDimitry Andric const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
7953ca95b02SDimitry Andric const auto &AllChecks = RtPtrChecking->getChecks();
7963ca95b02SDimitry Andric auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
7973ca95b02SDimitry Andric RtPtrChecking);
7983ca95b02SDimitry Andric
7993ca95b02SDimitry Andric if (!Pred.isAlwaysTrue() || !Checks.empty()) {
800*b5893f02SDimitry Andric MDNode *OrigLoopID = L->getLoopID();
801*b5893f02SDimitry Andric
8024ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "\nPointers:\n");
8034ba319b5SDimitry Andric LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
8043ca95b02SDimitry Andric LoopVersioning LVer(*LAI, L, LI, DT, SE, false);
8053ca95b02SDimitry Andric LVer.setAliasChecks(std::move(Checks));
8063ca95b02SDimitry Andric LVer.setSCEVChecks(LAI->getPSE().getUnionPredicate());
8073ca95b02SDimitry Andric LVer.versionLoop(DefsUsedOutside);
8083ca95b02SDimitry Andric LVer.annotateLoopWithNoAlias();
809*b5893f02SDimitry Andric
810*b5893f02SDimitry Andric // The unversioned loop will not be changed, so we inherit all attributes
811*b5893f02SDimitry Andric // from the original loop, but remove the loop distribution metadata to
812*b5893f02SDimitry Andric // avoid to distribute it again.
813*b5893f02SDimitry Andric MDNode *UnversionedLoopID =
814*b5893f02SDimitry Andric makeFollowupLoopID(OrigLoopID,
815*b5893f02SDimitry Andric {LLVMLoopDistributeFollowupAll,
816*b5893f02SDimitry Andric LLVMLoopDistributeFollowupFallback},
817*b5893f02SDimitry Andric "llvm.loop.distribute.", true)
818*b5893f02SDimitry Andric .getValue();
819*b5893f02SDimitry Andric LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
8203ca95b02SDimitry Andric }
8213ca95b02SDimitry Andric
8223ca95b02SDimitry Andric // Create identical copies of the original loop for each partition and hook
8233ca95b02SDimitry Andric // them up sequentially.
8243ca95b02SDimitry Andric Partitions.cloneLoops();
8253ca95b02SDimitry Andric
8263ca95b02SDimitry Andric // Now, we remove the instruction from each loop that don't belong to that
8273ca95b02SDimitry Andric // partition.
8283ca95b02SDimitry Andric Partitions.removeUnusedInsts();
8294ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
8304ba319b5SDimitry Andric LLVM_DEBUG(Partitions.printBlocks());
8313ca95b02SDimitry Andric
8323ca95b02SDimitry Andric if (LDistVerify) {
833d88c1a5aSDimitry Andric LI->verify(*DT);
8344ba319b5SDimitry Andric assert(DT->verify(DominatorTree::VerificationLevel::Fast));
8353ca95b02SDimitry Andric }
8363ca95b02SDimitry Andric
8373ca95b02SDimitry Andric ++NumLoopsDistributed;
8383ca95b02SDimitry Andric // Report the success.
8392cab237bSDimitry Andric ORE->emit([&]() {
8402cab237bSDimitry Andric return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
841d88c1a5aSDimitry Andric L->getHeader())
8422cab237bSDimitry Andric << "distributed loop";
8432cab237bSDimitry Andric });
8443ca95b02SDimitry Andric return true;
8453ca95b02SDimitry Andric }
8463ca95b02SDimitry Andric
8474ba319b5SDimitry Andric /// Provide diagnostics then \return with false.
fail(StringRef RemarkName,StringRef Message)848d88c1a5aSDimitry Andric bool fail(StringRef RemarkName, StringRef Message) {
8493ca95b02SDimitry Andric LLVMContext &Ctx = F->getContext();
8503ca95b02SDimitry Andric bool Forced = isForced().getValueOr(false);
8513ca95b02SDimitry Andric
8524ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
8533ca95b02SDimitry Andric
8543ca95b02SDimitry Andric // With Rpass-missed report that distribution failed.
8552cab237bSDimitry Andric ORE->emit([&]() {
8562cab237bSDimitry Andric return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
8572cab237bSDimitry Andric L->getStartLoc(), L->getHeader())
8582cab237bSDimitry Andric << "loop not distributed: use -Rpass-analysis=loop-distribute for "
8592cab237bSDimitry Andric "more "
8602cab237bSDimitry Andric "info";
8612cab237bSDimitry Andric });
8623ca95b02SDimitry Andric
8633ca95b02SDimitry Andric // With Rpass-analysis report why. This is on by default if distribution
8643ca95b02SDimitry Andric // was requested explicitly.
865d88c1a5aSDimitry Andric ORE->emit(OptimizationRemarkAnalysis(
866d88c1a5aSDimitry Andric Forced ? OptimizationRemarkAnalysis::AlwaysPrint : LDIST_NAME,
867d88c1a5aSDimitry Andric RemarkName, L->getStartLoc(), L->getHeader())
868d88c1a5aSDimitry Andric << "loop not distributed: " << Message);
8693ca95b02SDimitry Andric
8703ca95b02SDimitry Andric // Also issue a warning if distribution was requested explicitly but it
8713ca95b02SDimitry Andric // failed.
8723ca95b02SDimitry Andric if (Forced)
8733ca95b02SDimitry Andric Ctx.diagnose(DiagnosticInfoOptimizationFailure(
8743ca95b02SDimitry Andric *F, L->getStartLoc(), "loop not distributed: failed "
8753ca95b02SDimitry Andric "explicitly specified loop distribution"));
8763ca95b02SDimitry Andric
8773ca95b02SDimitry Andric return false;
8783ca95b02SDimitry Andric }
8793ca95b02SDimitry Andric
8804ba319b5SDimitry Andric /// Return if distribution forced to be enabled/disabled for the loop.
8813ca95b02SDimitry Andric ///
8823ca95b02SDimitry Andric /// If the optional has a value, it indicates whether distribution was forced
8833ca95b02SDimitry Andric /// to be enabled (true) or disabled (false). If the optional has no value
8843ca95b02SDimitry Andric /// distribution was not forced either way.
isForced() const8853ca95b02SDimitry Andric const Optional<bool> &isForced() const { return IsForced; }
886ff0cc061SDimitry Andric
887ff0cc061SDimitry Andric private:
8884ba319b5SDimitry Andric /// Filter out checks between pointers from the same partition.
8897d523365SDimitry Andric ///
8907d523365SDimitry Andric /// \p PtrToPartition contains the partition number for pointers. Partition
8917d523365SDimitry Andric /// number -1 means that the pointer is used in multiple partitions. In this
8927d523365SDimitry Andric /// case we can't safely omit the check.
8937d523365SDimitry Andric SmallVector<RuntimePointerChecking::PointerCheck, 4>
includeOnlyCrossPartitionChecks(const SmallVectorImpl<RuntimePointerChecking::PointerCheck> & AllChecks,const SmallVectorImpl<int> & PtrToPartition,const RuntimePointerChecking * RtPtrChecking)8947d523365SDimitry Andric includeOnlyCrossPartitionChecks(
8957d523365SDimitry Andric const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &AllChecks,
8967d523365SDimitry Andric const SmallVectorImpl<int> &PtrToPartition,
8977d523365SDimitry Andric const RuntimePointerChecking *RtPtrChecking) {
8987d523365SDimitry Andric SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
8997d523365SDimitry Andric
9007a7e6055SDimitry Andric copy_if(AllChecks, std::back_inserter(Checks),
9017d523365SDimitry Andric [&](const RuntimePointerChecking::PointerCheck &Check) {
9027d523365SDimitry Andric for (unsigned PtrIdx1 : Check.first->Members)
9037d523365SDimitry Andric for (unsigned PtrIdx2 : Check.second->Members)
9047d523365SDimitry Andric // Only include this check if there is a pair of pointers
9057d523365SDimitry Andric // that require checking and the pointers fall into
9067d523365SDimitry Andric // separate partitions.
9077d523365SDimitry Andric //
9087d523365SDimitry Andric // (Note that we already know at this point that the two
9097d523365SDimitry Andric // pointer groups need checking but it doesn't follow
9107d523365SDimitry Andric // that each pair of pointers within the two groups need
9117d523365SDimitry Andric // checking as well.
9127d523365SDimitry Andric //
9137d523365SDimitry Andric // In other words we don't want to include a check just
9147d523365SDimitry Andric // because there is a pair of pointers between the two
9157d523365SDimitry Andric // pointer groups that require checks and a different
9167d523365SDimitry Andric // pair whose pointers fall into different partitions.)
9177d523365SDimitry Andric if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
9187d523365SDimitry Andric !RuntimePointerChecking::arePointersInSamePartition(
9197d523365SDimitry Andric PtrToPartition, PtrIdx1, PtrIdx2))
9207d523365SDimitry Andric return true;
9217d523365SDimitry Andric return false;
9227d523365SDimitry Andric });
9237d523365SDimitry Andric
9247d523365SDimitry Andric return Checks;
9257d523365SDimitry Andric }
9267d523365SDimitry Andric
9274ba319b5SDimitry Andric /// Check whether the loop metadata is forcing distribution to be
9283ca95b02SDimitry Andric /// enabled/disabled.
setForced()9293ca95b02SDimitry Andric void setForced() {
9303ca95b02SDimitry Andric Optional<const MDOperand *> Value =
9313ca95b02SDimitry Andric findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
9323ca95b02SDimitry Andric if (!Value)
9333ca95b02SDimitry Andric return;
934ff0cc061SDimitry Andric
9353ca95b02SDimitry Andric const MDOperand *Op = *Value;
9363ca95b02SDimitry Andric assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
9373ca95b02SDimitry Andric IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
938ff0cc061SDimitry Andric }
939ff0cc061SDimitry Andric
9403ca95b02SDimitry Andric Loop *L;
9413ca95b02SDimitry Andric Function *F;
942ff0cc061SDimitry Andric
943ff0cc061SDimitry Andric // Analyses used.
944ff0cc061SDimitry Andric LoopInfo *LI;
9452cab237bSDimitry Andric const LoopAccessInfo *LAI = nullptr;
946ff0cc061SDimitry Andric DominatorTree *DT;
9477d523365SDimitry Andric ScalarEvolution *SE;
9483ca95b02SDimitry Andric OptimizationRemarkEmitter *ORE;
9493ca95b02SDimitry Andric
9504ba319b5SDimitry Andric /// Indicates whether distribution is forced to be enabled/disabled for
9513ca95b02SDimitry Andric /// the loop.
9523ca95b02SDimitry Andric ///
9533ca95b02SDimitry Andric /// If the optional has a value, it indicates whether distribution was forced
9543ca95b02SDimitry Andric /// to be enabled (true) or disabled (false). If the optional has no value
9553ca95b02SDimitry Andric /// distribution was not forced either way.
9563ca95b02SDimitry Andric Optional<bool> IsForced;
9573ca95b02SDimitry Andric };
9583ca95b02SDimitry Andric
9592cab237bSDimitry Andric } // end anonymous namespace
9602cab237bSDimitry Andric
9613ca95b02SDimitry Andric /// Shared implementation between new and old PMs.
runImpl(Function & F,LoopInfo * LI,DominatorTree * DT,ScalarEvolution * SE,OptimizationRemarkEmitter * ORE,std::function<const LoopAccessInfo & (Loop &)> & GetLAA)9623ca95b02SDimitry Andric static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
9633ca95b02SDimitry Andric ScalarEvolution *SE, OptimizationRemarkEmitter *ORE,
964d88c1a5aSDimitry Andric std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
9653ca95b02SDimitry Andric // Build up a worklist of inner-loops to vectorize. This is necessary as the
9663ca95b02SDimitry Andric // act of distributing a loop creates new loops and can invalidate iterators
9673ca95b02SDimitry Andric // across the loops.
9683ca95b02SDimitry Andric SmallVector<Loop *, 8> Worklist;
9693ca95b02SDimitry Andric
9703ca95b02SDimitry Andric for (Loop *TopLevelLoop : *LI)
9713ca95b02SDimitry Andric for (Loop *L : depth_first(TopLevelLoop))
9723ca95b02SDimitry Andric // We only handle inner-most loops.
9733ca95b02SDimitry Andric if (L->empty())
9743ca95b02SDimitry Andric Worklist.push_back(L);
9753ca95b02SDimitry Andric
9763ca95b02SDimitry Andric // Now walk the identified inner loops.
9773ca95b02SDimitry Andric bool Changed = false;
9783ca95b02SDimitry Andric for (Loop *L : Worklist) {
9793ca95b02SDimitry Andric LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);
9803ca95b02SDimitry Andric
9813ca95b02SDimitry Andric // If distribution was forced for the specific loop to be
9823ca95b02SDimitry Andric // enabled/disabled, follow that. Otherwise use the global flag.
983d88c1a5aSDimitry Andric if (LDL.isForced().getValueOr(EnableLoopDistribute))
9843ca95b02SDimitry Andric Changed |= LDL.processLoop(GetLAA);
9853ca95b02SDimitry Andric }
9863ca95b02SDimitry Andric
9873ca95b02SDimitry Andric // Process each loop nest in the function.
9883ca95b02SDimitry Andric return Changed;
9893ca95b02SDimitry Andric }
9903ca95b02SDimitry Andric
9912cab237bSDimitry Andric namespace {
9922cab237bSDimitry Andric
9934ba319b5SDimitry Andric /// The pass class.
9943ca95b02SDimitry Andric class LoopDistributeLegacy : public FunctionPass {
9953ca95b02SDimitry Andric public:
9962cab237bSDimitry Andric static char ID;
9972cab237bSDimitry Andric
LoopDistributeLegacy()998d88c1a5aSDimitry Andric LoopDistributeLegacy() : FunctionPass(ID) {
9993ca95b02SDimitry Andric // The default is set by the caller.
10003ca95b02SDimitry Andric initializeLoopDistributeLegacyPass(*PassRegistry::getPassRegistry());
10013ca95b02SDimitry Andric }
10023ca95b02SDimitry Andric
runOnFunction(Function & F)10033ca95b02SDimitry Andric bool runOnFunction(Function &F) override {
10043ca95b02SDimitry Andric if (skipFunction(F))
10053ca95b02SDimitry Andric return false;
10063ca95b02SDimitry Andric
10073ca95b02SDimitry Andric auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
10083ca95b02SDimitry Andric auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
10093ca95b02SDimitry Andric auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
10103ca95b02SDimitry Andric auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
10113ca95b02SDimitry Andric auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
10123ca95b02SDimitry Andric std::function<const LoopAccessInfo &(Loop &)> GetLAA =
10133ca95b02SDimitry Andric [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
10143ca95b02SDimitry Andric
1015d88c1a5aSDimitry Andric return runImpl(F, LI, DT, SE, ORE, GetLAA);
10163ca95b02SDimitry Andric }
10173ca95b02SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const10183ca95b02SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
10193ca95b02SDimitry Andric AU.addRequired<ScalarEvolutionWrapperPass>();
10203ca95b02SDimitry Andric AU.addRequired<LoopInfoWrapperPass>();
10213ca95b02SDimitry Andric AU.addPreserved<LoopInfoWrapperPass>();
10223ca95b02SDimitry Andric AU.addRequired<LoopAccessLegacyAnalysis>();
10233ca95b02SDimitry Andric AU.addRequired<DominatorTreeWrapperPass>();
10243ca95b02SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>();
10253ca95b02SDimitry Andric AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1026d88c1a5aSDimitry Andric AU.addPreserved<GlobalsAAWrapperPass>();
10273ca95b02SDimitry Andric }
1028ff0cc061SDimitry Andric };
10292cab237bSDimitry Andric
10302cab237bSDimitry Andric } // end anonymous namespace
1031ff0cc061SDimitry Andric
run(Function & F,FunctionAnalysisManager & AM)10323ca95b02SDimitry Andric PreservedAnalyses LoopDistributePass::run(Function &F,
10333ca95b02SDimitry Andric FunctionAnalysisManager &AM) {
10343ca95b02SDimitry Andric auto &LI = AM.getResult<LoopAnalysis>(F);
10353ca95b02SDimitry Andric auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
10363ca95b02SDimitry Andric auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
10373ca95b02SDimitry Andric auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
10383ca95b02SDimitry Andric
1039f1a29dd3SDimitry Andric // We don't directly need these analyses but they're required for loop
1040f1a29dd3SDimitry Andric // analyses so provide them below.
1041f1a29dd3SDimitry Andric auto &AA = AM.getResult<AAManager>(F);
1042f1a29dd3SDimitry Andric auto &AC = AM.getResult<AssumptionAnalysis>(F);
1043f1a29dd3SDimitry Andric auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1044f1a29dd3SDimitry Andric auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1045f1a29dd3SDimitry Andric
10463ca95b02SDimitry Andric auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
10473ca95b02SDimitry Andric std::function<const LoopAccessInfo &(Loop &)> GetLAA =
10483ca95b02SDimitry Andric [&](Loop &L) -> const LoopAccessInfo & {
10492cab237bSDimitry Andric LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, nullptr};
1050f1a29dd3SDimitry Andric return LAM.getResult<LoopAccessAnalysis>(L, AR);
10513ca95b02SDimitry Andric };
10523ca95b02SDimitry Andric
1053d88c1a5aSDimitry Andric bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA);
10543ca95b02SDimitry Andric if (!Changed)
10553ca95b02SDimitry Andric return PreservedAnalyses::all();
10563ca95b02SDimitry Andric PreservedAnalyses PA;
10573ca95b02SDimitry Andric PA.preserve<LoopAnalysis>();
10583ca95b02SDimitry Andric PA.preserve<DominatorTreeAnalysis>();
1059d88c1a5aSDimitry Andric PA.preserve<GlobalsAA>();
10603ca95b02SDimitry Andric return PA;
10613ca95b02SDimitry Andric }
10623ca95b02SDimitry Andric
10633ca95b02SDimitry Andric char LoopDistributeLegacy::ID;
10642cab237bSDimitry Andric
1065d88c1a5aSDimitry Andric static const char ldist_name[] = "Loop Distribution";
1066ff0cc061SDimitry Andric
INITIALIZE_PASS_BEGIN(LoopDistributeLegacy,LDIST_NAME,ldist_name,false,false)10673ca95b02SDimitry Andric INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
10683ca95b02SDimitry Andric false)
1069ff0cc061SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
10703ca95b02SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
1071ff0cc061SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
10727d523365SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
10733ca95b02SDimitry Andric INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
10743ca95b02SDimitry Andric INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
1075ff0cc061SDimitry Andric
10762cab237bSDimitry Andric FunctionPass *llvm::createLoopDistributePass() { return new LoopDistributeLegacy(); }
1077