1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Move instructions between statements.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "polly/ForwardOpTree.h"
14 #include "polly/Options.h"
15 #include "polly/ScopBuilder.h"
16 #include "polly/ScopInfo.h"
17 #include "polly/ScopPass.h"
18 #include "polly/Support/GICHelper.h"
19 #include "polly/Support/ISLOStream.h"
20 #include "polly/Support/ISLTools.h"
21 #include "polly/Support/VirtualInstruction.h"
22 #include "polly/ZoneAlgo.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/Analysis/LoopInfo.h"
27 #include "llvm/Analysis/ValueTracking.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Value.h"
31 #include "llvm/InitializePasses.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Compiler.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "isl/ctx.h"
39 #include "isl/isl-noexceptions.h"
40 #include <cassert>
41 #include <memory>
42 
43 #define DEBUG_TYPE "polly-optree"
44 
45 using namespace llvm;
46 using namespace polly;
47 
48 static cl::opt<bool>
49     AnalyzeKnown("polly-optree-analyze-known",
50                  cl::desc("Analyze array contents for load forwarding"),
51                  cl::cat(PollyCategory), cl::init(true), cl::Hidden);
52 
53 static cl::opt<bool>
54     NormalizePHIs("polly-optree-normalize-phi",
55                   cl::desc("Replace PHIs by their incoming values"),
56                   cl::cat(PollyCategory), cl::init(false), cl::Hidden);
57 
58 static cl::opt<unsigned>
59     MaxOps("polly-optree-max-ops",
60            cl::desc("Maximum number of ISL operations to invest for known "
61                     "analysis; 0=no limit"),
62            cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
63 
64 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
65 STATISTIC(KnownOutOfQuota,
66           "Analyses aborted because max_operations was reached");
67 
68 STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
69 STATISTIC(TotalKnownLoadsForwarded,
70           "Number of forwarded loads because their value was known");
71 STATISTIC(TotalReloads, "Number of reloaded values");
72 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
73 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
74 STATISTIC(TotalModifiedStmts,
75           "Number of statements with at least one forwarded tree");
76 
77 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
78 
79 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
80 STATISTIC(NumValueWritesInLoops,
81           "Number of scalar value writes nested in affine loops after OpTree");
82 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
83 STATISTIC(NumPHIWritesInLoops,
84           "Number of scalar phi writes nested in affine loops after OpTree");
85 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
86 STATISTIC(NumSingletonWritesInLoops,
87           "Number of singleton writes nested in affine loops after OpTree");
88 
89 namespace {
90 
91 /// The state of whether an operand tree was/can be forwarded.
92 ///
93 /// The items apply to an instructions and its operand tree with the instruction
94 /// as the root element. If the value in question is not an instruction in the
95 /// SCoP, it can be a leaf of an instruction's operand tree.
96 enum ForwardingDecision {
97   /// An uninitialized value.
98   FD_Unknown,
99 
100   /// The root instruction or value cannot be forwarded at all.
101   FD_CannotForward,
102 
103   /// The root instruction or value can be forwarded as a leaf of a larger
104   /// operand tree.
105   /// It does not make sense to move the value itself, it would just replace it
106   /// by a use of itself. For instance, a constant "5" used in a statement can
107   /// be forwarded, but it would just replace it by the same constant "5".
108   /// However, it makes sense to move as an operand of
109   ///
110   ///   %add = add 5, 5
111   ///
112   /// where "5" is moved as part of a larger operand tree. "5" would be placed
113   /// (disregarding for a moment that literal constants don't have a location
114   /// and can be used anywhere) into the same statement as %add would.
115   FD_CanForwardLeaf,
116 
117   /// The root instruction can be forwarded and doing so avoids a scalar
118   /// dependency.
119   ///
120   /// This can be either because the operand tree can be moved to the target
121   /// statement, or a memory access is redirected to read from a different
122   /// location.
123   FD_CanForwardProfitably,
124 
125   /// A forwarding method cannot be applied to the operand tree.
126   /// The difference to FD_CannotForward is that there might be other methods
127   /// that can handle it.
128   FD_NotApplicable
129 };
130 
131 /// Represents the evaluation of and action to taken when forwarding a value
132 /// from an operand tree.
133 struct ForwardingAction {
134   using KeyTy = std::pair<Value *, ScopStmt *>;
135 
136   /// Evaluation of forwarding a value.
137   ForwardingDecision Decision = FD_Unknown;
138 
139   /// Callback to execute the forwarding.
140   /// Returning true allows deleting the polly::MemoryAccess if the value is the
141   /// root of the operand tree (and its elimination the reason why the
142   /// forwarding is done). Return false if the MemoryAccess is reused or there
143   /// might be other users of the read accesses. In the letter case the
144   /// polly::SimplifyPass can remove dead MemoryAccesses.
145   std::function<bool()> Execute = []() -> bool {
146     llvm_unreachable("unspecified how to forward");
147   };
148 
149   /// Other values that need to be forwarded if this action is executed. Their
150   /// actions are executed after this one.
151   SmallVector<KeyTy, 4> Depends;
152 
153   /// Named ctor: The method creating this object does not apply to the kind of
154   /// value, but other methods may.
155   static ForwardingAction notApplicable() {
156     ForwardingAction Result;
157     Result.Decision = FD_NotApplicable;
158     return Result;
159   }
160 
161   /// Named ctor: The value cannot be forwarded.
162   static ForwardingAction cannotForward() {
163     ForwardingAction Result;
164     Result.Decision = FD_CannotForward;
165     return Result;
166   }
167 
168   /// Named ctor: The value can just be used without any preparation.
169   static ForwardingAction triviallyForwardable(bool IsProfitable) {
170     ForwardingAction Result;
171     Result.Decision =
172         IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf;
173     Result.Execute = []() { return true; };
174     return Result;
175   }
176 
177   /// Name ctor: The value can be forwarded by executing an action.
178   static ForwardingAction canForward(std::function<bool()> Execute,
179                                      ArrayRef<KeyTy> Depends,
180                                      bool IsProfitable) {
181     ForwardingAction Result;
182     Result.Decision =
183         IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf;
184     Result.Execute = std::move(Execute);
185     Result.Depends.append(Depends.begin(), Depends.end());
186     return Result;
187   }
188 };
189 
190 /// Implementation of operand tree forwarding for a specific SCoP.
191 ///
192 /// For a statement that requires a scalar value (through a value read
193 /// MemoryAccess), see if its operand can be moved into the statement. If so,
194 /// the MemoryAccess is removed and the all the operand tree instructions are
195 /// moved into the statement. All original instructions are left in the source
196 /// statements. The simplification pass can clean these up.
197 class ForwardOpTreeImpl : ZoneAlgorithm {
198 private:
199   using MemoizationTy = DenseMap<ForwardingAction::KeyTy, ForwardingAction>;
200 
201   /// Scope guard to limit the number of isl operations for this pass.
202   IslMaxOperationsGuard &MaxOpGuard;
203 
204   /// How many instructions have been copied to other statements.
205   int NumInstructionsCopied = 0;
206 
207   /// Number of loads forwarded because their value was known.
208   int NumKnownLoadsForwarded = 0;
209 
210   /// Number of values reloaded from known array elements.
211   int NumReloads = 0;
212 
213   /// How many read-only accesses have been copied.
214   int NumReadOnlyCopied = 0;
215 
216   /// How many operand trees have been forwarded.
217   int NumForwardedTrees = 0;
218 
219   /// Number of statements with at least one forwarded operand tree.
220   int NumModifiedStmts = 0;
221 
222   /// Whether we carried out at least one change to the SCoP.
223   bool Modified = false;
224 
225   /// Cache of how to forward values.
226   /// The key of this map is the llvm::Value to be forwarded and the
227   /// polly::ScopStmt it is forwarded from. This is because the same llvm::Value
228   /// can evaluate differently depending on where it is evaluate. For instance,
229   /// a synthesizable Scev represents a recurrence with an loop but the loop's
230   /// exit value if evaluated after the loop.
231   /// The cached results are only valid for the current TargetStmt.
232   /// CHECKME: ScalarEvolution::getScevAtScope should take care for getting the
233   /// exit value when instantiated outside of the loop. The primary concern is
234   /// ambiguity when crossing PHI nodes, which currently is not supported.
235   MemoizationTy ForwardingActions;
236 
237   /// Contains the zones where array elements are known to contain a specific
238   /// value.
239   /// { [Element[] -> Zone[]] -> ValInst[] }
240   /// @see computeKnown()
241   isl::union_map Known;
242 
243   /// Translator for newly introduced ValInsts to already existing ValInsts such
244   /// that new introduced load instructions can reuse the Known analysis of its
245   /// original load. { ValInst[] -> ValInst[] }
246   isl::union_map Translator;
247 
248   /// Get list of array elements that do contain the same ValInst[] at Domain[].
249   ///
250   /// @param ValInst { Domain[] -> ValInst[] }
251   ///                The values for which we search for alternative locations,
252   ///                per statement instance.
253   ///
254   /// @return { Domain[] -> Element[] }
255   ///         For each statement instance, the array elements that contain the
256   ///         same ValInst.
257   isl::union_map findSameContentElements(isl::union_map ValInst) {
258     assert(!ValInst.is_single_valued().is_false());
259 
260     // { Domain[] }
261     isl::union_set Domain = ValInst.domain();
262 
263     // { Domain[] -> Scatter[] }
264     isl::union_map Schedule = getScatterFor(Domain);
265 
266     // { Element[] -> [Scatter[] -> ValInst[]] }
267     isl::union_map MustKnownCurried =
268         convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
269 
270     // { [Domain[] -> ValInst[]] -> Scatter[] }
271     isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
272 
273     // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
274     isl::union_map SchedValDomVal =
275         DomValSched.range_product(ValInst.range_map()).reverse();
276 
277     // { Element[] -> [Domain[] -> ValInst[]] }
278     isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
279 
280     // { Domain[] -> Element[] }
281     isl::union_map MustKnownMap =
282         MustKnownInst.uncurry().domain().unwrap().reverse();
283     simplify(MustKnownMap);
284 
285     return MustKnownMap;
286   }
287 
288   /// Find a single array element for each statement instance, within a single
289   /// array.
290   ///
291   /// @param MustKnown { Domain[] -> Element[] }
292   ///                  Set of candidate array elements.
293   /// @param Domain    { Domain[] }
294   ///                  The statement instance for which we need elements for.
295   ///
296   /// @return { Domain[] -> Element[] }
297   ///         For each statement instance, an array element out of @p MustKnown.
298   ///         All array elements must be in the same array (Polly does not yet
299   ///         support reading from different accesses using the same
300   ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
301   ///         null.
302   isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
303     // { Domain[] -> Element[] }
304     isl::map Result;
305 
306     // MemoryAccesses can read only elements from a single array
307     // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
308     // Look through all spaces until we find one that contains at least the
309     // wanted statement instance.s
310     for (isl::map Map : MustKnown.get_map_list()) {
311       // Get the array this is accessing.
312       isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
313       ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
314 
315       // No support for generation of indirect array accesses.
316       if (SAI->getBasePtrOriginSAI())
317         continue;
318 
319       // Determine whether this map contains all wanted values.
320       isl::set MapDom = Map.domain();
321       if (!Domain.is_subset(MapDom).is_true())
322         continue;
323 
324       // There might be multiple array elements that contain the same value, but
325       // choose only one of them. lexmin is used because it returns a one-value
326       // mapping, we do not care about which one.
327       // TODO: Get the simplest access function.
328       Result = Map.lexmin();
329       break;
330     }
331 
332     return Result;
333   }
334 
335 public:
336   ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
337       : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
338 
339   /// Compute the zones of known array element contents.
340   ///
341   /// @return True if the computed #Known is usable.
342   bool computeKnownValues() {
343     isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
344 
345     // Check that nothing strange occurs.
346     collectCompatibleElts();
347 
348     {
349       IslQuotaScope QuotaScope = MaxOpGuard.enter();
350 
351       computeCommon();
352       if (NormalizePHIs)
353         computeNormalizedPHIs();
354       Known = computeKnown(true, true);
355 
356       // Preexisting ValInsts use the known content analysis of themselves.
357       Translator = makeIdentityMap(Known.range(), false);
358     }
359 
360     if (!Known || !Translator || !NormalizeMap) {
361       assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
362       Known = nullptr;
363       Translator = nullptr;
364       NormalizeMap = nullptr;
365       LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
366       return false;
367     }
368 
369     KnownAnalyzed++;
370     LLVM_DEBUG(dbgs() << "All known: " << Known << "\n");
371 
372     return true;
373   }
374 
375   void printStatistics(raw_ostream &OS, int Indent = 0) {
376     OS.indent(Indent) << "Statistics {\n";
377     OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
378                           << '\n';
379     OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
380                           << '\n';
381     OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
382     OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
383                           << '\n';
384     OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
385                           << '\n';
386     OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
387                           << NumModifiedStmts << '\n';
388     OS.indent(Indent) << "}\n";
389   }
390 
391   void printStatements(raw_ostream &OS, int Indent = 0) const {
392     OS.indent(Indent) << "After statements {\n";
393     for (auto &Stmt : *S) {
394       OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
395       for (auto *MA : Stmt)
396         MA->print(OS);
397 
398       OS.indent(Indent + 12);
399       Stmt.printInstructions(OS);
400     }
401     OS.indent(Indent) << "}\n";
402   }
403 
404   /// Create a new MemoryAccess of type read and MemoryKind::Array.
405   ///
406   /// @param Stmt           The statement in which the access occurs.
407   /// @param LI             The instruction that does the access.
408   /// @param AccessRelation The array element that each statement instance
409   ///                       accesses.
410   ///
411   /// @param The newly created access.
412   MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
413                                     isl::map AccessRelation) {
414     isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
415     ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
416 
417     // Create a dummy SCEV access, to be replaced anyway.
418     SmallVector<const SCEV *, 4> Sizes;
419     Sizes.reserve(SAI->getNumberOfDimensions());
420     SmallVector<const SCEV *, 4> Subscripts;
421     Subscripts.reserve(SAI->getNumberOfDimensions());
422     for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
423       Sizes.push_back(SAI->getDimensionSize(i));
424       Subscripts.push_back(nullptr);
425     }
426 
427     MemoryAccess *Access =
428         new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
429                          LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
430     S->addAccessFunction(Access);
431     Stmt->addAccess(Access, true);
432 
433     Access->setNewAccessRelation(AccessRelation);
434 
435     return Access;
436   }
437 
438   /// Forward a load by reading from an array element that contains the same
439   /// value. Typically the location it was loaded from.
440   ///
441   /// @param TargetStmt  The statement the operand tree will be copied to.
442   /// @param Inst        The (possibly speculatable) instruction to forward.
443   /// @param UseStmt     The statement that uses @p Inst.
444   /// @param UseLoop     The loop @p Inst is used in.
445   /// @param DefStmt     The statement @p Inst is defined in.
446   /// @param DefLoop     The loop which contains @p Inst.
447   ///
448   /// @return A ForwardingAction object describing the feasibility and
449   ///         profitability evaluation and the callback carrying-out the value
450   ///         forwarding.
451   ForwardingAction forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
452                                     ScopStmt *UseStmt, Loop *UseLoop,
453                                     ScopStmt *DefStmt, Loop *DefLoop) {
454     // Cannot do anything without successful known analysis.
455     if (Known.is_null() || Translator.is_null() ||
456         MaxOpGuard.hasQuotaExceeded())
457       return ForwardingAction::notApplicable();
458 
459     LoadInst *LI = dyn_cast<LoadInst>(Inst);
460     if (!LI)
461       return ForwardingAction::notApplicable();
462 
463     ForwardingDecision OpDecision =
464         forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop);
465     switch (OpDecision) {
466     case FD_CanForwardProfitably:
467     case FD_CanForwardLeaf:
468       break;
469     case FD_CannotForward:
470       return ForwardingAction::cannotForward();
471     default:
472       llvm_unreachable("Shouldn't return this");
473     }
474 
475     MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
476     if (Access) {
477       // If the load is already in the statement, no forwarding is necessary.
478       // However, it might happen that the LoadInst is already present in the
479       // statement's instruction list. In that case we do as follows:
480       // - For the evaluation, we can trivially forward it as it is
481       //   benefit of forwarding an already present instruction.
482       // - For the execution, prepend the instruction (to make it
483       //   available to all instructions following in the instruction list), but
484       //   do not add another MemoryAccess.
485       auto ExecAction = [this, TargetStmt, LI, Access]() -> bool {
486         TargetStmt->prependInstruction(LI);
487         LLVM_DEBUG(
488             dbgs() << "    forwarded known load with preexisting MemoryAccess"
489                    << Access << "\n");
490 
491         NumKnownLoadsForwarded++;
492         TotalKnownLoadsForwarded++;
493         return true;
494       };
495       return ForwardingAction::canForward(
496           ExecAction, {{LI->getPointerOperand(), DefStmt}}, true);
497     }
498 
499     // Allow the following Isl calculations (until we return the
500     // ForwardingAction, excluding the code inside the lambda that will be
501     // executed later) to fail.
502     IslQuotaScope QuotaScope = MaxOpGuard.enter();
503 
504     // { DomainDef[] -> ValInst[] }
505     isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
506     assert(!isNormalized(ExpectedVal).is_false() &&
507            "LoadInsts are always normalized");
508 
509     // { DomainUse[] -> DomainTarget[] }
510     isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
511 
512     // { DomainTarget[] -> ValInst[] }
513     isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
514     isl::union_map TranslatedExpectedVal =
515         isl::union_map(TargetExpectedVal).apply_range(Translator);
516 
517     // { DomainTarget[] -> Element[] }
518     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
519 
520     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
521     if (!SameVal)
522       return ForwardingAction::notApplicable();
523 
524     LLVM_DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
525                       << "\n");
526     LLVM_DEBUG(dbgs() << "      candidate elements where " << Candidates
527                       << "\n");
528 
529     // { ValInst[] }
530     isl::space ValInstSpace = ExpectedVal.get_space().range();
531 
532     // After adding a new load to the SCoP, also update the Known content
533     // about it. The new load will have a known ValInst of
534     // { [DomainTarget[] -> Value[]] }
535     // but which -- because it is a copy of it -- has same value as the
536     // { [DomainDef[] -> Value[]] }
537     // that it replicates. Instead of  cloning the known content of
538     // [DomainDef[] -> Value[]]
539     // for DomainTarget[], we add a 'translator' that maps
540     // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
541     // before comparing to the known content.
542     // TODO: 'Translator' could also be used to map PHINodes to their incoming
543     // ValInsts.
544     isl::map LocalTranslator;
545     if (!ValInstSpace.is_wrapping().is_false()) {
546       // { DefDomain[] -> Value[] }
547       isl::map ValInsts = ExpectedVal.range().unwrap();
548 
549       // { DefDomain[] }
550       isl::set DefDomain = ValInsts.domain();
551 
552       // { Value[] }
553       isl::space ValSpace = ValInstSpace.unwrap().range();
554 
555       // { Value[] -> Value[] }
556       isl::map ValToVal =
557           isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
558 
559       // { DomainDef[] -> DomainTarget[] }
560       isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt);
561 
562       // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
563       LocalTranslator = DefToTarget.reverse().product(ValToVal);
564       LLVM_DEBUG(dbgs() << "      local translator is " << LocalTranslator
565                         << "\n");
566 
567       if (!LocalTranslator)
568         return ForwardingAction::notApplicable();
569     }
570 
571     auto ExecAction = [this, TargetStmt, LI, SameVal,
572                        LocalTranslator]() -> bool {
573       TargetStmt->prependInstruction(LI);
574       MemoryAccess *Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
575       LLVM_DEBUG(dbgs() << "    forwarded known load with new MemoryAccess"
576                         << Access << "\n");
577 
578       if (LocalTranslator)
579         Translator = Translator.add_map(LocalTranslator);
580 
581       NumKnownLoadsForwarded++;
582       TotalKnownLoadsForwarded++;
583       return true;
584     };
585     return ForwardingAction::canForward(
586         ExecAction, {{LI->getPointerOperand(), DefStmt}}, true);
587   }
588 
589   /// Forward a scalar by redirecting the access to an array element that stores
590   /// the same value.
591   ///
592   /// @param TargetStmt  The statement the operand tree will be copied to.
593   /// @param Inst        The scalar to forward.
594   /// @param UseStmt     The statement that uses @p Inst.
595   /// @param UseLoop     The loop @p Inst is used in.
596   /// @param DefStmt     The statement @p Inst is defined in.
597   /// @param DefLoop     The loop which contains @p Inst.
598   ///
599   /// @return A ForwardingAction object describing the feasibility and
600   ///         profitability evaluation and the callback carrying-out the value
601   ///         forwarding.
602   ForwardingAction reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
603                                       ScopStmt *UseStmt, Loop *UseLoop,
604                                       ScopStmt *DefStmt, Loop *DefLoop) {
605     // Cannot do anything without successful known analysis.
606     if (Known.is_null() || Translator.is_null() ||
607         MaxOpGuard.hasQuotaExceeded())
608       return ForwardingAction::notApplicable();
609 
610     // Don't spend too much time analyzing whether it can be reloaded.
611     IslQuotaScope QuotaScope = MaxOpGuard.enter();
612 
613     // { DomainDef[] -> ValInst[] }
614     isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
615 
616     // { DomainUse[] -> DomainTarget[] }
617     isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
618 
619     // { DomainTarget[] -> ValInst[] }
620     isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
621     isl::union_map TranslatedExpectedVal =
622         TargetExpectedVal.apply_range(Translator);
623 
624     // { DomainTarget[] -> Element[] }
625     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
626 
627     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
628     simplify(SameVal);
629     if (!SameVal)
630       return ForwardingAction::notApplicable();
631 
632     auto ExecAction = [this, TargetStmt, Inst, SameVal]() {
633       MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
634       if (!Access)
635         Access = TargetStmt->ensureValueRead(Inst);
636       Access->setNewAccessRelation(SameVal);
637 
638       TotalReloads++;
639       NumReloads++;
640       return false;
641     };
642 
643     return ForwardingAction::canForward(ExecAction, {}, true);
644   }
645 
646   /// Forwards a speculatively executable instruction.
647   ///
648   /// @param TargetStmt  The statement the operand tree will be copied to.
649   /// @param UseInst     The (possibly speculatable) instruction to forward.
650   /// @param DefStmt     The statement @p UseInst is defined in.
651   /// @param DefLoop     The loop which contains @p UseInst.
652   ///
653   /// @return A ForwardingAction object describing the feasibility and
654   ///         profitability evaluation and the callback carrying-out the value
655   ///         forwarding.
656   ForwardingAction forwardSpeculatable(ScopStmt *TargetStmt,
657                                        Instruction *UseInst, ScopStmt *DefStmt,
658                                        Loop *DefLoop) {
659     // PHIs, unless synthesizable, are not yet supported.
660     if (isa<PHINode>(UseInst))
661       return ForwardingAction::notApplicable();
662 
663     // Compatible instructions must satisfy the following conditions:
664     // 1. Idempotent (instruction will be copied, not moved; although its
665     //    original instance might be removed by simplification)
666     // 2. Not access memory (There might be memory writes between)
667     // 3. Not cause undefined behaviour (we might copy to a location when the
668     //    original instruction was no executed; this is currently not possible
669     //    because we do not forward PHINodes)
670     // 4. Not leak memory if executed multiple times (i.e. malloc)
671     //
672     // Instruction::mayHaveSideEffects is not sufficient because it considers
673     // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
674     // not sufficient because it allows memory accesses.
675     if (mayBeMemoryDependent(*UseInst))
676       return ForwardingAction::notApplicable();
677 
678     SmallVector<ForwardingAction::KeyTy, 4> Depends;
679     Depends.reserve(UseInst->getNumOperands());
680     for (Value *OpVal : UseInst->operand_values()) {
681       ForwardingDecision OpDecision =
682           forwardTree(TargetStmt, OpVal, DefStmt, DefLoop);
683       switch (OpDecision) {
684       case FD_CannotForward:
685         return ForwardingAction::cannotForward();
686 
687       case FD_CanForwardLeaf:
688       case FD_CanForwardProfitably:
689         Depends.emplace_back(OpVal, DefStmt);
690         break;
691 
692       case FD_NotApplicable:
693       case FD_Unknown:
694         llvm_unreachable(
695             "forwardTree should never return FD_NotApplicable/FD_Unknown");
696       }
697     }
698 
699     auto ExecAction = [this, TargetStmt, UseInst]() {
700       // To ensure the right order, prepend this instruction before its
701       // operands. This ensures that its operands are inserted before the
702       // instruction using them.
703       TargetStmt->prependInstruction(UseInst);
704       NumInstructionsCopied++;
705       TotalInstructionsCopied++;
706       return true;
707     };
708     return ForwardingAction::canForward(ExecAction, Depends, true);
709   }
710 
711   /// Determines whether an operand tree can be forwarded and returns
712   /// instructions how to do so in the form of a ForwardingAction object.
713   ///
714   /// @param TargetStmt  The statement the operand tree will be copied to.
715   /// @param UseVal      The value (usually an instruction) which is root of an
716   ///                    operand tree.
717   /// @param UseStmt     The statement that uses @p UseVal.
718   /// @param UseLoop     The loop @p UseVal is used in.
719   ///
720   /// @return A ForwardingAction object describing the feasibility and
721   ///         profitability evaluation and the callback carrying-out the value
722   ///         forwarding.
723   ForwardingAction forwardTreeImpl(ScopStmt *TargetStmt, Value *UseVal,
724                                    ScopStmt *UseStmt, Loop *UseLoop) {
725     ScopStmt *DefStmt = nullptr;
726     Loop *DefLoop = nullptr;
727 
728     // { DefDomain[] -> TargetDomain[] }
729     isl::map DefToTarget;
730 
731     VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
732     switch (VUse.getKind()) {
733     case VirtualUse::Constant:
734     case VirtualUse::Block:
735     case VirtualUse::Hoisted:
736       // These can be used anywhere without special considerations.
737       return ForwardingAction::triviallyForwardable(false);
738 
739     case VirtualUse::Synthesizable: {
740       // Check if the value is synthesizable at the new location as well. This
741       // might be possible when leaving a loop for which ScalarEvolution is
742       // unable to derive the exit value for.
743       // TODO: If there is a LCSSA PHI at the loop exit, use that one.
744       // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
745       // do not forward past its loop header. This would require us to use a
746       // previous loop induction variable instead the current one. We currently
747       // do not allow forwarding PHI nodes, thus this should never occur (the
748       // only exception where no phi is necessary being an unreachable loop
749       // without edge from the outside).
750       VirtualUse TargetUse = VirtualUse::create(
751           S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
752       if (TargetUse.getKind() == VirtualUse::Synthesizable)
753         return ForwardingAction::triviallyForwardable(false);
754 
755       LLVM_DEBUG(
756           dbgs() << "    Synthesizable would not be synthesizable anymore: "
757                  << *UseVal << "\n");
758       return ForwardingAction::cannotForward();
759     }
760 
761     case VirtualUse::ReadOnly: {
762       // If we model read-only scalars, we need to create a MemoryAccess for it.
763       auto ExecAction = [this, TargetStmt, UseVal]() {
764         if (ModelReadOnlyScalars)
765           TargetStmt->ensureValueRead(UseVal);
766 
767         NumReadOnlyCopied++;
768         TotalReadOnlyCopied++;
769 
770         // Note that we cannot return true here. With a operand tree
771         // depth of 0, UseVal is the use in TargetStmt that we try to replace.
772         // With -polly-analyze-read-only-scalars=true we would ensure the
773         // existence of a MemoryAccess (which already exists for a leaf) and be
774         // removed again by tryForwardTree because it's goal is to remove this
775         // scalar MemoryAccess. It interprets FD_CanForwardTree as the
776         // permission to do so.
777         return false;
778       };
779       return ForwardingAction::canForward(ExecAction, {}, false);
780     }
781 
782     case VirtualUse::Intra:
783       // Knowing that UseStmt and DefStmt are the same statement instance, just
784       // reuse the information about UseStmt for DefStmt
785       DefStmt = UseStmt;
786 
787       LLVM_FALLTHROUGH;
788     case VirtualUse::Inter:
789       Instruction *Inst = cast<Instruction>(UseVal);
790 
791       if (!DefStmt) {
792         DefStmt = S->getStmtFor(Inst);
793         if (!DefStmt)
794           return ForwardingAction::cannotForward();
795       }
796 
797       DefLoop = LI->getLoopFor(Inst->getParent());
798 
799       ForwardingAction SpeculativeResult =
800           forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop);
801       if (SpeculativeResult.Decision != FD_NotApplicable)
802         return SpeculativeResult;
803 
804       ForwardingAction KnownResult = forwardKnownLoad(
805           TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop);
806       if (KnownResult.Decision != FD_NotApplicable)
807         return KnownResult;
808 
809       ForwardingAction ReloadResult = reloadKnownContent(
810           TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop);
811       if (ReloadResult.Decision != FD_NotApplicable)
812         return ReloadResult;
813 
814       // When no method is found to forward the operand tree, we effectively
815       // cannot handle it.
816       LLVM_DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
817       return ForwardingAction::cannotForward();
818     }
819 
820     llvm_unreachable("Case unhandled");
821   }
822 
823   /// Determines whether an operand tree can be forwarded. Previous evaluations
824   /// are cached.
825   ///
826   /// @param TargetStmt  The statement the operand tree will be copied to.
827   /// @param UseVal      The value (usually an instruction) which is root of an
828   ///                    operand tree.
829   /// @param UseStmt     The statement that uses @p UseVal.
830   /// @param UseLoop     The loop @p UseVal is used in.
831   ///
832   /// @return FD_CannotForward        if @p UseVal cannot be forwarded.
833   ///         FD_CanForwardLeaf       if @p UseVal is forwardable, but not
834   ///                                 profitable.
835   ///         FD_CanForwardProfitably if @p UseVal is forwardable and useful to
836   ///                                 do.
837   ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
838                                  ScopStmt *UseStmt, Loop *UseLoop) {
839     // Lookup any cached evaluation.
840     auto It = ForwardingActions.find({UseVal, UseStmt});
841     if (It != ForwardingActions.end())
842       return It->second.Decision;
843 
844     // Make a new evaluation.
845     ForwardingAction Action =
846         forwardTreeImpl(TargetStmt, UseVal, UseStmt, UseLoop);
847     ForwardingDecision Result = Action.Decision;
848 
849     // Remember for the next time.
850     assert(!ForwardingActions.count({UseVal, UseStmt}) &&
851            "circular dependency?");
852     ForwardingActions.insert({{UseVal, UseStmt}, std::move(Action)});
853 
854     return Result;
855   }
856 
857   /// Forward an operand tree using cached actions.
858   ///
859   /// @param Stmt   Statement the operand tree is moved into.
860   /// @param UseVal Root of the operand tree within @p Stmt.
861   /// @param RA     The MemoryAccess for @p UseVal that the forwarding intends
862   ///               to remove.
863   void applyForwardingActions(ScopStmt *Stmt, Value *UseVal, MemoryAccess *RA) {
864     using ChildItTy =
865         decltype(std::declval<ForwardingAction>().Depends.begin());
866     using EdgeTy = std::pair<ForwardingAction *, ChildItTy>;
867 
868     DenseSet<ForwardingAction::KeyTy> Visited;
869     SmallVector<EdgeTy, 32> Stack;
870     SmallVector<ForwardingAction *, 32> Ordered;
871 
872     // Seed the tree search using the root value.
873     assert(ForwardingActions.count({UseVal, Stmt}));
874     ForwardingAction *RootAction = &ForwardingActions[{UseVal, Stmt}];
875     Stack.emplace_back(RootAction, RootAction->Depends.begin());
876 
877     // Compute the postorder of the operand tree: all operands of an instruction
878     // must be visited before the instruction itself. As an additional
879     // requirement, the topological ordering must be 'compact': Any subtree node
880     // must not be interleaved with nodes from a non-shared subtree. This is
881     // because the same llvm::Instruction can be materialized multiple times as
882     // used at different ScopStmts which might be different values. Intersecting
883     // these lifetimes may result in miscompilations.
884     // FIXME: Intersecting lifetimes might still be possible for the roots
885     // themselves, since instructions are just prepended to a ScopStmt's
886     // instruction list.
887     while (!Stack.empty()) {
888       EdgeTy &Top = Stack.back();
889       ForwardingAction *TopAction = Top.first;
890       ChildItTy &TopEdge = Top.second;
891 
892       if (TopEdge == TopAction->Depends.end()) {
893         // Postorder sorting
894         Ordered.push_back(TopAction);
895         Stack.pop_back();
896         continue;
897       }
898       ForwardingAction::KeyTy Key = *TopEdge;
899 
900       // Next edge for this level
901       ++TopEdge;
902 
903       auto VisitIt = Visited.insert(Key);
904       if (!VisitIt.second)
905         continue;
906 
907       assert(ForwardingActions.count(Key) &&
908              "Must not insert new actions during execution phase");
909       ForwardingAction *ChildAction = &ForwardingActions[Key];
910       Stack.emplace_back(ChildAction, ChildAction->Depends.begin());
911     }
912 
913     // Actually, we need the reverse postorder because actions prepend new
914     // instructions. Therefore, the first one will always be the action for the
915     // operand tree's root.
916     assert(Ordered.back() == RootAction);
917     if (RootAction->Execute())
918       Stmt->removeSingleMemoryAccess(RA);
919     Ordered.pop_back();
920     for (auto DepAction : reverse(Ordered)) {
921       assert(DepAction->Decision != FD_Unknown &&
922              DepAction->Decision != FD_CannotForward);
923       assert(DepAction != RootAction);
924       DepAction->Execute();
925     }
926   }
927 
928   /// Try to forward an operand tree rooted in @p RA.
929   bool tryForwardTree(MemoryAccess *RA) {
930     assert(RA->isLatestScalarKind());
931     LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
932 
933     ScopStmt *Stmt = RA->getStatement();
934     Loop *InLoop = Stmt->getSurroundingLoop();
935 
936     isl::map TargetToUse;
937     if (!Known.is_null()) {
938       isl::space DomSpace = Stmt->getDomainSpace();
939       TargetToUse =
940           isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
941     }
942 
943     ForwardingDecision Assessment =
944         forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop);
945 
946     // If considered feasible and profitable, forward it.
947     bool Changed = false;
948     if (Assessment == FD_CanForwardProfitably) {
949       applyForwardingActions(Stmt, RA->getAccessValue(), RA);
950       Changed = true;
951     }
952 
953     ForwardingActions.clear();
954     return Changed;
955   }
956 
957   /// Return which SCoP this instance is processing.
958   Scop *getScop() const { return S; }
959 
960   /// Run the algorithm: Use value read accesses as operand tree roots and try
961   /// to forward them into the statement.
962   bool forwardOperandTrees() {
963     for (ScopStmt &Stmt : *S) {
964       bool StmtModified = false;
965 
966       // Because we are modifying the MemoryAccess list, collect them first to
967       // avoid iterator invalidation.
968       SmallVector<MemoryAccess *, 16> Accs;
969       for (MemoryAccess *RA : Stmt) {
970         if (!RA->isRead())
971           continue;
972         if (!RA->isLatestScalarKind())
973           continue;
974 
975         Accs.push_back(RA);
976       }
977 
978       for (MemoryAccess *RA : Accs) {
979         if (tryForwardTree(RA)) {
980           Modified = true;
981           StmtModified = true;
982           NumForwardedTrees++;
983           TotalForwardedTrees++;
984         }
985       }
986 
987       if (StmtModified) {
988         NumModifiedStmts++;
989         TotalModifiedStmts++;
990       }
991     }
992 
993     if (Modified) {
994       ScopsModified++;
995       S->realignParams();
996     }
997     return Modified;
998   }
999 
1000   /// Print the pass result, performed transformations and the SCoP after the
1001   /// transformation.
1002   void print(raw_ostream &OS, int Indent = 0) {
1003     printStatistics(OS, Indent);
1004 
1005     if (!Modified) {
1006       // This line can easily be checked in regression tests.
1007       OS << "ForwardOpTree executed, but did not modify anything\n";
1008       return;
1009     }
1010 
1011     printStatements(OS, Indent);
1012   }
1013 };
1014 
1015 /// Pass that redirects scalar reads to array elements that are known to contain
1016 /// the same value.
1017 ///
1018 /// This reduces the number of scalar accesses and therefore potentially
1019 /// increases the freedom of the scheduler. In the ideal case, all reads of a
1020 /// scalar definition are redirected (We currently do not care about removing
1021 /// the write in this case).  This is also useful for the main DeLICM pass as
1022 /// there are less scalars to be mapped.
1023 class ForwardOpTree : public ScopPass {
1024 private:
1025   /// The pass implementation, also holding per-scop data.
1026   std::unique_ptr<ForwardOpTreeImpl> Impl;
1027 
1028 public:
1029   static char ID;
1030 
1031   explicit ForwardOpTree() : ScopPass(ID) {}
1032   ForwardOpTree(const ForwardOpTree &) = delete;
1033   ForwardOpTree &operator=(const ForwardOpTree &) = delete;
1034 
1035   void getAnalysisUsage(AnalysisUsage &AU) const override {
1036     AU.addRequiredTransitive<ScopInfoRegionPass>();
1037     AU.addRequired<LoopInfoWrapperPass>();
1038     AU.setPreservesAll();
1039   }
1040 
1041   bool runOnScop(Scop &S) override {
1042     // Free resources for previous SCoP's computation, if not yet done.
1043     releaseMemory();
1044 
1045     LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1046 
1047     {
1048       IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
1049       Impl = std::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
1050 
1051       if (AnalyzeKnown) {
1052         LLVM_DEBUG(dbgs() << "Prepare forwarders...\n");
1053         Impl->computeKnownValues();
1054       }
1055 
1056       LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n");
1057       Impl->forwardOperandTrees();
1058 
1059       if (MaxOpGuard.hasQuotaExceeded()) {
1060         LLVM_DEBUG(dbgs() << "Not all operations completed because of "
1061                              "max_operations exceeded\n");
1062         KnownOutOfQuota++;
1063       }
1064     }
1065 
1066     LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
1067     LLVM_DEBUG(dbgs() << S);
1068 
1069     // Update statistics
1070     auto ScopStats = S.getStatistics();
1071     NumValueWrites += ScopStats.NumValueWrites;
1072     NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
1073     NumPHIWrites += ScopStats.NumPHIWrites;
1074     NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
1075     NumSingletonWrites += ScopStats.NumSingletonWrites;
1076     NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
1077 
1078     return false;
1079   }
1080 
1081   void printScop(raw_ostream &OS, Scop &S) const override {
1082     if (!Impl)
1083       return;
1084 
1085     assert(Impl->getScop() == &S);
1086     Impl->print(OS);
1087   }
1088 
1089   void releaseMemory() override { Impl.reset(); }
1090 }; // class ForwardOpTree
1091 
1092 char ForwardOpTree::ID;
1093 } // namespace
1094 
1095 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
1096 
1097 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
1098                       "Polly - Forward operand tree", false, false)
1099 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1100 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
1101                     "Polly - Forward operand tree", false, false)
1102