1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Move instructions between statements.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "polly/ForwardOpTree.h"
14 #include "polly/Options.h"
15 #include "polly/ScopBuilder.h"
16 #include "polly/ScopInfo.h"
17 #include "polly/ScopPass.h"
18 #include "polly/Support/GICHelper.h"
19 #include "polly/Support/ISLOStream.h"
20 #include "polly/Support/ISLTools.h"
21 #include "polly/Support/VirtualInstruction.h"
22 #include "polly/ZoneAlgo.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/Analysis/LoopInfo.h"
27 #include "llvm/Analysis/ValueTracking.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Value.h"
31 #include "llvm/InitializePasses.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Compiler.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "isl/ctx.h"
39 #include "isl/isl-noexceptions.h"
40 #include <cassert>
41 #include <memory>
42 
43 #define DEBUG_TYPE "polly-optree"
44 
45 using namespace llvm;
46 using namespace polly;
47 
48 static cl::opt<bool>
49     AnalyzeKnown("polly-optree-analyze-known",
50                  cl::desc("Analyze array contents for load forwarding"),
51                  cl::cat(PollyCategory), cl::init(true), cl::Hidden);
52 
53 static cl::opt<bool>
54     NormalizePHIs("polly-optree-normalize-phi",
55                   cl::desc("Replace PHIs by their incoming values"),
56                   cl::cat(PollyCategory), cl::init(false), cl::Hidden);
57 
58 static cl::opt<unsigned>
59     MaxOps("polly-optree-max-ops",
60            cl::desc("Maximum number of ISL operations to invest for known "
61                     "analysis; 0=no limit"),
62            cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
63 
64 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
65 STATISTIC(KnownOutOfQuota,
66           "Analyses aborted because max_operations was reached");
67 
68 STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
69 STATISTIC(TotalKnownLoadsForwarded,
70           "Number of forwarded loads because their value was known");
71 STATISTIC(TotalReloads, "Number of reloaded values");
72 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
73 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
74 STATISTIC(TotalModifiedStmts,
75           "Number of statements with at least one forwarded tree");
76 
77 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
78 
79 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
80 STATISTIC(NumValueWritesInLoops,
81           "Number of scalar value writes nested in affine loops after OpTree");
82 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
83 STATISTIC(NumPHIWritesInLoops,
84           "Number of scalar phi writes nested in affine loops after OpTree");
85 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
86 STATISTIC(NumSingletonWritesInLoops,
87           "Number of singleton writes nested in affine loops after OpTree");
88 
89 namespace {
90 
91 /// The state of whether an operand tree was/can be forwarded.
92 ///
93 /// The items apply to an instructions and its operand tree with the instruction
94 /// as the root element. If the value in question is not an instruction in the
95 /// SCoP, it can be a leaf of an instruction's operand tree.
96 enum ForwardingDecision {
97   /// An uninitialized value.
98   FD_Unknown,
99 
100   /// The root instruction or value cannot be forwarded at all.
101   FD_CannotForward,
102 
103   /// The root instruction or value can be forwarded as a leaf of a larger
104   /// operand tree.
105   /// It does not make sense to move the value itself, it would just replace it
106   /// by a use of itself. For instance, a constant "5" used in a statement can
107   /// be forwarded, but it would just replace it by the same constant "5".
108   /// However, it makes sense to move as an operand of
109   ///
110   ///   %add = add 5, 5
111   ///
112   /// where "5" is moved as part of a larger operand tree. "5" would be placed
113   /// (disregarding for a moment that literal constants don't have a location
114   /// and can be used anywhere) into the same statement as %add would.
115   FD_CanForwardLeaf,
116 
117   /// The root instruction can be forwarded and doing so avoids a scalar
118   /// dependency.
119   ///
120   /// This can be either because the operand tree can be moved to the target
121   /// statement, or a memory access is redirected to read from a different
122   /// location.
123   FD_CanForwardProfitably,
124 
125   /// A forwarding method cannot be applied to the operand tree.
126   /// The difference to FD_CannotForward is that there might be other methods
127   /// that can handle it.
128   FD_NotApplicable
129 };
130 
131 /// Represents the evaluation of and action to taken when forwarding a value
132 /// from an operand tree.
133 struct ForwardingAction {
134   using KeyTy = std::pair<Value *, ScopStmt *>;
135 
136   /// Evaluation of forwarding a value.
137   ForwardingDecision Decision = FD_Unknown;
138 
139   /// Callback to execute the forwarding.
140   /// Returning true allows deleting the polly::MemoryAccess if the value is the
141   /// root of the operand tree (and its elimination the reason why the
142   /// forwarding is done). Return false if the MemoryAccess is reused or there
143   /// might be other users of the read accesses. In the letter case the
144   /// polly::SimplifyPass can remove dead MemoryAccesses.
145   std::function<bool()> Execute = []() -> bool {
146     llvm_unreachable("unspecified how to forward");
147   };
148 
149   /// Other values that need to be forwarded if this action is executed. Their
150   /// actions are executed after this one.
151   SmallVector<KeyTy, 4> Depends;
152 
153   /// Named ctor: The method creating this object does not apply to the kind of
154   /// value, but other methods may.
155   static ForwardingAction notApplicable() {
156     ForwardingAction Result;
157     Result.Decision = FD_NotApplicable;
158     return Result;
159   }
160 
161   /// Named ctor: The value cannot be forwarded.
162   static ForwardingAction cannotForward() {
163     ForwardingAction Result;
164     Result.Decision = FD_CannotForward;
165     return Result;
166   }
167 
168   /// Named ctor: The value can just be used without any preparation.
169   static ForwardingAction triviallyForwardable(bool IsProfitable) {
170     ForwardingAction Result;
171     Result.Decision =
172         IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf;
173     Result.Execute = []() { return true; };
174     return Result;
175   }
176 
177   /// Name ctor: The value can be forwarded by executing an action.
178   static ForwardingAction canForward(std::function<bool()> Execute,
179                                      ArrayRef<KeyTy> Depends,
180                                      bool IsProfitable) {
181     ForwardingAction Result;
182     Result.Decision =
183         IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf;
184     Result.Execute = std::move(Execute);
185     Result.Depends.append(Depends.begin(), Depends.end());
186     return Result;
187   }
188 };
189 
190 /// Implementation of operand tree forwarding for a specific SCoP.
191 ///
192 /// For a statement that requires a scalar value (through a value read
193 /// MemoryAccess), see if its operand can be moved into the statement. If so,
194 /// the MemoryAccess is removed and the all the operand tree instructions are
195 /// moved into the statement. All original instructions are left in the source
196 /// statements. The simplification pass can clean these up.
197 class ForwardOpTreeImpl : ZoneAlgorithm {
198 private:
199   using MemoizationTy = DenseMap<ForwardingAction::KeyTy, ForwardingAction>;
200 
201   /// Scope guard to limit the number of isl operations for this pass.
202   IslMaxOperationsGuard &MaxOpGuard;
203 
204   /// How many instructions have been copied to other statements.
205   int NumInstructionsCopied = 0;
206 
207   /// Number of loads forwarded because their value was known.
208   int NumKnownLoadsForwarded = 0;
209 
210   /// Number of values reloaded from known array elements.
211   int NumReloads = 0;
212 
213   /// How many read-only accesses have been copied.
214   int NumReadOnlyCopied = 0;
215 
216   /// How many operand trees have been forwarded.
217   int NumForwardedTrees = 0;
218 
219   /// Number of statements with at least one forwarded operand tree.
220   int NumModifiedStmts = 0;
221 
222   /// Whether we carried out at least one change to the SCoP.
223   bool Modified = false;
224 
225   /// Cache of how to forward values.
226   /// The key of this map is the llvm::Value to be forwarded and the
227   /// polly::ScopStmt it is forwarded from. This is because the same llvm::Value
228   /// can evaluate differently depending on where it is evaluate. For instance,
229   /// a synthesizable Scev represents a recurrence with an loop but the loop's
230   /// exit value if evaluated after the loop.
231   /// The cached results are only valid for the current TargetStmt.
232   /// CHECKME: ScalarEvolution::getScevAtScope should take care for getting the
233   /// exit value when instantiated outside of the loop. The primary concern is
234   /// ambiguity when crossing PHI nodes, which currently is not supported.
235   MemoizationTy ForwardingActions;
236 
237   /// Contains the zones where array elements are known to contain a specific
238   /// value.
239   /// { [Element[] -> Zone[]] -> ValInst[] }
240   /// @see computeKnown()
241   isl::union_map Known;
242 
243   /// Translator for newly introduced ValInsts to already existing ValInsts such
244   /// that new introduced load instructions can reuse the Known analysis of its
245   /// original load. { ValInst[] -> ValInst[] }
246   isl::union_map Translator;
247 
248   /// Get list of array elements that do contain the same ValInst[] at Domain[].
249   ///
250   /// @param ValInst { Domain[] -> ValInst[] }
251   ///                The values for which we search for alternative locations,
252   ///                per statement instance.
253   ///
254   /// @return { Domain[] -> Element[] }
255   ///         For each statement instance, the array elements that contain the
256   ///         same ValInst.
257   isl::union_map findSameContentElements(isl::union_map ValInst) {
258     assert(!ValInst.is_single_valued().is_false());
259 
260     // { Domain[] }
261     isl::union_set Domain = ValInst.domain();
262 
263     // { Domain[] -> Scatter[] }
264     isl::union_map Schedule = getScatterFor(Domain);
265 
266     // { Element[] -> [Scatter[] -> ValInst[]] }
267     isl::union_map MustKnownCurried =
268         convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
269 
270     // { [Domain[] -> ValInst[]] -> Scatter[] }
271     isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
272 
273     // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
274     isl::union_map SchedValDomVal =
275         DomValSched.range_product(ValInst.range_map()).reverse();
276 
277     // { Element[] -> [Domain[] -> ValInst[]] }
278     isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
279 
280     // { Domain[] -> Element[] }
281     isl::union_map MustKnownMap =
282         MustKnownInst.uncurry().domain().unwrap().reverse();
283     simplify(MustKnownMap);
284 
285     return MustKnownMap;
286   }
287 
288   /// Find a single array element for each statement instance, within a single
289   /// array.
290   ///
291   /// @param MustKnown { Domain[] -> Element[] }
292   ///                  Set of candidate array elements.
293   /// @param Domain    { Domain[] }
294   ///                  The statement instance for which we need elements for.
295   ///
296   /// @return { Domain[] -> Element[] }
297   ///         For each statement instance, an array element out of @p MustKnown.
298   ///         All array elements must be in the same array (Polly does not yet
299   ///         support reading from different accesses using the same
300   ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
301   ///         null.
302   isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
303     // { Domain[] -> Element[] }
304     isl::map Result;
305 
306     // MemoryAccesses can read only elements from a single array
307     // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
308     // Look through all spaces until we find one that contains at least the
309     // wanted statement instance.s
310     for (isl::map Map : MustKnown.get_map_list()) {
311       // Get the array this is accessing.
312       isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
313       ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
314 
315       // No support for generation of indirect array accesses.
316       if (SAI->getBasePtrOriginSAI())
317         continue;
318 
319       // Determine whether this map contains all wanted values.
320       isl::set MapDom = Map.domain();
321       if (!Domain.is_subset(MapDom).is_true())
322         continue;
323 
324       // There might be multiple array elements that contain the same value, but
325       // choose only one of them. lexmin is used because it returns a one-value
326       // mapping, we do not care about which one.
327       // TODO: Get the simplest access function.
328       Result = Map.lexmin();
329       break;
330     }
331 
332     return Result;
333   }
334 
335 public:
336   ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
337       : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
338 
339   /// Compute the zones of known array element contents.
340   ///
341   /// @return True if the computed #Known is usable.
342   bool computeKnownValues() {
343     isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
344 
345     // Check that nothing strange occurs.
346     collectCompatibleElts();
347 
348     {
349       IslQuotaScope QuotaScope = MaxOpGuard.enter();
350 
351       computeCommon();
352       if (NormalizePHIs)
353         computeNormalizedPHIs();
354       Known = computeKnown(true, true);
355 
356       // Preexisting ValInsts use the known content analysis of themselves.
357       Translator = makeIdentityMap(Known.range(), false);
358     }
359 
360     if (!Known || !Translator || !NormalizeMap) {
361       assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
362       Known = nullptr;
363       Translator = nullptr;
364       NormalizeMap = nullptr;
365       LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
366       return false;
367     }
368 
369     KnownAnalyzed++;
370     LLVM_DEBUG(dbgs() << "All known: " << Known << "\n");
371 
372     return true;
373   }
374 
375   void printStatistics(raw_ostream &OS, int Indent = 0) {
376     OS.indent(Indent) << "Statistics {\n";
377     OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
378                           << '\n';
379     OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
380                           << '\n';
381     OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
382     OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
383                           << '\n';
384     OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
385                           << '\n';
386     OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
387                           << NumModifiedStmts << '\n';
388     OS.indent(Indent) << "}\n";
389   }
390 
391   void printStatements(raw_ostream &OS, int Indent = 0) const {
392     OS.indent(Indent) << "After statements {\n";
393     for (auto &Stmt : *S) {
394       OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
395       for (auto *MA : Stmt)
396         MA->print(OS);
397 
398       OS.indent(Indent + 12);
399       Stmt.printInstructions(OS);
400     }
401     OS.indent(Indent) << "}\n";
402   }
403 
404   /// Create a new MemoryAccess of type read and MemoryKind::Array.
405   ///
406   /// @param Stmt           The statement in which the access occurs.
407   /// @param LI             The instruction that does the access.
408   /// @param AccessRelation The array element that each statement instance
409   ///                       accesses.
410   ///
411   /// @param The newly created access.
412   MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
413                                     isl::map AccessRelation) {
414     isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
415     ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
416 
417     // Create a dummy SCEV access, to be replaced anyway.
418     SmallVector<const SCEV *, 4> Sizes;
419     Sizes.reserve(SAI->getNumberOfDimensions());
420     SmallVector<const SCEV *, 4> Subscripts;
421     Subscripts.reserve(SAI->getNumberOfDimensions());
422     for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
423       Sizes.push_back(SAI->getDimensionSize(i));
424       Subscripts.push_back(nullptr);
425     }
426 
427     MemoryAccess *Access =
428         new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
429                          LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
430     S->addAccessFunction(Access);
431     Stmt->addAccess(Access, true);
432 
433     Access->setNewAccessRelation(AccessRelation);
434 
435     return Access;
436   }
437 
438   /// Forward a load by reading from an array element that contains the same
439   /// value. Typically the location it was loaded from.
440   ///
441   /// @param TargetStmt  The statement the operand tree will be copied to.
442   /// @param Inst        The (possibly speculatable) instruction to forward.
443   /// @param UseStmt     The statement that uses @p Inst.
444   /// @param UseLoop     The loop @p Inst is used in.
445   /// @param DefStmt     The statement @p Inst is defined in.
446   /// @param DefLoop     The loop which contains @p Inst.
447   ///
448   /// @return A ForwardingAction object describing the feasibility and
449   ///         profitability evaluation and the callback carrying-out the value
450   ///         forwarding.
451   ForwardingAction forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
452                                     ScopStmt *UseStmt, Loop *UseLoop,
453                                     ScopStmt *DefStmt, Loop *DefLoop) {
454     // Cannot do anything without successful known analysis.
455     if (Known.is_null() || Translator.is_null() ||
456         MaxOpGuard.hasQuotaExceeded())
457       return ForwardingAction::notApplicable();
458 
459     LoadInst *LI = dyn_cast<LoadInst>(Inst);
460     if (!LI)
461       return ForwardingAction::notApplicable();
462 
463     ForwardingDecision OpDecision =
464         forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop);
465     switch (OpDecision) {
466     case FD_CanForwardProfitably:
467     case FD_CanForwardLeaf:
468       break;
469     case FD_CannotForward:
470       return ForwardingAction::cannotForward();
471     default:
472       llvm_unreachable("Shouldn't return this");
473     }
474 
475     MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
476     if (Access) {
477       // If the load is already in the statement, no forwarding is necessary.
478       // However, it might happen that the LoadInst is already present in the
479       // statement's instruction list. In that case we do as follows:
480       // - For the evaluation, we can trivially forward it as it is
481       //   benefit of forwarding an already present instruction.
482       // - For the execution, prepend the instruction (to make it
483       //   available to all instructions following in the instruction list), but
484       //   do not add another MemoryAccess.
485       auto ExecAction = [this, TargetStmt, LI, Access]() -> bool {
486         TargetStmt->prependInstruction(LI);
487         LLVM_DEBUG(
488             dbgs() << "    forwarded known load with preexisting MemoryAccess"
489                    << Access << "\n");
490         (void)Access;
491 
492         NumKnownLoadsForwarded++;
493         TotalKnownLoadsForwarded++;
494         return true;
495       };
496       return ForwardingAction::canForward(
497           ExecAction, {{LI->getPointerOperand(), DefStmt}}, true);
498     }
499 
500     // Allow the following Isl calculations (until we return the
501     // ForwardingAction, excluding the code inside the lambda that will be
502     // executed later) to fail.
503     IslQuotaScope QuotaScope = MaxOpGuard.enter();
504 
505     // { DomainDef[] -> ValInst[] }
506     isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
507     assert(!isNormalized(ExpectedVal).is_false() &&
508            "LoadInsts are always normalized");
509 
510     // { DomainUse[] -> DomainTarget[] }
511     isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
512 
513     // { DomainTarget[] -> ValInst[] }
514     isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
515     isl::union_map TranslatedExpectedVal =
516         isl::union_map(TargetExpectedVal).apply_range(Translator);
517 
518     // { DomainTarget[] -> Element[] }
519     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
520 
521     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
522     if (!SameVal)
523       return ForwardingAction::notApplicable();
524 
525     LLVM_DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
526                       << "\n");
527     LLVM_DEBUG(dbgs() << "      candidate elements where " << Candidates
528                       << "\n");
529 
530     // { ValInst[] }
531     isl::space ValInstSpace = ExpectedVal.get_space().range();
532 
533     // After adding a new load to the SCoP, also update the Known content
534     // about it. The new load will have a known ValInst of
535     // { [DomainTarget[] -> Value[]] }
536     // but which -- because it is a copy of it -- has same value as the
537     // { [DomainDef[] -> Value[]] }
538     // that it replicates. Instead of  cloning the known content of
539     // [DomainDef[] -> Value[]]
540     // for DomainTarget[], we add a 'translator' that maps
541     // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
542     // before comparing to the known content.
543     // TODO: 'Translator' could also be used to map PHINodes to their incoming
544     // ValInsts.
545     isl::map LocalTranslator;
546     if (!ValInstSpace.is_wrapping().is_false()) {
547       // { DefDomain[] -> Value[] }
548       isl::map ValInsts = ExpectedVal.range().unwrap();
549 
550       // { DefDomain[] }
551       isl::set DefDomain = ValInsts.domain();
552 
553       // { Value[] }
554       isl::space ValSpace = ValInstSpace.unwrap().range();
555 
556       // { Value[] -> Value[] }
557       isl::map ValToVal =
558           isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
559 
560       // { DomainDef[] -> DomainTarget[] }
561       isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt);
562 
563       // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
564       LocalTranslator = DefToTarget.reverse().product(ValToVal);
565       LLVM_DEBUG(dbgs() << "      local translator is " << LocalTranslator
566                         << "\n");
567 
568       if (!LocalTranslator)
569         return ForwardingAction::notApplicable();
570     }
571 
572     auto ExecAction = [this, TargetStmt, LI, SameVal,
573                        LocalTranslator]() -> bool {
574       TargetStmt->prependInstruction(LI);
575       MemoryAccess *Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
576       LLVM_DEBUG(dbgs() << "    forwarded known load with new MemoryAccess"
577                         << Access << "\n");
578       (void)Access;
579 
580       if (LocalTranslator)
581         Translator = Translator.add_map(LocalTranslator);
582 
583       NumKnownLoadsForwarded++;
584       TotalKnownLoadsForwarded++;
585       return true;
586     };
587     return ForwardingAction::canForward(
588         ExecAction, {{LI->getPointerOperand(), DefStmt}}, true);
589   }
590 
591   /// Forward a scalar by redirecting the access to an array element that stores
592   /// the same value.
593   ///
594   /// @param TargetStmt  The statement the operand tree will be copied to.
595   /// @param Inst        The scalar to forward.
596   /// @param UseStmt     The statement that uses @p Inst.
597   /// @param UseLoop     The loop @p Inst is used in.
598   /// @param DefStmt     The statement @p Inst is defined in.
599   /// @param DefLoop     The loop which contains @p Inst.
600   ///
601   /// @return A ForwardingAction object describing the feasibility and
602   ///         profitability evaluation and the callback carrying-out the value
603   ///         forwarding.
604   ForwardingAction reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
605                                       ScopStmt *UseStmt, Loop *UseLoop,
606                                       ScopStmt *DefStmt, Loop *DefLoop) {
607     // Cannot do anything without successful known analysis.
608     if (Known.is_null() || Translator.is_null() ||
609         MaxOpGuard.hasQuotaExceeded())
610       return ForwardingAction::notApplicable();
611 
612     // Don't spend too much time analyzing whether it can be reloaded.
613     IslQuotaScope QuotaScope = MaxOpGuard.enter();
614 
615     // { DomainDef[] -> ValInst[] }
616     isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
617 
618     // { DomainUse[] -> DomainTarget[] }
619     isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
620 
621     // { DomainTarget[] -> ValInst[] }
622     isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
623     isl::union_map TranslatedExpectedVal =
624         TargetExpectedVal.apply_range(Translator);
625 
626     // { DomainTarget[] -> Element[] }
627     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
628 
629     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
630     simplify(SameVal);
631     if (!SameVal)
632       return ForwardingAction::notApplicable();
633 
634     auto ExecAction = [this, TargetStmt, Inst, SameVal]() {
635       MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
636       if (!Access)
637         Access = TargetStmt->ensureValueRead(Inst);
638       Access->setNewAccessRelation(SameVal);
639 
640       TotalReloads++;
641       NumReloads++;
642       return false;
643     };
644 
645     return ForwardingAction::canForward(ExecAction, {}, true);
646   }
647 
648   /// Forwards a speculatively executable instruction.
649   ///
650   /// @param TargetStmt  The statement the operand tree will be copied to.
651   /// @param UseInst     The (possibly speculatable) instruction to forward.
652   /// @param DefStmt     The statement @p UseInst is defined in.
653   /// @param DefLoop     The loop which contains @p UseInst.
654   ///
655   /// @return A ForwardingAction object describing the feasibility and
656   ///         profitability evaluation and the callback carrying-out the value
657   ///         forwarding.
658   ForwardingAction forwardSpeculatable(ScopStmt *TargetStmt,
659                                        Instruction *UseInst, ScopStmt *DefStmt,
660                                        Loop *DefLoop) {
661     // PHIs, unless synthesizable, are not yet supported.
662     if (isa<PHINode>(UseInst))
663       return ForwardingAction::notApplicable();
664 
665     // Compatible instructions must satisfy the following conditions:
666     // 1. Idempotent (instruction will be copied, not moved; although its
667     //    original instance might be removed by simplification)
668     // 2. Not access memory (There might be memory writes between)
669     // 3. Not cause undefined behaviour (we might copy to a location when the
670     //    original instruction was no executed; this is currently not possible
671     //    because we do not forward PHINodes)
672     // 4. Not leak memory if executed multiple times (i.e. malloc)
673     //
674     // Instruction::mayHaveSideEffects is not sufficient because it considers
675     // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
676     // not sufficient because it allows memory accesses.
677     if (mayBeMemoryDependent(*UseInst))
678       return ForwardingAction::notApplicable();
679 
680     SmallVector<ForwardingAction::KeyTy, 4> Depends;
681     Depends.reserve(UseInst->getNumOperands());
682     for (Value *OpVal : UseInst->operand_values()) {
683       ForwardingDecision OpDecision =
684           forwardTree(TargetStmt, OpVal, DefStmt, DefLoop);
685       switch (OpDecision) {
686       case FD_CannotForward:
687         return ForwardingAction::cannotForward();
688 
689       case FD_CanForwardLeaf:
690       case FD_CanForwardProfitably:
691         Depends.emplace_back(OpVal, DefStmt);
692         break;
693 
694       case FD_NotApplicable:
695       case FD_Unknown:
696         llvm_unreachable(
697             "forwardTree should never return FD_NotApplicable/FD_Unknown");
698       }
699     }
700 
701     auto ExecAction = [this, TargetStmt, UseInst]() {
702       // To ensure the right order, prepend this instruction before its
703       // operands. This ensures that its operands are inserted before the
704       // instruction using them.
705       TargetStmt->prependInstruction(UseInst);
706       NumInstructionsCopied++;
707       TotalInstructionsCopied++;
708       return true;
709     };
710     return ForwardingAction::canForward(ExecAction, Depends, true);
711   }
712 
713   /// Determines whether an operand tree can be forwarded and returns
714   /// instructions how to do so in the form of a ForwardingAction object.
715   ///
716   /// @param TargetStmt  The statement the operand tree will be copied to.
717   /// @param UseVal      The value (usually an instruction) which is root of an
718   ///                    operand tree.
719   /// @param UseStmt     The statement that uses @p UseVal.
720   /// @param UseLoop     The loop @p UseVal is used in.
721   ///
722   /// @return A ForwardingAction object describing the feasibility and
723   ///         profitability evaluation and the callback carrying-out the value
724   ///         forwarding.
725   ForwardingAction forwardTreeImpl(ScopStmt *TargetStmt, Value *UseVal,
726                                    ScopStmt *UseStmt, Loop *UseLoop) {
727     ScopStmt *DefStmt = nullptr;
728     Loop *DefLoop = nullptr;
729 
730     // { DefDomain[] -> TargetDomain[] }
731     isl::map DefToTarget;
732 
733     VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
734     switch (VUse.getKind()) {
735     case VirtualUse::Constant:
736     case VirtualUse::Block:
737     case VirtualUse::Hoisted:
738       // These can be used anywhere without special considerations.
739       return ForwardingAction::triviallyForwardable(false);
740 
741     case VirtualUse::Synthesizable: {
742       // Check if the value is synthesizable at the new location as well. This
743       // might be possible when leaving a loop for which ScalarEvolution is
744       // unable to derive the exit value for.
745       // TODO: If there is a LCSSA PHI at the loop exit, use that one.
746       // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
747       // do not forward past its loop header. This would require us to use a
748       // previous loop induction variable instead the current one. We currently
749       // do not allow forwarding PHI nodes, thus this should never occur (the
750       // only exception where no phi is necessary being an unreachable loop
751       // without edge from the outside).
752       VirtualUse TargetUse = VirtualUse::create(
753           S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
754       if (TargetUse.getKind() == VirtualUse::Synthesizable)
755         return ForwardingAction::triviallyForwardable(false);
756 
757       LLVM_DEBUG(
758           dbgs() << "    Synthesizable would not be synthesizable anymore: "
759                  << *UseVal << "\n");
760       return ForwardingAction::cannotForward();
761     }
762 
763     case VirtualUse::ReadOnly: {
764       // If we model read-only scalars, we need to create a MemoryAccess for it.
765       auto ExecAction = [this, TargetStmt, UseVal]() {
766         if (ModelReadOnlyScalars)
767           TargetStmt->ensureValueRead(UseVal);
768 
769         NumReadOnlyCopied++;
770         TotalReadOnlyCopied++;
771 
772         // Note that we cannot return true here. With a operand tree
773         // depth of 0, UseVal is the use in TargetStmt that we try to replace.
774         // With -polly-analyze-read-only-scalars=true we would ensure the
775         // existence of a MemoryAccess (which already exists for a leaf) and be
776         // removed again by tryForwardTree because it's goal is to remove this
777         // scalar MemoryAccess. It interprets FD_CanForwardTree as the
778         // permission to do so.
779         return false;
780       };
781       return ForwardingAction::canForward(ExecAction, {}, false);
782     }
783 
784     case VirtualUse::Intra:
785       // Knowing that UseStmt and DefStmt are the same statement instance, just
786       // reuse the information about UseStmt for DefStmt
787       DefStmt = UseStmt;
788 
789       LLVM_FALLTHROUGH;
790     case VirtualUse::Inter:
791       Instruction *Inst = cast<Instruction>(UseVal);
792 
793       if (!DefStmt) {
794         DefStmt = S->getStmtFor(Inst);
795         if (!DefStmt)
796           return ForwardingAction::cannotForward();
797       }
798 
799       DefLoop = LI->getLoopFor(Inst->getParent());
800 
801       ForwardingAction SpeculativeResult =
802           forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop);
803       if (SpeculativeResult.Decision != FD_NotApplicable)
804         return SpeculativeResult;
805 
806       ForwardingAction KnownResult = forwardKnownLoad(
807           TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop);
808       if (KnownResult.Decision != FD_NotApplicable)
809         return KnownResult;
810 
811       ForwardingAction ReloadResult = reloadKnownContent(
812           TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop);
813       if (ReloadResult.Decision != FD_NotApplicable)
814         return ReloadResult;
815 
816       // When no method is found to forward the operand tree, we effectively
817       // cannot handle it.
818       LLVM_DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
819       return ForwardingAction::cannotForward();
820     }
821 
822     llvm_unreachable("Case unhandled");
823   }
824 
825   /// Determines whether an operand tree can be forwarded. Previous evaluations
826   /// are cached.
827   ///
828   /// @param TargetStmt  The statement the operand tree will be copied to.
829   /// @param UseVal      The value (usually an instruction) which is root of an
830   ///                    operand tree.
831   /// @param UseStmt     The statement that uses @p UseVal.
832   /// @param UseLoop     The loop @p UseVal is used in.
833   ///
834   /// @return FD_CannotForward        if @p UseVal cannot be forwarded.
835   ///         FD_CanForwardLeaf       if @p UseVal is forwardable, but not
836   ///                                 profitable.
837   ///         FD_CanForwardProfitably if @p UseVal is forwardable and useful to
838   ///                                 do.
839   ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
840                                  ScopStmt *UseStmt, Loop *UseLoop) {
841     // Lookup any cached evaluation.
842     auto It = ForwardingActions.find({UseVal, UseStmt});
843     if (It != ForwardingActions.end())
844       return It->second.Decision;
845 
846     // Make a new evaluation.
847     ForwardingAction Action =
848         forwardTreeImpl(TargetStmt, UseVal, UseStmt, UseLoop);
849     ForwardingDecision Result = Action.Decision;
850 
851     // Remember for the next time.
852     assert(!ForwardingActions.count({UseVal, UseStmt}) &&
853            "circular dependency?");
854     ForwardingActions.insert({{UseVal, UseStmt}, std::move(Action)});
855 
856     return Result;
857   }
858 
859   /// Forward an operand tree using cached actions.
860   ///
861   /// @param Stmt   Statement the operand tree is moved into.
862   /// @param UseVal Root of the operand tree within @p Stmt.
863   /// @param RA     The MemoryAccess for @p UseVal that the forwarding intends
864   ///               to remove.
865   void applyForwardingActions(ScopStmt *Stmt, Value *UseVal, MemoryAccess *RA) {
866     using ChildItTy =
867         decltype(std::declval<ForwardingAction>().Depends.begin());
868     using EdgeTy = std::pair<ForwardingAction *, ChildItTy>;
869 
870     DenseSet<ForwardingAction::KeyTy> Visited;
871     SmallVector<EdgeTy, 32> Stack;
872     SmallVector<ForwardingAction *, 32> Ordered;
873 
874     // Seed the tree search using the root value.
875     assert(ForwardingActions.count({UseVal, Stmt}));
876     ForwardingAction *RootAction = &ForwardingActions[{UseVal, Stmt}];
877     Stack.emplace_back(RootAction, RootAction->Depends.begin());
878 
879     // Compute the postorder of the operand tree: all operands of an instruction
880     // must be visited before the instruction itself. As an additional
881     // requirement, the topological ordering must be 'compact': Any subtree node
882     // must not be interleaved with nodes from a non-shared subtree. This is
883     // because the same llvm::Instruction can be materialized multiple times as
884     // used at different ScopStmts which might be different values. Intersecting
885     // these lifetimes may result in miscompilations.
886     // FIXME: Intersecting lifetimes might still be possible for the roots
887     // themselves, since instructions are just prepended to a ScopStmt's
888     // instruction list.
889     while (!Stack.empty()) {
890       EdgeTy &Top = Stack.back();
891       ForwardingAction *TopAction = Top.first;
892       ChildItTy &TopEdge = Top.second;
893 
894       if (TopEdge == TopAction->Depends.end()) {
895         // Postorder sorting
896         Ordered.push_back(TopAction);
897         Stack.pop_back();
898         continue;
899       }
900       ForwardingAction::KeyTy Key = *TopEdge;
901 
902       // Next edge for this level
903       ++TopEdge;
904 
905       auto VisitIt = Visited.insert(Key);
906       if (!VisitIt.second)
907         continue;
908 
909       assert(ForwardingActions.count(Key) &&
910              "Must not insert new actions during execution phase");
911       ForwardingAction *ChildAction = &ForwardingActions[Key];
912       Stack.emplace_back(ChildAction, ChildAction->Depends.begin());
913     }
914 
915     // Actually, we need the reverse postorder because actions prepend new
916     // instructions. Therefore, the first one will always be the action for the
917     // operand tree's root.
918     assert(Ordered.back() == RootAction);
919     if (RootAction->Execute())
920       Stmt->removeSingleMemoryAccess(RA);
921     Ordered.pop_back();
922     for (auto DepAction : reverse(Ordered)) {
923       assert(DepAction->Decision != FD_Unknown &&
924              DepAction->Decision != FD_CannotForward);
925       assert(DepAction != RootAction);
926       DepAction->Execute();
927     }
928   }
929 
930   /// Try to forward an operand tree rooted in @p RA.
931   bool tryForwardTree(MemoryAccess *RA) {
932     assert(RA->isLatestScalarKind());
933     LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
934 
935     ScopStmt *Stmt = RA->getStatement();
936     Loop *InLoop = Stmt->getSurroundingLoop();
937 
938     isl::map TargetToUse;
939     if (!Known.is_null()) {
940       isl::space DomSpace = Stmt->getDomainSpace();
941       TargetToUse =
942           isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
943     }
944 
945     ForwardingDecision Assessment =
946         forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop);
947 
948     // If considered feasible and profitable, forward it.
949     bool Changed = false;
950     if (Assessment == FD_CanForwardProfitably) {
951       applyForwardingActions(Stmt, RA->getAccessValue(), RA);
952       Changed = true;
953     }
954 
955     ForwardingActions.clear();
956     return Changed;
957   }
958 
959   /// Return which SCoP this instance is processing.
960   Scop *getScop() const { return S; }
961 
962   /// Run the algorithm: Use value read accesses as operand tree roots and try
963   /// to forward them into the statement.
964   bool forwardOperandTrees() {
965     for (ScopStmt &Stmt : *S) {
966       bool StmtModified = false;
967 
968       // Because we are modifying the MemoryAccess list, collect them first to
969       // avoid iterator invalidation.
970       SmallVector<MemoryAccess *, 16> Accs;
971       for (MemoryAccess *RA : Stmt) {
972         if (!RA->isRead())
973           continue;
974         if (!RA->isLatestScalarKind())
975           continue;
976 
977         Accs.push_back(RA);
978       }
979 
980       for (MemoryAccess *RA : Accs) {
981         if (tryForwardTree(RA)) {
982           Modified = true;
983           StmtModified = true;
984           NumForwardedTrees++;
985           TotalForwardedTrees++;
986         }
987       }
988 
989       if (StmtModified) {
990         NumModifiedStmts++;
991         TotalModifiedStmts++;
992       }
993     }
994 
995     if (Modified) {
996       ScopsModified++;
997       S->realignParams();
998     }
999     return Modified;
1000   }
1001 
1002   /// Print the pass result, performed transformations and the SCoP after the
1003   /// transformation.
1004   void print(raw_ostream &OS, int Indent = 0) {
1005     printStatistics(OS, Indent);
1006 
1007     if (!Modified) {
1008       // This line can easily be checked in regression tests.
1009       OS << "ForwardOpTree executed, but did not modify anything\n";
1010       return;
1011     }
1012 
1013     printStatements(OS, Indent);
1014   }
1015 };
1016 
1017 /// Pass that redirects scalar reads to array elements that are known to contain
1018 /// the same value.
1019 ///
1020 /// This reduces the number of scalar accesses and therefore potentially
1021 /// increases the freedom of the scheduler. In the ideal case, all reads of a
1022 /// scalar definition are redirected (We currently do not care about removing
1023 /// the write in this case).  This is also useful for the main DeLICM pass as
1024 /// there are less scalars to be mapped.
1025 class ForwardOpTree : public ScopPass {
1026 private:
1027   /// The pass implementation, also holding per-scop data.
1028   std::unique_ptr<ForwardOpTreeImpl> Impl;
1029 
1030 public:
1031   static char ID;
1032 
1033   explicit ForwardOpTree() : ScopPass(ID) {}
1034   ForwardOpTree(const ForwardOpTree &) = delete;
1035   ForwardOpTree &operator=(const ForwardOpTree &) = delete;
1036 
1037   void getAnalysisUsage(AnalysisUsage &AU) const override {
1038     AU.addRequiredTransitive<ScopInfoRegionPass>();
1039     AU.addRequired<LoopInfoWrapperPass>();
1040     AU.setPreservesAll();
1041   }
1042 
1043   bool runOnScop(Scop &S) override {
1044     // Free resources for previous SCoP's computation, if not yet done.
1045     releaseMemory();
1046 
1047     LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1048 
1049     {
1050       IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
1051       Impl = std::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
1052 
1053       if (AnalyzeKnown) {
1054         LLVM_DEBUG(dbgs() << "Prepare forwarders...\n");
1055         Impl->computeKnownValues();
1056       }
1057 
1058       LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n");
1059       Impl->forwardOperandTrees();
1060 
1061       if (MaxOpGuard.hasQuotaExceeded()) {
1062         LLVM_DEBUG(dbgs() << "Not all operations completed because of "
1063                              "max_operations exceeded\n");
1064         KnownOutOfQuota++;
1065       }
1066     }
1067 
1068     LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
1069     LLVM_DEBUG(dbgs() << S);
1070 
1071     // Update statistics
1072     auto ScopStats = S.getStatistics();
1073     NumValueWrites += ScopStats.NumValueWrites;
1074     NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
1075     NumPHIWrites += ScopStats.NumPHIWrites;
1076     NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
1077     NumSingletonWrites += ScopStats.NumSingletonWrites;
1078     NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
1079 
1080     return false;
1081   }
1082 
1083   void printScop(raw_ostream &OS, Scop &S) const override {
1084     if (!Impl)
1085       return;
1086 
1087     assert(Impl->getScop() == &S);
1088     Impl->print(OS);
1089   }
1090 
1091   void releaseMemory() override { Impl.reset(); }
1092 }; // class ForwardOpTree
1093 
1094 char ForwardOpTree::ID;
1095 } // namespace
1096 
1097 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
1098 
1099 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
1100                       "Polly - Forward operand tree", false, false)
1101 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1102 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
1103                     "Polly - Forward operand tree", false, false)
1104