1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Move instructions between statements.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "polly/ForwardOpTree.h"
15 #include "polly/Options.h"
16 #include "polly/ScopBuilder.h"
17 #include "polly/ScopInfo.h"
18 #include "polly/ScopPass.h"
19 #include "polly/Support/GICHelper.h"
20 #include "polly/Support/ISLOStream.h"
21 #include "polly/Support/ISLTools.h"
22 #include "polly/Support/VirtualInstruction.h"
23 #include "polly/ZoneAlgo.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/Analysis/LoopInfo.h"
28 #include "llvm/Analysis/ValueTracking.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Value.h"
32 #include "llvm/Pass.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Compiler.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/ErrorHandling.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "isl/ctx.h"
40 #include "isl/isl-noexceptions.h"
41 #include <cassert>
42 #include <memory>
43 
44 #define DEBUG_TYPE "polly-optree"
45 
46 using namespace llvm;
47 using namespace polly;
48 
49 static cl::opt<bool>
50     AnalyzeKnown("polly-optree-analyze-known",
51                  cl::desc("Analyze array contents for load forwarding"),
52                  cl::cat(PollyCategory), cl::init(true), cl::Hidden);
53 
54 static cl::opt<unsigned>
55     MaxOps("polly-optree-max-ops",
56            cl::desc("Maximum number of ISL operations to invest for known "
57                     "analysis; 0=no limit"),
58            cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
59 
60 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
61 STATISTIC(KnownOutOfQuota,
62           "Analyses aborted because max_operations was reached");
63 
64 STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
65 STATISTIC(TotalKnownLoadsForwarded,
66           "Number of forwarded loads because their value was known");
67 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
68 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
69 STATISTIC(TotalModifiedStmts,
70           "Number of statements with at least one forwarded tree");
71 
72 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
73 
74 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
75 STATISTIC(NumValueWritesInLoops,
76           "Number of scalar value writes nested in affine loops after OpTree");
77 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
78 STATISTIC(NumPHIWritesInLoops,
79           "Number of scalar phi writes nested in affine loops after OpTree");
80 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
81 STATISTIC(NumSingletonWritesInLoops,
82           "Number of singleton writes nested in affine loops after OpTree");
83 
84 namespace {
85 
86 /// The state of whether an operand tree was/can be forwarded.
87 ///
88 /// The items apply to an instructions and its operand tree with the instruction
89 /// as the root element. If the value in question is not an instruction in the
90 /// SCoP, it can be a leaf of an instruction's operand tree.
91 enum ForwardingDecision {
92   /// The root instruction or value cannot be forwarded at all.
93   FD_CannotForward,
94 
95   /// The root instruction or value can be forwarded as a leaf of a larger
96   /// operand tree.
97   /// It does not make sense to move the value itself, it would just replace it
98   /// by a use of itself. For instance, a constant "5" used in a statement can
99   /// be forwarded, but it would just replace it by the same constant "5".
100   /// However, it makes sense to move as an operand of
101   ///
102   ///   %add = add 5, 5
103   ///
104   /// where "5" is moved as part of a larger operand tree. "5" would be placed
105   /// (disregarding for a moment that literal constants don't have a location
106   /// and can be used anywhere) into the same statement as %add would.
107   FD_CanForwardLeaf,
108 
109   /// The root instruction can be forwarded in a non-trivial way. This requires
110   /// the operand tree root to be an instruction in some statement.
111   FD_CanForwardTree,
112 
113   /// Used to indicate that a forwarding has be carried out successfully.
114   FD_DidForward,
115 
116   /// A forwarding method cannot be applied to the operand tree.
117   /// The difference to FD_CannotForward is that there might be other methods
118   /// that can handle it.
119   /// The conditions that make an operand tree applicable must be checked even
120   /// with DoIt==true because a method following the one that returned
121   /// FD_NotApplicable might have returned FD_CanForwardTree.
122   FD_NotApplicable
123 };
124 
125 /// Implementation of operand tree forwarding for a specific SCoP.
126 ///
127 /// For a statement that requires a scalar value (through a value read
128 /// MemoryAccess), see if its operand can be moved into the statement. If so,
129 /// the MemoryAccess is removed and the all the operand tree instructions are
130 /// moved into the statement. All original instructions are left in the source
131 /// statements. The simplification pass can clean these up.
132 class ForwardOpTreeImpl : ZoneAlgorithm {
133 private:
134   /// Scope guard to limit the number of isl operations for this pass.
135   IslMaxOperationsGuard &MaxOpGuard;
136 
137   /// How many instructions have been copied to other statements.
138   int NumInstructionsCopied = 0;
139 
140   /// Number of loads forwarded because their value was known.
141   int NumKnownLoadsForwarded = 0;
142 
143   /// How many read-only accesses have been copied.
144   int NumReadOnlyCopied = 0;
145 
146   /// How many operand trees have been forwarded.
147   int NumForwardedTrees = 0;
148 
149   /// Number of statements with at least one forwarded operand tree.
150   int NumModifiedStmts = 0;
151 
152   /// Whether we carried out at least one change to the SCoP.
153   bool Modified = false;
154 
155   /// Contains the zones where array elements are known to contain a specific
156   /// value.
157   /// { [Element[] -> Zone[]] -> ValInst[] }
158   /// @see computeKnown()
159   isl::union_map Known;
160 
161   /// Translator for newly introduced ValInsts to already existing ValInsts such
162   /// that new introduced load instructions can reuse the Known analysis of its
163   /// original load. { ValInst[] -> ValInst[] }
164   isl::union_map Translator;
165 
166   /// Get list of array elements that do contain the same ValInst[] at Domain[].
167   ///
168   /// @param ValInst { Domain[] -> ValInst[] }
169   ///                The values for which we search for alternative locations,
170   ///                per statement instance.
171   ///
172   /// @return { Domain[] -> Element[] }
173   ///         For each statement instance, the array elements that contain the
174   ///         same ValInst.
175   isl::union_map findSameContentElements(isl::union_map ValInst) {
176     assert(ValInst.is_single_valued().is_true());
177 
178     // { Domain[] }
179     isl::union_set Domain = ValInst.domain();
180 
181     // { Domain[] -> Scatter[] }
182     isl::union_map Schedule = getScatterFor(Domain);
183 
184     // { Element[] -> [Scatter[] -> ValInst[]] }
185     isl::union_map MustKnownCurried =
186         convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
187 
188     // { [Domain[] -> ValInst[]] -> Scatter[] }
189     isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
190 
191     // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
192     isl::union_map SchedValDomVal =
193         DomValSched.range_product(ValInst.range_map()).reverse();
194 
195     // { Element[] -> [Domain[] -> ValInst[]] }
196     isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
197 
198     // { Domain[] -> Element[] }
199     isl::union_map MustKnownMap =
200         MustKnownInst.uncurry().domain().unwrap().reverse();
201     simplify(MustKnownMap);
202 
203     return MustKnownMap;
204   }
205 
206   /// Find a single array element for each statement instance, within a single
207   /// array.
208   ///
209   /// @param MustKnown { Domain[] -> Element[] }
210   ///                  Set of candidate array elements.
211   /// @param Domain    { Domain[] }
212   ///                  The statement instance for which we need elements for.
213   ///
214   /// @return { Domain[] -> Element[] }
215   ///         For each statement instance, an array element out of @p MustKnown.
216   ///         All array elements must be in the same array (Polly does not yet
217   ///         support reading from different accesses using the same
218   ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
219   ///         null.
220   isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
221     // { Domain[] -> Element[] }
222     isl::map Result;
223 
224     // MemoryAccesses can read only elements from a single array
225     // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
226     // Look through all spaces until we find one that contains at least the
227     // wanted statement instance.s
228     MustKnown.foreach_map([&](isl::map Map) -> isl::stat {
229       // Get the array this is accessing.
230       isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
231       ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
232 
233       // No support for generation of indirect array accesses.
234       if (SAI->getBasePtrOriginSAI())
235         return isl::stat::ok; // continue
236 
237       // Determine whether this map contains all wanted values.
238       isl::set MapDom = Map.domain();
239       if (!Domain.is_subset(MapDom).is_true())
240         return isl::stat::ok; // continue
241 
242       // There might be multiple array elements that contain the same value, but
243       // choose only one of them. lexmin is used because it returns a one-value
244       // mapping, we do not care about which one.
245       // TODO: Get the simplest access function.
246       Result = Map.lexmin();
247       return isl::stat::error; // break
248     });
249 
250     return Result;
251   }
252 
253 public:
254   ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
255       : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
256 
257   /// Compute the zones of known array element contents.
258   ///
259   /// @return True if the computed #Known is usable.
260   bool computeKnownValues() {
261     isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
262 
263     // Check that nothing strange occurs.
264     collectCompatibleElts();
265 
266     {
267       IslQuotaScope QuotaScope = MaxOpGuard.enter();
268 
269       computeCommon();
270       Known = computeKnown(true, true);
271 
272       // Preexisting ValInsts use the known content analysis of themselves.
273       Translator = makeIdentityMap(Known.range(), false);
274     }
275 
276     if (!Known || !Translator) {
277       assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
278       Known = nullptr;
279       Translator = nullptr;
280       DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
281       return false;
282     }
283 
284     KnownAnalyzed++;
285     DEBUG(dbgs() << "All known: " << Known << "\n");
286 
287     return true;
288   }
289 
290   void printStatistics(raw_ostream &OS, int Indent = 0) {
291     OS.indent(Indent) << "Statistics {\n";
292     OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
293                           << '\n';
294     OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
295                           << '\n';
296     OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
297                           << '\n';
298     OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
299                           << '\n';
300     OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
301                           << NumModifiedStmts << '\n';
302     OS.indent(Indent) << "}\n";
303   }
304 
305   void printStatements(raw_ostream &OS, int Indent = 0) const {
306     OS.indent(Indent) << "After statements {\n";
307     for (auto &Stmt : *S) {
308       OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
309       for (auto *MA : Stmt)
310         MA->print(OS);
311 
312       OS.indent(Indent + 12);
313       Stmt.printInstructions(OS);
314     }
315     OS.indent(Indent) << "}\n";
316   }
317 
318   /// Create a new MemoryAccess of type read and MemoryKind::Array.
319   ///
320   /// @param Stmt           The statement in which the access occurs.
321   /// @param LI             The instruction that does the access.
322   /// @param AccessRelation The array element that each statement instance
323   ///                       accesses.
324   ///
325   /// @param The newly created access.
326   MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
327                                     isl::map AccessRelation) {
328     isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
329     ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
330 
331     // Create a dummy SCEV access, to be replaced anyway.
332     SmallVector<const SCEV *, 4> Sizes;
333     Sizes.reserve(SAI->getNumberOfDimensions());
334     SmallVector<const SCEV *, 4> Subscripts;
335     Subscripts.reserve(SAI->getNumberOfDimensions());
336     for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
337       Sizes.push_back(SAI->getDimensionSize(i));
338       Subscripts.push_back(nullptr);
339     }
340 
341     MemoryAccess *Access =
342         new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
343                          LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
344     S->addAccessFunction(Access);
345     Stmt->addAccess(Access, true);
346 
347     Access->setNewAccessRelation(AccessRelation);
348 
349     return Access;
350   }
351 
352   /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a
353   /// use in every instance of @p UseStmt.
354   ///
355   /// @param UseStmt Statement a scalar is used in.
356   /// @param DefStmt Statement a scalar is defined in.
357   ///
358   /// @return { DomainUse[] -> DomainDef[] }
359   isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) {
360     // { DomainUse[] -> Scatter[] }
361     isl::map UseScatter = getScatterFor(UseStmt);
362 
363     // { Zone[] -> DomainDef[] }
364     isl::map ReachDefZone = getScalarReachingDefinition(DefStmt);
365 
366     // { Scatter[] -> DomainDef[] }
367     isl::map ReachDefTimepoints =
368         convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true);
369 
370     // { DomainUse[] -> DomainDef[] }
371     return UseScatter.apply_range(ReachDefTimepoints);
372   }
373 
374   /// Forward a load by reading from an array element that contains the same
375   /// value. Typically the location it was loaded from.
376   ///
377   /// @param TargetStmt  The statement the operand tree will be copied to.
378   /// @param Inst        The (possibly speculatable) instruction to forward.
379   /// @param UseStmt     The statement that uses @p Inst.
380   /// @param UseLoop     The loop @p Inst is used in.
381   /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
382   ///                    A mapping from the statement instance @p Inst is used
383   ///                    to the statement instance it is forwarded to.
384   /// @param DefStmt     The statement @p Inst is defined in.
385   /// @param DefLoop     The loop which contains @p Inst.
386   /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
387   ///                    A mapping from the statement instance @p Inst is
388   ///                    defined to the statement instance it is forwarded to.
389   /// @param DoIt        If false, only determine whether an operand tree can be
390   ///                    forwarded. If true, carry out the forwarding. Do not
391   ///                    use DoIt==true if an operand tree is not known to be
392   ///                    forwardable.
393   ///
394   /// @return FD_NotApplicable  if @p Inst is not a LoadInst.
395   ///         FD_CannotForward  if no array element to load from was found.
396   ///         FD_CanForwardLeaf if the load is already in the target statement
397   ///                           instance.
398   ///         FD_CanForwardTree if @p Inst is forwardable.
399   ///         FD_DidForward     if @p DoIt was true.
400   ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
401                                       ScopStmt *UseStmt, Loop *UseLoop,
402                                       isl::map UseToTarget, ScopStmt *DefStmt,
403                                       Loop *DefLoop, isl::map DefToTarget,
404                                       bool DoIt) {
405     // Cannot do anything without successful known analysis.
406     if (Known.is_null() || Translator.is_null() || UseToTarget.is_null() ||
407         DefToTarget.is_null() || MaxOpGuard.hasQuotaExceeded())
408       return FD_NotApplicable;
409 
410     LoadInst *LI = dyn_cast<LoadInst>(Inst);
411     if (!LI)
412       return FD_NotApplicable;
413 
414     // If the load is already in the statement, no forwarding is necessary.
415     // However, it might happen that the LoadInst is already present in the
416     // statement's instruction list. In that case we do as follows:
417     // - For the evaluation (DoIt==false), we can trivially forward it as it is
418     //   benefit of forwarding an already present instruction.
419     // - For the execution (DoIt==true), prepend the instruction (to make it
420     //   available to all instructions following in the instruction list), but
421     //   do not add another MemoryAccess.
422     MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
423     if (Access && !DoIt)
424       return FD_CanForwardTree;
425 
426     if (DoIt)
427       TargetStmt->prependInstruction(LI);
428 
429     ForwardingDecision OpDecision =
430         forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop,
431                     DefToTarget, DoIt);
432     switch (OpDecision) {
433     case FD_CannotForward:
434       assert(!DoIt);
435       return OpDecision;
436 
437     case FD_CanForwardLeaf:
438     case FD_CanForwardTree:
439       assert(!DoIt);
440       break;
441 
442     case FD_DidForward:
443       assert(DoIt);
444       break;
445 
446     default:
447       llvm_unreachable("Shouldn't return this");
448     }
449 
450     IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
451 
452     // { DomainDef[] -> ValInst[] }
453     isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
454 
455     // { DomainTarget[] -> ValInst[] }
456     isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
457     isl::union_map TranslatedExpectedVal =
458         isl::union_map(TargetExpectedVal).apply_range(Translator);
459 
460     // { DomainTarget[] -> Element[] }
461     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
462 
463     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
464     if (!SameVal)
465       return FD_CannotForward;
466 
467     if (!DoIt)
468       return FD_CanForwardTree;
469 
470     if (Access) {
471       DEBUG(dbgs() << "    forwarded known load with preexisting MemoryAccess"
472                    << Access << "\n");
473     } else {
474       Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
475       DEBUG(dbgs() << "    forwarded known load with new MemoryAccess" << Access
476                    << "\n");
477 
478       // { ValInst[] }
479       isl::space ValInstSpace = ExpectedVal.get_space().range();
480 
481       // After adding a new load to the SCoP, also update the Known content
482       // about it. The new load will have a known ValInst of
483       // { [DomainTarget[] -> Value[]] }
484       // but which -- because it is a copy of it -- has same value as the
485       // { [DomainDef[] -> Value[]] }
486       // that it replicates. Instead of  cloning the known content of
487       // [DomainDef[] -> Value[]]
488       // for DomainTarget[], we add a 'translator' that maps
489       // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
490       // before comparing to the known content.
491       // TODO: 'Translator' could also be used to map PHINodes to their incoming
492       // ValInsts.
493       if (ValInstSpace.is_wrapping()) {
494         // { DefDomain[] -> Value[] }
495         isl::map ValInsts = ExpectedVal.range().unwrap();
496 
497         // { DefDomain[] }
498         isl::set DefDomain = ValInsts.domain();
499 
500         // { Value[] }
501         isl::space ValSpace = ValInstSpace.unwrap().range();
502 
503         // { Value[] -> Value[] }
504         isl::map ValToVal =
505             isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
506 
507         // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
508         isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
509 
510         Translator = Translator.add_map(LocalTranslator);
511         DEBUG(dbgs() << "      local translator is " << LocalTranslator
512                      << "\n");
513       }
514     }
515     DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
516                  << "\n");
517     DEBUG(dbgs() << "      candidate elements where " << Candidates << "\n");
518     assert(Access);
519 
520     NumKnownLoadsForwarded++;
521     TotalKnownLoadsForwarded++;
522     return FD_DidForward;
523   }
524 
525   /// Forwards a speculatively executable instruction.
526   ///
527   /// @param TargetStmt  The statement the operand tree will be copied to.
528   /// @param UseInst     The (possibly speculatable) instruction to forward.
529   /// @param DefStmt     The statement @p UseInst is defined in.
530   /// @param DefLoop     The loop which contains @p UseInst.
531   /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
532   ///                    A mapping from the statement instance @p UseInst is
533   ///                    defined to the statement instance it is forwarded to.
534   /// @param DoIt        If false, only determine whether an operand tree can be
535   ///                    forwarded. If true, carry out the forwarding. Do not
536   ///                    use DoIt==true if an operand tree is not known to be
537   ///                    forwardable.
538   ///
539   /// @return FD_NotApplicable  if @p UseInst is not speculatable.
540   ///         FD_CannotForward  if one of @p UseInst's operands is not
541   ///                           forwardable.
542   ///         FD_CanForwardTree if @p UseInst is forwardable.
543   ///         FD_DidForward     if @p DoIt was true.
544   ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
545                                          Instruction *UseInst,
546                                          ScopStmt *DefStmt, Loop *DefLoop,
547                                          isl::map DefToTarget, bool DoIt) {
548     // PHIs, unless synthesizable, are not yet supported.
549     if (isa<PHINode>(UseInst))
550       return FD_NotApplicable;
551 
552     // Compatible instructions must satisfy the following conditions:
553     // 1. Idempotent (instruction will be copied, not moved; although its
554     //    original instance might be removed by simplification)
555     // 2. Not access memory (There might be memory writes between)
556     // 3. Not cause undefined behaviour (we might copy to a location when the
557     //    original instruction was no executed; this is currently not possible
558     //    because we do not forward PHINodes)
559     // 4. Not leak memory if executed multiple times (i.e. malloc)
560     //
561     // Instruction::mayHaveSideEffects is not sufficient because it considers
562     // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
563     // not sufficient because it allows memory accesses.
564     if (mayBeMemoryDependent(*UseInst))
565       return FD_NotApplicable;
566 
567     if (DoIt) {
568       // To ensure the right order, prepend this instruction before its
569       // operands. This ensures that its operands are inserted before the
570       // instruction using them.
571       // TODO: The operand tree is not really a tree, but a DAG. We should be
572       // able to handle DAGs without duplication.
573       TargetStmt->prependInstruction(UseInst);
574       NumInstructionsCopied++;
575       TotalInstructionsCopied++;
576     }
577 
578     for (Value *OpVal : UseInst->operand_values()) {
579       ForwardingDecision OpDecision =
580           forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DefToTarget, DoIt);
581       switch (OpDecision) {
582       case FD_CannotForward:
583         assert(!DoIt);
584         return FD_CannotForward;
585 
586       case FD_CanForwardLeaf:
587       case FD_CanForwardTree:
588         assert(!DoIt);
589         break;
590 
591       case FD_DidForward:
592         assert(DoIt);
593         break;
594 
595       case FD_NotApplicable:
596         llvm_unreachable("forwardTree should never return FD_NotApplicable");
597       }
598     }
599 
600     if (DoIt)
601       return FD_DidForward;
602     return FD_CanForwardTree;
603   }
604 
605   /// Determines whether an operand tree can be forwarded or carries out a
606   /// forwarding, depending on the @p DoIt flag.
607   ///
608   /// @param TargetStmt  The statement the operand tree will be copied to.
609   /// @param UseVal      The value (usually an instruction) which is root of an
610   ///                    operand tree.
611   /// @param UseStmt     The statement that uses @p UseVal.
612   /// @param UseLoop     The loop @p UseVal is used in.
613   /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
614   ///                    A mapping from the statement instance @p UseVal is used
615   ///                    to the statement instance it is forwarded to.
616   /// @param DoIt        If false, only determine whether an operand tree can be
617   ///                    forwarded. If true, carry out the forwarding. Do not
618   ///                    use DoIt==true if an operand tree is not known to be
619   ///                    forwardable.
620   ///
621   /// @return If DoIt==false, return whether the operand tree can be forwarded.
622   ///         If DoIt==true, return FD_DidForward.
623   ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
624                                  ScopStmt *UseStmt, Loop *UseLoop,
625                                  isl::map UseToTarget, bool DoIt) {
626     ScopStmt *DefStmt = nullptr;
627     Loop *DefLoop = nullptr;
628 
629     // { DefDomain[] -> TargetDomain[] }
630     isl::map DefToTarget;
631 
632     VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
633     switch (VUse.getKind()) {
634     case VirtualUse::Constant:
635     case VirtualUse::Block:
636     case VirtualUse::Hoisted:
637       // These can be used anywhere without special considerations.
638       if (DoIt)
639         return FD_DidForward;
640       return FD_CanForwardLeaf;
641 
642     case VirtualUse::Synthesizable: {
643       // ScopExpander will take care for of generating the code at the new
644       // location.
645       if (DoIt)
646         return FD_DidForward;
647 
648       // Check if the value is synthesizable at the new location as well. This
649       // might be possible when leaving a loop for which ScalarEvolution is
650       // unable to derive the exit value for.
651       // TODO: If there is a LCSSA PHI at the loop exit, use that one.
652       // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
653       // do not forward past its loop header. This would require us to use a
654       // previous loop induction variable instead the current one. We currently
655       // do not allow forwarding PHI nodes, thus this should never occur (the
656       // only exception where no phi is necessary being an unreachable loop
657       // without edge from the outside).
658       VirtualUse TargetUse = VirtualUse::create(
659           S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
660       if (TargetUse.getKind() == VirtualUse::Synthesizable)
661         return FD_CanForwardLeaf;
662 
663       DEBUG(dbgs() << "    Synthesizable would not be synthesizable anymore: "
664                    << *UseVal << "\n");
665       return FD_CannotForward;
666     }
667 
668     case VirtualUse::ReadOnly:
669       // Note that we cannot return FD_CanForwardTree here. With a operand tree
670       // depth of 0, UseVal is the use in TargetStmt that we try to replace.
671       // With -polly-analyze-read-only-scalars=true we would ensure the
672       // existence of a MemoryAccess (which already exists for a leaf) and be
673       // removed again by tryForwardTree because it's goal is to remove this
674       // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
675       // to do so.
676       if (!DoIt)
677         return FD_CanForwardLeaf;
678 
679       // If we model read-only scalars, we need to create a MemoryAccess for it.
680       if (ModelReadOnlyScalars)
681         TargetStmt->ensureValueRead(UseVal);
682 
683       NumReadOnlyCopied++;
684       TotalReadOnlyCopied++;
685       return FD_DidForward;
686 
687     case VirtualUse::Intra:
688       // Knowing that UseStmt and DefStmt are the same statement instance, just
689       // reuse the information about UseStmt for DefStmt
690       DefStmt = UseStmt;
691       DefToTarget = UseToTarget;
692 
693       LLVM_FALLTHROUGH;
694     case VirtualUse::Inter:
695       Instruction *Inst = cast<Instruction>(UseVal);
696 
697       if (!DefStmt) {
698         DefStmt = S->getStmtFor(Inst);
699         if (!DefStmt)
700           return FD_CannotForward;
701       }
702 
703       DefLoop = LI->getLoopFor(Inst->getParent());
704 
705       if (DefToTarget.is_null() && !Known.is_null()) {
706         IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
707 
708         // { UseDomain[] -> DefDomain[] }
709         isl::map UseToDef = computeUseToDefFlowDependency(UseStmt, DefStmt);
710 
711         // { DefDomain[] -> UseDomain[] -> TargetDomain[] } shortened to
712         // { DefDomain[] -> TargetDomain[] }
713         DefToTarget = UseToTarget.apply_domain(UseToDef);
714         simplify(DefToTarget);
715       }
716 
717       ForwardingDecision SpeculativeResult = forwardSpeculatable(
718           TargetStmt, Inst, DefStmt, DefLoop, DefToTarget, DoIt);
719       if (SpeculativeResult != FD_NotApplicable)
720         return SpeculativeResult;
721 
722       ForwardingDecision KnownResult =
723           forwardKnownLoad(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget,
724                            DefStmt, DefLoop, DefToTarget, DoIt);
725       if (KnownResult != FD_NotApplicable)
726         return KnownResult;
727 
728       // When no method is found to forward the operand tree, we effectively
729       // cannot handle it.
730       DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
731       return FD_CannotForward;
732     }
733 
734     llvm_unreachable("Case unhandled");
735   }
736 
737   /// Try to forward an operand tree rooted in @p RA.
738   bool tryForwardTree(MemoryAccess *RA) {
739     assert(RA->isLatestScalarKind());
740     DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
741 
742     ScopStmt *Stmt = RA->getStatement();
743     Loop *InLoop = Stmt->getSurroundingLoop();
744 
745     isl::map TargetToUse;
746     if (!Known.is_null()) {
747       isl::space DomSpace = Stmt->getDomainSpace();
748       TargetToUse =
749           isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
750     }
751 
752     ForwardingDecision Assessment = forwardTree(
753         Stmt, RA->getAccessValue(), Stmt, InLoop, TargetToUse, false);
754     assert(Assessment != FD_DidForward);
755     if (Assessment != FD_CanForwardTree)
756       return false;
757 
758     ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt,
759                                                InLoop, TargetToUse, true);
760     assert(Execution == FD_DidForward &&
761            "A previous positive assessment must also be executable");
762     (void)Execution;
763 
764     Stmt->removeSingleMemoryAccess(RA);
765     return true;
766   }
767 
768   /// Return which SCoP this instance is processing.
769   Scop *getScop() const { return S; }
770 
771   /// Run the algorithm: Use value read accesses as operand tree roots and try
772   /// to forward them into the statement.
773   bool forwardOperandTrees() {
774     for (ScopStmt &Stmt : *S) {
775       bool StmtModified = false;
776 
777       // Because we are modifying the MemoryAccess list, collect them first to
778       // avoid iterator invalidation.
779       SmallVector<MemoryAccess *, 16> Accs;
780       for (MemoryAccess *RA : Stmt) {
781         if (!RA->isRead())
782           continue;
783         if (!RA->isLatestScalarKind())
784           continue;
785 
786         Accs.push_back(RA);
787       }
788 
789       for (MemoryAccess *RA : Accs) {
790         if (tryForwardTree(RA)) {
791           Modified = true;
792           StmtModified = true;
793           NumForwardedTrees++;
794           TotalForwardedTrees++;
795         }
796       }
797 
798       if (StmtModified) {
799         NumModifiedStmts++;
800         TotalModifiedStmts++;
801       }
802     }
803 
804     if (Modified)
805       ScopsModified++;
806     return Modified;
807   }
808 
809   /// Print the pass result, performed transformations and the SCoP after the
810   /// transformation.
811   void print(raw_ostream &OS, int Indent = 0) {
812     printStatistics(OS, Indent);
813 
814     if (!Modified) {
815       // This line can easily be checked in regression tests.
816       OS << "ForwardOpTree executed, but did not modify anything\n";
817       return;
818     }
819 
820     printStatements(OS, Indent);
821   }
822 };
823 
824 /// Pass that redirects scalar reads to array elements that are known to contain
825 /// the same value.
826 ///
827 /// This reduces the number of scalar accesses and therefore potentially
828 /// increases the freedom of the scheduler. In the ideal case, all reads of a
829 /// scalar definition are redirected (We currently do not care about removing
830 /// the write in this case).  This is also useful for the main DeLICM pass as
831 /// there are less scalars to be mapped.
832 class ForwardOpTree : public ScopPass {
833 private:
834   /// The pass implementation, also holding per-scop data.
835   std::unique_ptr<ForwardOpTreeImpl> Impl;
836 
837 public:
838   static char ID;
839 
840   explicit ForwardOpTree() : ScopPass(ID) {}
841   ForwardOpTree(const ForwardOpTree &) = delete;
842   ForwardOpTree &operator=(const ForwardOpTree &) = delete;
843 
844   void getAnalysisUsage(AnalysisUsage &AU) const override {
845     AU.addRequiredTransitive<ScopInfoRegionPass>();
846     AU.addRequired<LoopInfoWrapperPass>();
847     AU.setPreservesAll();
848   }
849 
850   bool runOnScop(Scop &S) override {
851     // Free resources for previous SCoP's computation, if not yet done.
852     releaseMemory();
853 
854     LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
855 
856     {
857       IslMaxOperationsGuard MaxOpGuard(S.getIslCtx(), MaxOps, false);
858       Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
859 
860       if (AnalyzeKnown) {
861         DEBUG(dbgs() << "Prepare forwarders...\n");
862         Impl->computeKnownValues();
863       }
864 
865       DEBUG(dbgs() << "Forwarding operand trees...\n");
866       Impl->forwardOperandTrees();
867 
868       if (MaxOpGuard.hasQuotaExceeded()) {
869         DEBUG(dbgs() << "Not all operations completed because of "
870                         "max_operations exceeded\n");
871         KnownOutOfQuota++;
872       }
873     }
874 
875     DEBUG(dbgs() << "\nFinal Scop:\n");
876     DEBUG(dbgs() << S);
877 
878     // Update statistics
879     auto ScopStats = S.getStatistics();
880     NumValueWrites += ScopStats.NumValueWrites;
881     NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
882     NumPHIWrites += ScopStats.NumPHIWrites;
883     NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
884     NumSingletonWrites += ScopStats.NumSingletonWrites;
885     NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
886 
887     return false;
888   }
889 
890   void printScop(raw_ostream &OS, Scop &S) const override {
891     if (!Impl)
892       return;
893 
894     assert(Impl->getScop() == &S);
895     Impl->print(OS);
896   }
897 
898   void releaseMemory() override { Impl.reset(); }
899 }; // class ForwardOpTree
900 
901 char ForwardOpTree::ID;
902 
903 } // namespace
904 
905 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
906 
907 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
908                       "Polly - Forward operand tree", false, false)
909 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
910 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
911                     "Polly - Forward operand tree", false, false)
912