1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Move instructions between statements.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "polly/ForwardOpTree.h"
15 #include "polly/Options.h"
16 #include "polly/ScopBuilder.h"
17 #include "polly/ScopInfo.h"
18 #include "polly/ScopPass.h"
19 #include "polly/Support/GICHelper.h"
20 #include "polly/Support/ISLOStream.h"
21 #include "polly/Support/ISLTools.h"
22 #include "polly/Support/VirtualInstruction.h"
23 #include "polly/ZoneAlgo.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/Analysis/LoopInfo.h"
28 #include "llvm/Analysis/ValueTracking.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Value.h"
32 #include "llvm/Pass.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Compiler.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/ErrorHandling.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "isl/ctx.h"
40 #include "isl/isl-noexceptions.h"
41 #include <cassert>
42 #include <memory>
43 
44 #define DEBUG_TYPE "polly-optree"
45 
46 using namespace llvm;
47 using namespace polly;
48 
49 static cl::opt<bool>
50     AnalyzeKnown("polly-optree-analyze-known",
51                  cl::desc("Analyze array contents for load forwarding"),
52                  cl::cat(PollyCategory), cl::init(true), cl::Hidden);
53 
54 static cl::opt<unsigned long>
55     MaxOps("polly-optree-max-ops",
56            cl::desc("Maximum number of ISL operations to invest for known "
57                     "analysis; 0=no limit"),
58            cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
59 
60 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
61 STATISTIC(KnownOutOfQuota,
62           "Analyses aborted because max_operations was reached");
63 STATISTIC(KnownIncompatible, "Number of SCoPs incompatible for analysis");
64 
65 STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
66 STATISTIC(TotalKnownLoadsForwarded,
67           "Number of forwarded loads because their value was known");
68 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
69 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
70 STATISTIC(TotalModifiedStmts,
71           "Number of statements with at least one forwarded tree");
72 
73 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
74 
75 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
76 STATISTIC(NumValueWritesInLoops,
77           "Number of scalar value writes nested in affine loops after OpTree");
78 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
79 STATISTIC(NumPHIWritesInLoops,
80           "Number of scalar phi writes nested in affine loops after OpTree");
81 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
82 STATISTIC(NumSingletonWritesInLoops,
83           "Number of singleton writes nested in affine loops after OpTree");
84 
85 namespace {
86 
87 /// The state of whether an operand tree was/can be forwarded.
88 ///
89 /// The items apply to an instructions and its operand tree with the instruction
90 /// as the root element. If the value in question is not an instruction in the
91 /// SCoP, it can be a leaf of an instruction's operand tree.
92 enum ForwardingDecision {
93   /// The root instruction or value cannot be forwarded at all.
94   FD_CannotForward,
95 
96   /// The root instruction or value can be forwarded as a leaf of a larger
97   /// operand tree.
98   /// It does not make sense to move the value itself, it would just replace it
99   /// by a use of itself. For instance, a constant "5" used in a statement can
100   /// be forwarded, but it would just replace it by the same constant "5".
101   /// However, it makes sense to move as an operand of
102   ///
103   ///   %add = add 5, 5
104   ///
105   /// where "5" is moved as part of a larger operand tree. "5" would be placed
106   /// (disregarding for a moment that literal constants don't have a location
107   /// and can be used anywhere) into the same statement as %add would.
108   FD_CanForwardLeaf,
109 
110   /// The root instruction can be forwarded in a non-trivial way. This requires
111   /// the operand tree root to be an instruction in some statement.
112   FD_CanForwardTree,
113 
114   /// Used to indicate that a forwarding has be carried out successfully.
115   FD_DidForward,
116 
117   /// A forwarding method cannot be applied to the operand tree.
118   /// The difference to FD_CannotForward is that there might be other methods
119   /// that can handle it.
120   /// The conditions that make an operand tree applicable must be checked even
121   /// with DoIt==true because a method following the one that returned
122   /// FD_NotApplicable might have returned FD_CanForwardTree.
123   FD_NotApplicable
124 };
125 
126 /// Implementation of operand tree forwarding for a specific SCoP.
127 ///
128 /// For a statement that requires a scalar value (through a value read
129 /// MemoryAccess), see if its operand can be moved into the statement. If so,
130 /// the MemoryAccess is removed and the all the operand tree instructions are
131 /// moved into the statement. All original instructions are left in the source
132 /// statements. The simplification pass can clean these up.
133 class ForwardOpTreeImpl : ZoneAlgorithm {
134 private:
135   /// How many instructions have been copied to other statements.
136   int NumInstructionsCopied = 0;
137 
138   /// Number of loads forwarded because their value was known.
139   int NumKnownLoadsForwarded = 0;
140 
141   /// How many read-only accesses have been copied.
142   int NumReadOnlyCopied = 0;
143 
144   /// How many operand trees have been forwarded.
145   int NumForwardedTrees = 0;
146 
147   /// Number of statements with at least one forwarded operand tree.
148   int NumModifiedStmts = 0;
149 
150   /// Whether we carried out at least one change to the SCoP.
151   bool Modified = false;
152 
153   /// Contains the zones where array elements are known to contain a specific
154   /// value.
155   /// { [Element[] -> Zone[]] -> ValInst[] }
156   /// @see computeKnown()
157   isl::union_map Known;
158 
159   /// Translator for newly introduced ValInsts to already existing ValInsts such
160   /// that new introduced load instructions can reuse the Known analysis of its
161   /// original load. { ValInst[] -> ValInst[] }
162   isl::union_map Translator;
163 
164   /// Get list of array elements that do contain the same ValInst[] at Domain[].
165   ///
166   /// @param ValInst { Domain[] -> ValInst[] }
167   ///                The values for which we search for alternative locations,
168   ///                per statement instance.
169   ///
170   /// @return { Domain[] -> Element[] }
171   ///         For each statement instance, the array elements that contain the
172   ///         same ValInst.
173   isl::union_map findSameContentElements(isl::union_map ValInst) {
174     assert(ValInst.is_single_valued().is_true());
175 
176     // { Domain[] }
177     isl::union_set Domain = ValInst.domain();
178 
179     // { Domain[] -> Scatter[] }
180     isl::union_map Schedule = getScatterFor(Domain);
181 
182     // { Element[] -> [Scatter[] -> ValInst[]] }
183     isl::union_map MustKnownCurried =
184         convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
185 
186     // { [Domain[] -> ValInst[]] -> Scatter[] }
187     isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
188 
189     // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
190     isl::union_map SchedValDomVal =
191         DomValSched.range_product(ValInst.range_map()).reverse();
192 
193     // { Element[] -> [Domain[] -> ValInst[]] }
194     isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
195 
196     // { Domain[] -> Element[] }
197     isl::union_map MustKnownMap =
198         MustKnownInst.uncurry().domain().unwrap().reverse();
199     simplify(MustKnownMap);
200 
201     return MustKnownMap;
202   }
203 
204   /// Find a single array element for each statement instance, within a single
205   /// array.
206   ///
207   /// @param MustKnown { Domain[] -> Element[] }
208   ///                  Set of candidate array elements.
209   /// @param Domain    { Domain[] }
210   ///                  The statement instance for which we need elements for.
211   ///
212   /// @return { Domain[] -> Element[] }
213   ///         For each statement instance, an array element out of @p MustKnown.
214   ///         All array elements must be in the same array (Polly does not yet
215   ///         support reading from different accesses using the same
216   ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
217   ///         null.
218   isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
219     // { Domain[] -> Element[] }
220     isl::map Result;
221 
222     // MemoryAccesses can read only elements from a single array
223     // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
224     // Look through all spaces until we find one that contains at least the
225     // wanted statement instance.s
226     MustKnown.foreach_map([&](isl::map Map) -> isl::stat {
227       // Get the array this is accessing.
228       isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
229       ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
230 
231       // No support for generation of indirect array accesses.
232       if (SAI->getBasePtrOriginSAI())
233         return isl::stat::ok; // continue
234 
235       // Determine whether this map contains all wanted values.
236       isl::set MapDom = Map.domain();
237       if (!Domain.is_subset(MapDom).is_true())
238         return isl::stat::ok; // continue
239 
240       // There might be multiple array elements that contain the same value, but
241       // choose only one of them. lexmin is used because it returns a one-value
242       // mapping, we do not care about which one.
243       // TODO: Get the simplest access function.
244       Result = Map.lexmin();
245       return isl::stat::error; // break
246     });
247 
248     return Result;
249   }
250 
251 public:
252   ForwardOpTreeImpl(Scop *S, LoopInfo *LI)
253       : ZoneAlgorithm("polly-optree", S, LI) {}
254 
255   /// Compute the zones of known array element contents.
256   ///
257   /// @return True if the computed #Known is usable.
258   bool computeKnownValues() {
259     isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
260 
261     // Check that nothing strange occurs.
262     if (!isCompatibleScop()) {
263       KnownIncompatible++;
264       return false;
265     }
266 
267     isl_ctx_reset_error(IslCtx.get());
268     {
269       IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), MaxOps);
270 
271       computeCommon();
272       Known = computeKnown(true, true);
273       simplify(Known);
274 
275       // Preexisting ValInsts use the known content analysis of themselves.
276       Translator = makeIdentityMap(Known.range(), false);
277     }
278 
279     if (!Known || !Translator) {
280       assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
281       KnownOutOfQuota++;
282       Known = nullptr;
283       Translator = nullptr;
284       DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
285       return false;
286     }
287 
288     KnownAnalyzed++;
289     DEBUG(dbgs() << "All known: " << Known << "\n");
290 
291     return true;
292   }
293 
294   void printStatistics(raw_ostream &OS, int Indent = 0) {
295     OS.indent(Indent) << "Statistics {\n";
296     OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
297                           << '\n';
298     OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
299                           << '\n';
300     OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
301                           << '\n';
302     OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
303                           << '\n';
304     OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
305                           << NumModifiedStmts << '\n';
306     OS.indent(Indent) << "}\n";
307   }
308 
309   void printStatements(raw_ostream &OS, int Indent = 0) const {
310     OS.indent(Indent) << "After statements {\n";
311     for (auto &Stmt : *S) {
312       OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
313       for (auto *MA : Stmt)
314         MA->print(OS);
315 
316       OS.indent(Indent + 12);
317       Stmt.printInstructions(OS);
318     }
319     OS.indent(Indent) << "}\n";
320   }
321 
322   /// Create a new MemoryAccess of type read and MemoryKind::Array.
323   ///
324   /// @param Stmt           The statement in which the access occurs.
325   /// @param LI             The instruction that does the access.
326   /// @param AccessRelation The array element that each statement instance
327   ///                       accesses.
328   ///
329   /// @param The newly created access.
330   MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
331                                     isl::map AccessRelation) {
332     isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
333     ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
334 
335     // Create a dummy SCEV access, to be replaced anyway.
336     SmallVector<const SCEV *, 4> Sizes;
337     Sizes.reserve(SAI->getNumberOfDimensions());
338     SmallVector<const SCEV *, 4> Subscripts;
339     Subscripts.reserve(SAI->getNumberOfDimensions());
340     for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
341       Sizes.push_back(SAI->getDimensionSize(i));
342       Subscripts.push_back(nullptr);
343     }
344 
345     MemoryAccess *Access =
346         new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
347                          LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
348     S->addAccessFunction(Access);
349     Stmt->addAccess(Access, true);
350 
351     Access->setNewAccessRelation(AccessRelation);
352 
353     return Access;
354   }
355 
356   /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a
357   /// use in every instance of @p UseStmt.
358   ///
359   /// @param UseStmt Statement a scalar is used in.
360   /// @param DefStmt Statement a scalar is defined in.
361   ///
362   /// @return { DomainUse[] -> DomainDef[] }
363   isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) {
364     // { DomainUse[] -> Scatter[] }
365     isl::map UseScatter = getScatterFor(UseStmt);
366 
367     // { Zone[] -> DomainDef[] }
368     isl::map ReachDefZone = getScalarReachingDefinition(DefStmt);
369 
370     // { Scatter[] -> DomainDef[] }
371     isl::map ReachDefTimepoints =
372         convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true);
373 
374     // { DomainUse[] -> DomainDef[] }
375     return UseScatter.apply_range(ReachDefTimepoints);
376   }
377 
378   /// Forward a load by reading from an array element that contains the same
379   /// value. Typically the location it was loaded from.
380   ///
381   /// @param TargetStmt  The statement the operand tree will be copied to.
382   /// @param Inst        The (possibly speculatable) instruction to forward.
383   /// @param UseStmt     The statement that uses @p Inst.
384   /// @param UseLoop     The loop @p Inst is used in.
385   /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
386   ///                    A mapping from the statement instance @p Inst is used
387   ///                    to the statement instance it is forwarded to.
388   /// @param DefStmt     The statement @p Inst is defined in.
389   /// @param DefLoop     The loop which contains @p Inst.
390   /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
391   ///                    A mapping from the statement instance @p Inst is
392   ///                    defined to the statement instance it is forwarded to.
393   /// @param DoIt        If false, only determine whether an operand tree can be
394   ///                    forwarded. If true, carry out the forwarding. Do not
395   ///                    use DoIt==true if an operand tree is not known to be
396   ///                    forwardable.
397   ///
398   /// @return FD_NotApplicable  if @p Inst is not a LoadInst.
399   ///         FD_CannotForward  if no array element to load from was found.
400   ///         FD_CanForwardLeaf if the load is already in the target statement
401   ///                           instance.
402   ///         FD_CanForwardTree if @p Inst is forwardable.
403   ///         FD_DidForward     if @p DoIt was true.
404   ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
405                                       ScopStmt *UseStmt, Loop *UseLoop,
406                                       isl::map UseToTarget, ScopStmt *DefStmt,
407                                       Loop *DefLoop, isl::map DefToTarget,
408                                       bool DoIt) {
409     // Cannot do anything without successful known analysis.
410     if (Known.is_null())
411       return FD_NotApplicable;
412 
413     LoadInst *LI = dyn_cast<LoadInst>(Inst);
414     if (!LI)
415       return FD_NotApplicable;
416 
417     // If the load is already in the statement, not forwarding is necessary.
418     // However, it might happen that the LoadInst is already present in the
419     // statement's instruction list. In that case we do as follows:
420     // - For the evaluation (DoIt==false), we can trivially forward it as it is
421     //   benefit of forwarding an already present instruction.
422     // - For the execution (DoIt==false), prepend the instruction (to make it
423     //   available to all instructions following in the instruction list), but
424     //   do not add another MemoryAccess.
425     MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
426     if (Access && !DoIt)
427       return FD_CanForwardLeaf;
428 
429     if (DoIt)
430       TargetStmt->prependInstruction(LI);
431 
432     ForwardingDecision OpDecision =
433         forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop,
434                     DefToTarget, DoIt);
435     switch (OpDecision) {
436     case FD_CannotForward:
437       assert(!DoIt);
438       return OpDecision;
439 
440     case FD_CanForwardLeaf:
441     case FD_CanForwardTree:
442       assert(!DoIt);
443       break;
444 
445     case FD_DidForward:
446       assert(DoIt);
447       break;
448 
449     default:
450       llvm_unreachable("Shouldn't return this");
451     }
452 
453     // { DomainDef[] -> ValInst[] }
454     isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
455 
456     // { DomainTarget[] -> ValInst[] }
457     isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
458     isl::union_map TranslatedExpectedVal =
459         isl::union_map(TargetExpectedVal).apply_range(Translator);
460 
461     // { DomainTarget[] -> Element[] }
462     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
463 
464     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
465     if (!SameVal)
466       return FD_CannotForward;
467 
468     if (!DoIt)
469       return FD_CanForwardTree;
470 
471     if (Access) {
472       DEBUG(dbgs() << "    forwarded known load with preexisting MemoryAccess"
473                    << Access << "\n");
474     } else {
475       Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
476       DEBUG(dbgs() << "    forwarded known load with new MemoryAccess" << Access
477                    << "\n");
478 
479       // { ValInst[] }
480       isl::space ValInstSpace = ExpectedVal.get_space().range();
481 
482       // After adding a new load to the SCoP, also update the Known content
483       // about it. The new load will have a known ValInst of
484       // { [DomainTarget[] -> Value[]] }
485       // but which -- because it is a copy of it -- has same value as the
486       // { [DomainDef[] -> Value[]] }
487       // that it replicates. Instead of  cloning the known content of
488       // [DomainDef[] -> Value[]]
489       // for DomainTarget[], we add a 'translator' that maps
490       // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
491       // before comparing to the known content.
492       // TODO: 'Translator' could also be used to map PHINodes to their incoming
493       // ValInsts.
494       if (ValInstSpace.is_wrapping()) {
495         // { DefDomain[] -> Value[] }
496         isl::map ValInsts = ExpectedVal.range().unwrap();
497 
498         // { DefDomain[] }
499         isl::set DefDomain = ValInsts.domain();
500 
501         // { Value[] }
502         isl::space ValSpace = ValInstSpace.unwrap().range();
503 
504         // { Value[] -> Value[] }
505         isl::map ValToVal =
506             isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
507 
508         // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
509         isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
510 
511         Translator = Translator.add_map(LocalTranslator);
512         DEBUG(dbgs() << "      local translator is " << LocalTranslator
513                      << "\n");
514       }
515     }
516     DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
517                  << "\n");
518     DEBUG(dbgs() << "      candidate elements where " << Candidates << "\n");
519     assert(Access);
520 
521     NumKnownLoadsForwarded++;
522     TotalKnownLoadsForwarded++;
523     return FD_DidForward;
524   }
525 
526   /// Forwards a speculatively executable instruction.
527   ///
528   /// @param TargetStmt  The statement the operand tree will be copied to.
529   /// @param UseInst     The (possibly speculatable) instruction to forward.
530   /// @param DefStmt     The statement @p UseInst is defined in.
531   /// @param DefLoop     The loop which contains @p UseInst.
532   /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
533   ///                    A mapping from the statement instance @p UseInst is
534   ///                    defined to the statement instance it is forwarded to.
535   /// @param DoIt        If false, only determine whether an operand tree can be
536   ///                    forwarded. If true, carry out the forwarding. Do not
537   ///                    use DoIt==true if an operand tree is not known to be
538   ///                    forwardable.
539   ///
540   /// @return FD_NotApplicable  if @p UseInst is not speculatable.
541   ///         FD_CannotForward  if one of @p UseInst's operands is not
542   ///                           forwardable.
543   ///         FD_CanForwardTree if @p UseInst is forwardable.
544   ///         FD_DidForward     if @p DoIt was true.
545   ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
546                                          Instruction *UseInst,
547                                          ScopStmt *DefStmt, Loop *DefLoop,
548                                          isl::map DefToTarget, bool DoIt) {
549     // PHIs, unless synthesizable, are not yet supported.
550     if (isa<PHINode>(UseInst))
551       return FD_NotApplicable;
552 
553     // Compatible instructions must satisfy the following conditions:
554     // 1. Idempotent (instruction will be copied, not moved; although its
555     //    original instance might be removed by simplification)
556     // 2. Not access memory (There might be memory writes between)
557     // 3. Not cause undefined behaviour (we might copy to a location when the
558     //    original instruction was no executed; this is currently not possible
559     //    because we do not forward PHINodes)
560     // 4. Not leak memory if executed multiple times (i.e. malloc)
561     //
562     // Instruction::mayHaveSideEffects is not sufficient because it considers
563     // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
564     // not sufficient because it allows memory accesses.
565     if (mayBeMemoryDependent(*UseInst))
566       return FD_NotApplicable;
567 
568     if (DoIt) {
569       // To ensure the right order, prepend this instruction before its
570       // operands. This ensures that its operands are inserted before the
571       // instruction using them.
572       // TODO: The operand tree is not really a tree, but a DAG. We should be
573       // able to handle DAGs without duplication.
574       TargetStmt->prependInstruction(UseInst);
575       NumInstructionsCopied++;
576       TotalInstructionsCopied++;
577     }
578 
579     for (Value *OpVal : UseInst->operand_values()) {
580       ForwardingDecision OpDecision =
581           forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DefToTarget, DoIt);
582       switch (OpDecision) {
583       case FD_CannotForward:
584         assert(!DoIt);
585         return FD_CannotForward;
586 
587       case FD_CanForwardLeaf:
588       case FD_CanForwardTree:
589         assert(!DoIt);
590         break;
591 
592       case FD_DidForward:
593         assert(DoIt);
594         break;
595 
596       case FD_NotApplicable:
597         llvm_unreachable("forwardTree should never return FD_NotApplicable");
598       }
599     }
600 
601     if (DoIt)
602       return FD_DidForward;
603     return FD_CanForwardTree;
604   }
605 
606   /// Determines whether an operand tree can be forwarded or carries out a
607   /// forwarding, depending on the @p DoIt flag.
608   ///
609   /// @param TargetStmt  The statement the operand tree will be copied to.
610   /// @param UseVal      The value (usually an instruction) which is root of an
611   ///                    operand tree.
612   /// @param UseStmt     The statement that uses @p UseVal.
613   /// @param UseLoop     The loop @p UseVal is used in.
614   /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
615   ///                    A mapping from the statement instance @p UseVal is used
616   ///                    to the statement instance it is forwarded to.
617   /// @param DoIt        If false, only determine whether an operand tree can be
618   ///                    forwarded. If true, carry out the forwarding. Do not
619   ///                    use DoIt==true if an operand tree is not known to be
620   ///                    forwardable.
621   ///
622   /// @return If DoIt==false, return whether the operand tree can be forwarded.
623   ///         If DoIt==true, return FD_DidForward.
624   ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
625                                  ScopStmt *UseStmt, Loop *UseLoop,
626                                  isl::map UseToTarget, bool DoIt) {
627     ScopStmt *DefStmt = nullptr;
628     Loop *DefLoop = nullptr;
629 
630     // { DefDomain[] -> TargetDomain[] }
631     isl::map DefToTarget;
632 
633     VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
634     switch (VUse.getKind()) {
635     case VirtualUse::Constant:
636     case VirtualUse::Block:
637     case VirtualUse::Hoisted:
638       // These can be used anywhere without special considerations.
639       if (DoIt)
640         return FD_DidForward;
641       return FD_CanForwardLeaf;
642 
643     case VirtualUse::Synthesizable: {
644       // ScopExpander will take care for of generating the code at the new
645       // location.
646       if (DoIt)
647         return FD_DidForward;
648 
649       // Check if the value is synthesizable at the new location as well. This
650       // might be possible when leaving a loop for which ScalarEvolution is
651       // unable to derive the exit value for.
652       // TODO: If there is a LCSSA PHI at the loop exit, use that one.
653       // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
654       // do not forward past its loop header. This would require us to use a
655       // previous loop induction variable instead the current one. We currently
656       // do not allow forwarding PHI nodes, thus this should never occur (the
657       // only exception where no phi is necessary being an unreachable loop
658       // without edge from the outside).
659       VirtualUse TargetUse = VirtualUse::create(
660           S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
661       if (TargetUse.getKind() == VirtualUse::Synthesizable)
662         return FD_CanForwardLeaf;
663 
664       DEBUG(dbgs() << "    Synthesizable would not be synthesizable anymore: "
665                    << *UseVal << "\n");
666       return FD_CannotForward;
667     }
668 
669     case VirtualUse::ReadOnly:
670       // Note that we cannot return FD_CanForwardTree here. With a operand tree
671       // depth of 0, UseVal is the use in TargetStmt that we try to replace.
672       // With -polly-analyze-read-only-scalars=true we would ensure the
673       // existence of a MemoryAccess (which already exists for a leaf) and be
674       // removed again by tryForwardTree because it's goal is to remove this
675       // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
676       // to do so.
677       if (!DoIt)
678         return FD_CanForwardLeaf;
679 
680       // If we model read-only scalars, we need to create a MemoryAccess for it.
681       if (ModelReadOnlyScalars)
682         TargetStmt->ensureValueRead(UseVal);
683 
684       NumReadOnlyCopied++;
685       TotalReadOnlyCopied++;
686       return FD_DidForward;
687 
688     case VirtualUse::Intra:
689       // Knowing that UseStmt and DefStmt are the same statement instance, just
690       // reuse the information about UseStmt for DefStmt
691       DefStmt = UseStmt;
692       DefToTarget = UseToTarget;
693 
694       LLVM_FALLTHROUGH;
695     case VirtualUse::Inter:
696       Instruction *Inst = cast<Instruction>(UseVal);
697 
698       if (!DefStmt) {
699         DefStmt = S->getStmtFor(Inst);
700         if (!DefStmt)
701           return FD_CannotForward;
702       }
703 
704       DefLoop = LI->getLoopFor(Inst->getParent());
705 
706       if (DefToTarget.is_null() && !Known.is_null()) {
707         // { UseDomain[] -> DefDomain[] }
708         isl::map UseToDef = computeUseToDefFlowDependency(UseStmt, DefStmt);
709 
710         // { DefDomain[] -> UseDomain[] -> TargetDomain[] } shortened to
711         // { DefDomain[] -> TargetDomain[] }
712         DefToTarget = UseToTarget.apply_domain(UseToDef);
713         simplify(DefToTarget);
714       }
715 
716       ForwardingDecision SpeculativeResult = forwardSpeculatable(
717           TargetStmt, Inst, DefStmt, DefLoop, DefToTarget, DoIt);
718       if (SpeculativeResult != FD_NotApplicable)
719         return SpeculativeResult;
720 
721       ForwardingDecision KnownResult =
722           forwardKnownLoad(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget,
723                            DefStmt, DefLoop, DefToTarget, DoIt);
724       if (KnownResult != FD_NotApplicable)
725         return KnownResult;
726 
727       // When no method is found to forward the operand tree, we effectively
728       // cannot handle it.
729       DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
730       return FD_CannotForward;
731     }
732 
733     llvm_unreachable("Case unhandled");
734   }
735 
736   /// Try to forward an operand tree rooted in @p RA.
737   bool tryForwardTree(MemoryAccess *RA) {
738     assert(RA->isLatestScalarKind());
739     DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
740 
741     ScopStmt *Stmt = RA->getStatement();
742     Loop *InLoop = Stmt->getSurroundingLoop();
743 
744     isl::map TargetToUse;
745     if (!Known.is_null()) {
746       isl::space DomSpace = Stmt->getDomainSpace();
747       TargetToUse =
748           isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
749     }
750 
751     ForwardingDecision Assessment = forwardTree(
752         Stmt, RA->getAccessValue(), Stmt, InLoop, TargetToUse, false);
753     assert(Assessment != FD_DidForward);
754     if (Assessment != FD_CanForwardTree)
755       return false;
756 
757     ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt,
758                                                InLoop, TargetToUse, true);
759     assert(Execution == FD_DidForward &&
760            "A previous positive assessment must also be executable");
761     (void)Execution;
762 
763     Stmt->removeSingleMemoryAccess(RA);
764     return true;
765   }
766 
767   /// Return which SCoP this instance is processing.
768   Scop *getScop() const { return S; }
769 
770   /// Run the algorithm: Use value read accesses as operand tree roots and try
771   /// to forward them into the statement.
772   bool forwardOperandTrees() {
773     for (ScopStmt &Stmt : *S) {
774       // Currently we cannot modify the instruction list of region statements.
775       if (!Stmt.isBlockStmt())
776         continue;
777 
778       bool StmtModified = false;
779 
780       // Because we are modifying the MemoryAccess list, collect them first to
781       // avoid iterator invalidation.
782       SmallVector<MemoryAccess *, 16> Accs;
783       for (MemoryAccess *RA : Stmt) {
784         if (!RA->isRead())
785           continue;
786         if (!RA->isLatestScalarKind())
787           continue;
788 
789         Accs.push_back(RA);
790       }
791 
792       for (MemoryAccess *RA : Accs) {
793         if (tryForwardTree(RA)) {
794           Modified = true;
795           StmtModified = true;
796           NumForwardedTrees++;
797           TotalForwardedTrees++;
798         }
799       }
800 
801       if (StmtModified) {
802         NumModifiedStmts++;
803         TotalModifiedStmts++;
804       }
805     }
806 
807     if (Modified)
808       ScopsModified++;
809     return Modified;
810   }
811 
812   /// Print the pass result, performed transformations and the SCoP after the
813   /// transformation.
814   void print(raw_ostream &OS, int Indent = 0) {
815     printStatistics(OS, Indent);
816 
817     if (!Modified) {
818       // This line can easily be checked in regression tests.
819       OS << "ForwardOpTree executed, but did not modify anything\n";
820       return;
821     }
822 
823     printStatements(OS, Indent);
824   }
825 };
826 
827 /// Pass that redirects scalar reads to array elements that are known to contain
828 /// the same value.
829 ///
830 /// This reduces the number of scalar accesses and therefore potentially
831 /// increases the freedom of the scheduler. In the ideal case, all reads of a
832 /// scalar definition are redirected (We currently do not care about removing
833 /// the write in this case).  This is also useful for the main DeLICM pass as
834 /// there are less scalars to be mapped.
835 class ForwardOpTree : public ScopPass {
836 private:
837   /// The pass implementation, also holding per-scop data.
838   std::unique_ptr<ForwardOpTreeImpl> Impl;
839 
840 public:
841   static char ID;
842 
843   explicit ForwardOpTree() : ScopPass(ID) {}
844   ForwardOpTree(const ForwardOpTree &) = delete;
845   ForwardOpTree &operator=(const ForwardOpTree &) = delete;
846 
847   void getAnalysisUsage(AnalysisUsage &AU) const override {
848     AU.addRequiredTransitive<ScopInfoRegionPass>();
849     AU.addRequired<LoopInfoWrapperPass>();
850     AU.setPreservesAll();
851   }
852 
853   bool runOnScop(Scop &S) override {
854     // Free resources for previous SCoP's computation, if not yet done.
855     releaseMemory();
856 
857     LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
858     Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI);
859 
860     if (AnalyzeKnown) {
861       DEBUG(dbgs() << "Prepare forwarders...\n");
862       Impl->computeKnownValues();
863     }
864 
865     DEBUG(dbgs() << "Forwarding operand trees...\n");
866     Impl->forwardOperandTrees();
867 
868     DEBUG(dbgs() << "\nFinal Scop:\n");
869     DEBUG(dbgs() << S);
870 
871     // Update statistics
872     auto ScopStats = S.getStatistics();
873     NumValueWrites += ScopStats.NumValueWrites;
874     NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
875     NumPHIWrites += ScopStats.NumPHIWrites;
876     NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
877     NumSingletonWrites += ScopStats.NumSingletonWrites;
878     NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
879 
880     return false;
881   }
882 
883   void printScop(raw_ostream &OS, Scop &S) const override {
884     if (!Impl)
885       return;
886 
887     assert(Impl->getScop() == &S);
888     Impl->print(OS);
889   }
890 
891   void releaseMemory() override { Impl.reset(); }
892 }; // class ForwardOpTree
893 
894 char ForwardOpTree::ID;
895 
896 } // namespace
897 
898 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
899 
900 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
901                       "Polly - Forward operand tree", false, false)
902 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
903 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
904                     "Polly - Forward operand tree", false, false)
905