1 //===------ ForwardOpTree.h -------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Move instructions between statements.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "polly/ForwardOpTree.h"
15 
16 #include "polly/Options.h"
17 #include "polly/RegisterPasses.h"
18 #include "polly/ScopBuilder.h"
19 #include "polly/ScopInfo.h"
20 #include "polly/ScopPass.h"
21 #include "polly/Support/GICHelper.h"
22 #include "polly/Support/ISLOStream.h"
23 #include "polly/Support/ISLTools.h"
24 #include "polly/Support/VirtualInstruction.h"
25 #include "polly/ZoneAlgo.h"
26 #include "llvm/Analysis/ValueTracking.h"
27 
28 #define DEBUG_TYPE "polly-optree"
29 
30 using namespace polly;
31 using namespace llvm;
32 
33 static cl::opt<bool>
34     AnalyzeKnown("polly-optree-analyze-known",
35                  cl::desc("Analyze array contents for load forwarding"),
36                  cl::cat(PollyCategory), cl::init(true), cl::Hidden);
37 
38 static cl::opt<unsigned long>
39     MaxOps("polly-optree-max-ops",
40            cl::desc("Maximum number of ISL operations to invest for known "
41                     "analysis; 0=no limit"),
42            cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
43 
44 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
45 STATISTIC(KnownOutOfQuota,
46           "Analyses aborted because max_operations was reached");
47 STATISTIC(KnownIncompatible, "Number of SCoPs incompatible for analysis");
48 
49 STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
50 STATISTIC(TotalKnownLoadsForwarded,
51           "Number of forwarded loads because their value was known");
52 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
53 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
54 STATISTIC(TotalModifiedStmts,
55           "Number of statements with at least one forwarded tree");
56 
57 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
58 
59 namespace {
60 
61 /// The state of whether an operand tree was/can be forwarded.
62 ///
63 /// The items apply to an instructions and its operand tree with the instruction
64 /// as the root element. If the value in question is not an instruction in the
65 /// SCoP, it can be a leaf of an instruction's operand tree.
66 enum ForwardingDecision {
67   /// The root instruction or value cannot be forwarded at all.
68   FD_CannotForward,
69 
70   /// The root instruction or value can be forwarded as a leaf of a larger
71   /// operand tree.
72   /// It does not make sense to move the value itself, it would just replace it
73   /// by a use of itself. For instance, a constant "5" used in a statement can
74   /// be forwarded, but it would just replace it by the same constant "5".
75   /// However, it makes sense to move as an operand of
76   ///
77   ///   %add = add 5, 5
78   ///
79   /// where "5" is moved as part of a larger operand tree. "5" would be placed
80   /// (disregarding for a moment that literal constants don't have a location
81   /// and can be used anywhere) into the same statement as %add would.
82   FD_CanForwardLeaf,
83 
84   /// The root instruction can be forwarded in a non-trivial way. This requires
85   /// the operand tree root to be an instruction in some statement.
86   FD_CanForwardTree,
87 
88   /// Used to indicate that a forwarding has be carried out successfully.
89   FD_DidForward,
90 
91   /// A forwarding method cannot be applied to the operand tree.
92   /// The difference to FD_CannotForward is that there might be other methods
93   /// that can handle it.
94   /// The conditions that make an operand tree applicable must be checked even
95   /// with DoIt==true because a method following the one that returned
96   /// FD_NotApplicable might have returned FD_CanForwardTree.
97   FD_NotApplicable
98 };
99 
100 /// Implementation of operand tree forwarding for a specific SCoP.
101 ///
102 /// For a statement that requires a scalar value (through a value read
103 /// MemoryAccess), see if its operand can be moved into the statement. If so,
104 /// the MemoryAccess is removed and the all the operand tree instructions are
105 /// moved into the statement. All original instructions are left in the source
106 /// statements. The simplification pass can clean these up.
107 class ForwardOpTreeImpl : ZoneAlgorithm {
108 private:
109   /// How many instructions have been copied to other statements.
110   int NumInstructionsCopied = 0;
111 
112   /// Number of loads forwarded because their value was known.
113   int NumKnownLoadsForwarded = 0;
114 
115   /// How many read-only accesses have been copied.
116   int NumReadOnlyCopied = 0;
117 
118   /// How many operand trees have been forwarded.
119   int NumForwardedTrees = 0;
120 
121   /// Number of statements with at least one forwarded operand tree.
122   int NumModifiedStmts = 0;
123 
124   /// Whether we carried out at least one change to the SCoP.
125   bool Modified = false;
126 
127   /// Contains the zones where array elements are known to contain a specific
128   /// value.
129   /// { [Element[] -> Zone[]] -> ValInst[] }
130   /// @see computeKnown()
131   isl::union_map Known;
132 
133   /// Translator for newly introduced ValInsts to already existing ValInsts such
134   /// that new introduced load instructions can reuse the Known analysis of its
135   /// original load. { ValInst[] -> ValInst[] }
136   isl::union_map Translator;
137 
138   /// Get list of array elements that do contain the same ValInst[] at Domain[].
139   ///
140   /// @param ValInst { Domain[] -> ValInst[] }
141   ///                The values for which we search for alternative locations,
142   ///                per statement instance.
143   ///
144   /// @return { Domain[] -> Element[] }
145   ///         For each statement instance, the array elements that contain the
146   ///         same ValInst.
147   isl::union_map findSameContentElements(isl::union_map ValInst) {
148     assert(ValInst.is_single_valued().is_true());
149 
150     // { Domain[] }
151     isl::union_set Domain = ValInst.domain();
152 
153     // { Domain[] -> Scatter[] }
154     isl::union_map Schedule = getScatterFor(Domain);
155 
156     // { Element[] -> [Scatter[] -> ValInst[]] }
157     isl::union_map MustKnownCurried =
158         convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
159 
160     // { [Domain[] -> ValInst[]] -> Scatter[] }
161     isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
162 
163     // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
164     isl::union_map SchedValDomVal =
165         DomValSched.range_product(ValInst.range_map()).reverse();
166 
167     // { Element[] -> [Domain[] -> ValInst[]] }
168     isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
169 
170     // { Domain[] -> Element[] }
171     isl::union_map MustKnownMap =
172         MustKnownInst.uncurry().domain().unwrap().reverse();
173     simplify(MustKnownMap);
174 
175     return MustKnownMap;
176   }
177 
178   /// Find a single array element for each statement instance, within a single
179   /// array.
180   ///
181   /// @param MustKnown { Domain[] -> Element[] }
182   ///                  Set of candidate array elements.
183   /// @param Domain    { Domain[] }
184   ///                  The statement instance for which we need elements for.
185   ///
186   /// @return { Domain[] -> Element[] }
187   ///         For each statement instance, an array element out of @p MustKnown.
188   ///         All array elements must be in the same array (Polly does not yet
189   ///         support reading from different accesses using the same
190   ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
191   ///         null.
192   isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
193     // { Domain[] -> Element[] }
194     isl::map Result;
195 
196     // MemoryAccesses can read only elements from a single array
197     // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
198     // Look through all spaces until we find one that contains at least the
199     // wanted statement instance.s
200     MustKnown.foreach_map([&](isl::map Map) -> isl::stat {
201       // Get the array this is accessing.
202       isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
203       ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
204 
205       // No support for generation of indirect array accesses.
206       if (SAI->getBasePtrOriginSAI())
207         return isl::stat::ok; // continue
208 
209       // Determine whether this map contains all wanted values.
210       isl::set MapDom = Map.domain();
211       if (!Domain.is_subset(MapDom).is_true())
212         return isl::stat::ok; // continue
213 
214       // There might be multiple array elements that contain the same value, but
215       // choose only one of them. lexmin is used because it returns a one-value
216       // mapping, we do not care about which one.
217       // TODO: Get the simplest access function.
218       Result = Map.lexmin();
219       return isl::stat::error; // break
220     });
221 
222     return Result;
223   }
224 
225 public:
226   /// Compute the zones of known array element contents.
227   ///
228   /// @return True if the computed #Known is usable.
229   bool computeKnownValues() {
230     isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
231 
232     // Check that nothing strange occurs.
233     if (!isCompatibleScop()) {
234       KnownIncompatible++;
235       return false;
236     }
237 
238     isl_ctx_reset_error(IslCtx.get());
239     {
240       IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), MaxOps);
241 
242       computeCommon();
243       Known = computeKnown(true, true);
244       simplify(Known);
245 
246       // Preexisting ValInsts use the known content analysis of themselves.
247       Translator = makeIdentityMap(Known.range(), false);
248     }
249 
250     if (!Known || !Translator) {
251       assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
252       KnownOutOfQuota++;
253       Known = nullptr;
254       Translator = nullptr;
255       DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
256       return false;
257     }
258 
259     KnownAnalyzed++;
260     DEBUG(dbgs() << "All known: " << Known << "\n");
261 
262     return true;
263   }
264 
265   void printStatistics(raw_ostream &OS, int Indent = 0) {
266     OS.indent(Indent) << "Statistics {\n";
267     OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
268                           << '\n';
269     OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
270                           << '\n';
271     OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
272                           << '\n';
273     OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
274                           << '\n';
275     OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
276                           << NumModifiedStmts << '\n';
277     OS.indent(Indent) << "}\n";
278   }
279 
280   void printStatements(llvm::raw_ostream &OS, int Indent = 0) const {
281     OS.indent(Indent) << "After statements {\n";
282     for (auto &Stmt : *S) {
283       OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
284       for (auto *MA : Stmt)
285         MA->print(OS);
286 
287       OS.indent(Indent + 12);
288       Stmt.printInstructions(OS);
289     }
290     OS.indent(Indent) << "}\n";
291   }
292 
293   /// Create a new MemoryAccess of type read and MemoryKind::Array.
294   ///
295   /// @param Stmt           The statement in which the access occurs.
296   /// @param LI             The instruction that does the access.
297   /// @param AccessRelation The array element that each statement instance
298   ///                       accesses.
299   ///
300   /// @param The newly created access.
301   MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
302                                     isl::map AccessRelation) {
303     isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
304     ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
305 
306     // Create a dummy SCEV access, to be replaced anyway.
307     SmallVector<const SCEV *, 4> Sizes;
308     Sizes.reserve(SAI->getNumberOfDimensions());
309     SmallVector<const SCEV *, 4> Subscripts;
310     Subscripts.reserve(SAI->getNumberOfDimensions());
311     for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
312       Sizes.push_back(SAI->getDimensionSize(i));
313       Subscripts.push_back(nullptr);
314     }
315 
316     MemoryAccess *Access =
317         new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
318                          LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
319     S->addAccessFunction(Access);
320     Stmt->addAccess(Access, true);
321 
322     Access->setNewAccessRelation(AccessRelation);
323 
324     return Access;
325   }
326 
327   /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a
328   /// use in every instance of @p UseStmt.
329   ///
330   /// @param UseStmt Statement a scalar is used in.
331   /// @param DefStmt Statement a scalar is defined in.
332   ///
333   /// @return { DomainUse[] -> DomainDef[] }
334   isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) {
335     // { DomainUse[] -> Scatter[] }
336     isl::map UseScatter = getScatterFor(UseStmt);
337 
338     // { Zone[] -> DomainDef[] }
339     isl::map ReachDefZone = getScalarReachingDefinition(DefStmt);
340 
341     // { Scatter[] -> DomainDef[] }
342     isl::map ReachDefTimepoints =
343         convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true);
344 
345     // { DomainUse[] -> DomainDef[] }
346     return UseScatter.apply_range(ReachDefTimepoints);
347   }
348 
349   /// Forward a load by reading from an array element that contains the same
350   /// value. Typically the location it was loaded from.
351   ///
352   /// @param TargetStmt  The statement the operand tree will be copied to.
353   /// @param Inst        The (possibly speculatable) instruction to forward.
354   /// @param UseStmt     The statement that uses @p Inst.
355   /// @param UseLoop     The loop @p Inst is used in.
356   /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
357   ///                    A mapping from the statement instance @p Inst is used
358   ///                    to the statement instance it is forwarded to.
359   /// @param DefStmt     The statement @p Inst is defined in.
360   /// @param DefLoop     The loop which contains @p Inst.
361   /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
362   ///                    A mapping from the statement instance @p Inst is
363   ///                    defined to the statement instance it is forwarded to.
364   /// @param DoIt        If false, only determine whether an operand tree can be
365   ///                    forwarded. If true, carry out the forwarding. Do not
366   ///                    use DoIt==true if an operand tree is not known to be
367   ///                    forwardable.
368   ///
369   /// @return FD_NotApplicable  if @p Inst is not a LoadInst.
370   ///         FD_CannotForward  if no array element to load from was found.
371   ///         FD_CanForwardLeaf if the load is already in the target statement
372   ///                           instance.
373   ///         FD_CanForwardTree if @p Inst is forwardable.
374   ///         FD_DidForward     if @p DoIt was true.
375   ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
376                                       ScopStmt *UseStmt, Loop *UseLoop,
377                                       isl::map UseToTarget, ScopStmt *DefStmt,
378                                       Loop *DefLoop, isl::map DefToTarget,
379                                       bool DoIt) {
380     // Cannot do anything without successful known analysis.
381     if (Known.is_null())
382       return FD_NotApplicable;
383 
384     LoadInst *LI = dyn_cast<LoadInst>(Inst);
385     if (!LI)
386       return FD_NotApplicable;
387 
388     // If the load is already in the statement, not forwarding is necessary.
389     // However, it might happen that the LoadInst is already present in the
390     // statement's instruction list. In that case we do as follows:
391     // - For the evaluation (DoIt==false), we can trivially forward it as it is
392     //   benefit of forwarding an already present instruction.
393     // - For the execution (DoIt==false), prepend the instruction (to make it
394     //   available to all instructions following in the instruction list), but
395     //   do not add another MemoryAccess.
396     MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
397     if (Access && !DoIt)
398       return FD_CanForwardLeaf;
399 
400     if (DoIt)
401       TargetStmt->prependInstruction(LI);
402 
403     ForwardingDecision OpDecision =
404         forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop,
405                     DefToTarget, DoIt);
406     switch (OpDecision) {
407     case FD_CannotForward:
408       assert(!DoIt);
409       return OpDecision;
410 
411     case FD_CanForwardLeaf:
412     case FD_CanForwardTree:
413       assert(!DoIt);
414       break;
415 
416     case FD_DidForward:
417       assert(DoIt);
418       break;
419 
420     default:
421       llvm_unreachable("Shouldn't return this");
422     }
423 
424     // { DomainDef[] -> ValInst[] }
425     isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
426 
427     // { DomainTarget[] -> ValInst[] }
428     isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
429     isl::union_map TranslatedExpectedVal =
430         isl::union_map(TargetExpectedVal).apply_range(Translator);
431 
432     // { DomainTarget[] -> Element[] }
433     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
434 
435     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
436     if (!SameVal)
437       return FD_CannotForward;
438 
439     if (!DoIt)
440       return FD_CanForwardTree;
441 
442     if (Access) {
443       DEBUG(dbgs() << "    forwarded known load with preexisting MemoryAccess"
444                    << Access << "\n");
445     } else {
446       Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
447       DEBUG(dbgs() << "    forwarded known load with new MemoryAccess" << Access
448                    << "\n");
449 
450       // { ValInst[] }
451       isl::space ValInstSpace = ExpectedVal.get_space().range();
452 
453       // After adding a new load to the SCoP, also update the Known content
454       // about it. The new load will have a known ValInst of
455       // { [DomainTarget[] -> Value[]] }
456       // but which -- because it is a copy of it -- has same value as the
457       // { [DomainDef[] -> Value[]] }
458       // that it replicates. Instead of  cloning the known content of
459       // [DomainDef[] -> Value[]]
460       // for DomainTarget[], we add a 'translator' that maps
461       // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
462       // before comparing to the known content.
463       // TODO: 'Translator' could also be used to map PHINodes to their incoming
464       // ValInsts.
465       if (ValInstSpace.is_wrapping()) {
466         // { DefDomain[] -> Value[] }
467         isl::map ValInsts = ExpectedVal.range().unwrap();
468 
469         // { DefDomain[] }
470         isl::set DefDomain = ValInsts.domain();
471 
472         // { Value[] }
473         isl::space ValSpace = ValInstSpace.unwrap().range();
474 
475         // { Value[] -> Value[] }
476         isl::map ValToVal =
477             isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
478 
479         // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
480         isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
481 
482         Translator = Translator.add_map(LocalTranslator);
483         DEBUG(dbgs() << "      local translator is " << LocalTranslator
484                      << "\n");
485       }
486     }
487     DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
488                  << "\n");
489     DEBUG(dbgs() << "      candidate elements where " << Candidates << "\n");
490     assert(Access);
491 
492     NumKnownLoadsForwarded++;
493     TotalKnownLoadsForwarded++;
494     return FD_DidForward;
495   }
496 
497   /// Forwards a speculatively executable instruction.
498   ///
499   /// @param TargetStmt  The statement the operand tree will be copied to.
500   /// @param UseInst     The (possibly speculatable) instruction to forward.
501   /// @param DefStmt     The statement @p UseInst is defined in.
502   /// @param DefLoop     The loop which contains @p UseInst.
503   /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
504   ///                    A mapping from the statement instance @p UseInst is
505   ///                    defined to the statement instance it is forwarded to.
506   /// @param DoIt        If false, only determine whether an operand tree can be
507   ///                    forwarded. If true, carry out the forwarding. Do not
508   ///                    use DoIt==true if an operand tree is not known to be
509   ///                    forwardable.
510   ///
511   /// @return FD_NotApplicable  if @p UseInst is not speculatable.
512   ///         FD_CannotForward  if one of @p UseInst's operands is not
513   ///                           forwardable.
514   ///         FD_CanForwardTree if @p UseInst is forwardable.
515   ///         FD_DidForward     if @p DoIt was true.
516   ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
517                                          Instruction *UseInst,
518                                          ScopStmt *DefStmt, Loop *DefLoop,
519                                          isl::map DefToTarget, bool DoIt) {
520     // PHIs, unless synthesizable, are not yet supported.
521     if (isa<PHINode>(UseInst))
522       return FD_NotApplicable;
523 
524     // Compatible instructions must satisfy the following conditions:
525     // 1. Idempotent (instruction will be copied, not moved; although its
526     //    original instance might be removed by simplification)
527     // 2. Not access memory (There might be memory writes between)
528     // 3. Not cause undefined behaviour (we might copy to a location when the
529     //    original instruction was no executed; this is currently not possible
530     //    because we do not forward PHINodes)
531     // 4. Not leak memory if executed multiple times (i.e. malloc)
532     //
533     // Instruction::mayHaveSideEffects is not sufficient because it considers
534     // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
535     // not sufficient because it allows memory accesses.
536     if (mayBeMemoryDependent(*UseInst))
537       return FD_NotApplicable;
538 
539     if (DoIt) {
540       // To ensure the right order, prepend this instruction before its
541       // operands. This ensures that its operands are inserted before the
542       // instruction using them.
543       // TODO: The operand tree is not really a tree, but a DAG. We should be
544       // able to handle DAGs without duplication.
545       TargetStmt->prependInstruction(UseInst);
546       NumInstructionsCopied++;
547       TotalInstructionsCopied++;
548     }
549 
550     for (Value *OpVal : UseInst->operand_values()) {
551       ForwardingDecision OpDecision =
552           forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DefToTarget, DoIt);
553       switch (OpDecision) {
554       case FD_CannotForward:
555         assert(!DoIt);
556         return FD_CannotForward;
557 
558       case FD_CanForwardLeaf:
559       case FD_CanForwardTree:
560         assert(!DoIt);
561         break;
562 
563       case FD_DidForward:
564         assert(DoIt);
565         break;
566 
567       case FD_NotApplicable:
568         llvm_unreachable("forwardTree should never return FD_NotApplicable");
569       }
570     }
571 
572     if (DoIt)
573       return FD_DidForward;
574     return FD_CanForwardTree;
575   }
576 
577   /// Determines whether an operand tree can be forwarded or carries out a
578   /// forwarding, depending on the @p DoIt flag.
579   ///
580   /// @param TargetStmt  The statement the operand tree will be copied to.
581   /// @param UseVal      The value (usually an instruction) which is root of an
582   ///                    operand tree.
583   /// @param UseStmt     The statement that uses @p UseVal.
584   /// @param UseLoop     The loop @p UseVal is used in.
585   /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
586   ///                    A mapping from the statement instance @p UseVal is used
587   ///                    to the statement instance it is forwarded to.
588   /// @param DoIt        If false, only determine whether an operand tree can be
589   ///                    forwarded. If true, carry out the forwarding. Do not
590   ///                    use DoIt==true if an operand tree is not known to be
591   ///                    forwardable.
592   ///
593   /// @return If DoIt==false, return whether the operand tree can be forwarded.
594   ///         If DoIt==true, return FD_DidForward.
595   ForwardingDecision forwardTree(ScopStmt *TargetStmt, llvm::Value *UseVal,
596                                  ScopStmt *UseStmt, llvm::Loop *UseLoop,
597                                  isl::map UseToTarget, bool DoIt) {
598     ScopStmt *DefStmt = nullptr;
599     Loop *DefLoop = nullptr;
600 
601     // { DefDomain[] -> TargetDomain[] }
602     isl::map DefToTarget;
603 
604     VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
605     switch (VUse.getKind()) {
606     case VirtualUse::Constant:
607     case VirtualUse::Block:
608     case VirtualUse::Hoisted:
609       // These can be used anywhere without special considerations.
610       if (DoIt)
611         return FD_DidForward;
612       return FD_CanForwardLeaf;
613 
614     case VirtualUse::Synthesizable: {
615       // ScopExpander will take care for of generating the code at the new
616       // location.
617       if (DoIt)
618         return FD_DidForward;
619 
620       // Check if the value is synthesizable at the new location as well. This
621       // might be possible when leaving a loop for which ScalarEvolution is
622       // unable to derive the exit value for.
623       // TODO: If there is a LCSSA PHI at the loop exit, use that one.
624       // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
625       // do not forward past its loop header. This would require us to use a
626       // previous loop induction variable instead the current one. We currently
627       // do not allow forwarding PHI nodes, thus this should never occur (the
628       // only exception where no phi is necessary being an unreachable loop
629       // without edge from the outside).
630       VirtualUse TargetUse = VirtualUse::create(
631           S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
632       if (TargetUse.getKind() == VirtualUse::Synthesizable)
633         return FD_CanForwardLeaf;
634 
635       DEBUG(dbgs() << "    Synthesizable would not be synthesizable anymore: "
636                    << *UseVal << "\n");
637       return FD_CannotForward;
638     }
639 
640     case VirtualUse::ReadOnly:
641       // Note that we cannot return FD_CanForwardTree here. With a operand tree
642       // depth of 0, UseVal is the use in TargetStmt that we try to replace.
643       // With -polly-analyze-read-only-scalars=true we would ensure the
644       // existence of a MemoryAccess (which already exists for a leaf) and be
645       // removed again by tryForwardTree because it's goal is to remove this
646       // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
647       // to do so.
648       if (!DoIt)
649         return FD_CanForwardLeaf;
650 
651       // If we model read-only scalars, we need to create a MemoryAccess for it.
652       if (ModelReadOnlyScalars)
653         TargetStmt->ensureValueRead(UseVal);
654 
655       NumReadOnlyCopied++;
656       TotalReadOnlyCopied++;
657       return FD_DidForward;
658 
659     case VirtualUse::Intra:
660       // Knowing that UseStmt and DefStmt are the same statement instance, just
661       // reuse the information about UseStmt for DefStmt
662       DefStmt = UseStmt;
663       DefToTarget = UseToTarget;
664 
665       LLVM_FALLTHROUGH;
666     case VirtualUse::Inter:
667       Instruction *Inst = cast<Instruction>(UseVal);
668 
669       if (!DefStmt) {
670         DefStmt = S->getStmtFor(Inst);
671         if (!DefStmt)
672           return FD_CannotForward;
673       }
674 
675       DefLoop = LI->getLoopFor(Inst->getParent());
676 
677       if (DefToTarget.is_null() && !Known.is_null()) {
678         // { UseDomain[] -> DefDomain[] }
679         isl::map UseToDef = computeUseToDefFlowDependency(UseStmt, DefStmt);
680 
681         // { DefDomain[] -> UseDomain[] -> TargetDomain[] } shortened to
682         // { DefDomain[] -> TargetDomain[] }
683         DefToTarget = UseToTarget.apply_domain(UseToDef);
684         simplify(DefToTarget);
685       }
686 
687       ForwardingDecision SpeculativeResult = forwardSpeculatable(
688           TargetStmt, Inst, DefStmt, DefLoop, DefToTarget, DoIt);
689       if (SpeculativeResult != FD_NotApplicable)
690         return SpeculativeResult;
691 
692       ForwardingDecision KnownResult =
693           forwardKnownLoad(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget,
694                            DefStmt, DefLoop, DefToTarget, DoIt);
695       if (KnownResult != FD_NotApplicable)
696         return KnownResult;
697 
698       // When no method is found to forward the operand tree, we effectively
699       // cannot handle it.
700       DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
701       return FD_CannotForward;
702     }
703 
704     llvm_unreachable("Case unhandled");
705   }
706 
707   /// Try to forward an operand tree rooted in @p RA.
708   bool tryForwardTree(MemoryAccess *RA) {
709     assert(RA->isLatestScalarKind());
710     DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
711 
712     ScopStmt *Stmt = RA->getStatement();
713     Loop *InLoop = Stmt->getSurroundingLoop();
714 
715     isl::map TargetToUse;
716     if (!Known.is_null()) {
717       isl::space DomSpace = Stmt->getDomainSpace();
718       TargetToUse =
719           isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
720     }
721 
722     ForwardingDecision Assessment = forwardTree(
723         Stmt, RA->getAccessValue(), Stmt, InLoop, TargetToUse, false);
724     assert(Assessment != FD_DidForward);
725     if (Assessment != FD_CanForwardTree)
726       return false;
727 
728     ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt,
729                                                InLoop, TargetToUse, true);
730     assert(Execution == FD_DidForward &&
731            "A previous positive assessment must also be executable");
732     (void)Execution;
733 
734     Stmt->removeSingleMemoryAccess(RA);
735     return true;
736   }
737 
738 public:
739   ForwardOpTreeImpl(Scop *S, LoopInfo *LI)
740       : ZoneAlgorithm("polly-optree", S, LI) {}
741 
742   /// Return which SCoP this instance is processing.
743   Scop *getScop() const { return S; }
744 
745   /// Run the algorithm: Use value read accesses as operand tree roots and try
746   /// to forward them into the statement.
747   bool forwardOperandTrees() {
748     for (ScopStmt &Stmt : *S) {
749       // Currently we cannot modify the instruction list of region statements.
750       if (!Stmt.isBlockStmt())
751         continue;
752 
753       bool StmtModified = false;
754 
755       // Because we are modifying the MemoryAccess list, collect them first to
756       // avoid iterator invalidation.
757       SmallVector<MemoryAccess *, 16> Accs;
758       for (MemoryAccess *RA : Stmt) {
759         if (!RA->isRead())
760           continue;
761         if (!RA->isLatestScalarKind())
762           continue;
763 
764         Accs.push_back(RA);
765       }
766 
767       for (MemoryAccess *RA : Accs) {
768         if (tryForwardTree(RA)) {
769           Modified = true;
770           StmtModified = true;
771           NumForwardedTrees++;
772           TotalForwardedTrees++;
773         }
774       }
775 
776       if (StmtModified) {
777         NumModifiedStmts++;
778         TotalModifiedStmts++;
779       }
780     }
781 
782     if (Modified)
783       ScopsModified++;
784     return Modified;
785   }
786 
787   /// Print the pass result, performed transformations and the SCoP after the
788   /// transformation.
789   void print(llvm::raw_ostream &OS, int Indent = 0) {
790     printStatistics(OS, Indent);
791 
792     if (!Modified) {
793       // This line can easily be checked in regression tests.
794       OS << "ForwardOpTree executed, but did not modify anything\n";
795       return;
796     }
797 
798     printStatements(OS, Indent);
799   }
800 };
801 
802 /// Pass that redirects scalar reads to array elements that are known to contain
803 /// the same value.
804 ///
805 /// This reduces the number of scalar accesses and therefore potentially
806 /// increases the freedom of the scheduler. In the ideal case, all reads of a
807 /// scalar definition are redirected (We currently do not care about removing
808 /// the write in this case).  This is also useful for the main DeLICM pass as
809 /// there are less scalars to be mapped.
810 class ForwardOpTree : public ScopPass {
811 private:
812   ForwardOpTree(const ForwardOpTree &) = delete;
813   const ForwardOpTree &operator=(const ForwardOpTree &) = delete;
814 
815   /// The pass implementation, also holding per-scop data.
816   std::unique_ptr<ForwardOpTreeImpl> Impl;
817 
818 public:
819   static char ID;
820 
821   explicit ForwardOpTree() : ScopPass(ID) {}
822 
823   virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
824     AU.addRequiredTransitive<ScopInfoRegionPass>();
825     AU.addRequired<LoopInfoWrapperPass>();
826     AU.setPreservesAll();
827   }
828 
829   virtual bool runOnScop(Scop &S) override {
830     // Free resources for previous SCoP's computation, if not yet done.
831     releaseMemory();
832 
833     LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
834     Impl = make_unique<ForwardOpTreeImpl>(&S, &LI);
835 
836     if (AnalyzeKnown) {
837       DEBUG(dbgs() << "Prepare forwarders...\n");
838       Impl->computeKnownValues();
839     }
840 
841     DEBUG(dbgs() << "Forwarding operand trees...\n");
842     Impl->forwardOperandTrees();
843 
844     DEBUG(dbgs() << "\nFinal Scop:\n");
845     DEBUG(dbgs() << S);
846 
847     return false;
848   }
849 
850   virtual void printScop(raw_ostream &OS, Scop &S) const override {
851     if (!Impl)
852       return;
853 
854     assert(Impl->getScop() == &S);
855     Impl->print(OS);
856   }
857 
858   virtual void releaseMemory() override { Impl.reset(); }
859 
860 }; // class ForwardOpTree
861 
862 char ForwardOpTree::ID;
863 } // anonymous namespace
864 
865 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
866 
867 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
868                       "Polly - Forward operand tree", false, false)
869 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
870 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
871                     "Polly - Forward operand tree", false, false)
872