1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Move instructions between statements.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "polly/ForwardOpTree.h"
15 #include "polly/Options.h"
16 #include "polly/ScopBuilder.h"
17 #include "polly/ScopInfo.h"
18 #include "polly/ScopPass.h"
19 #include "polly/Support/GICHelper.h"
20 #include "polly/Support/ISLOStream.h"
21 #include "polly/Support/ISLTools.h"
22 #include "polly/Support/VirtualInstruction.h"
23 #include "polly/ZoneAlgo.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/Analysis/LoopInfo.h"
28 #include "llvm/Analysis/ValueTracking.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Value.h"
32 #include "llvm/Pass.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Compiler.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/ErrorHandling.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "isl/ctx.h"
40 #include "isl/isl-noexceptions.h"
41 #include <cassert>
42 #include <memory>
43 
44 #define DEBUG_TYPE "polly-optree"
45 
46 using namespace llvm;
47 using namespace polly;
48 
49 static cl::opt<bool>
50     AnalyzeKnown("polly-optree-analyze-known",
51                  cl::desc("Analyze array contents for load forwarding"),
52                  cl::cat(PollyCategory), cl::init(true), cl::Hidden);
53 
54 static cl::opt<bool>
55     NormalizePHIs("polly-optree-normalize-phi",
56                   cl::desc("Replace PHIs by their incoming values"),
57                   cl::cat(PollyCategory), cl::init(false), cl::Hidden);
58 
59 static cl::opt<unsigned>
60     MaxOps("polly-optree-max-ops",
61            cl::desc("Maximum number of ISL operations to invest for known "
62                     "analysis; 0=no limit"),
63            cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
64 
65 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
66 STATISTIC(KnownOutOfQuota,
67           "Analyses aborted because max_operations was reached");
68 
69 STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
70 STATISTIC(TotalKnownLoadsForwarded,
71           "Number of forwarded loads because their value was known");
72 STATISTIC(TotalReloads, "Number of reloaded values");
73 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
74 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
75 STATISTIC(TotalModifiedStmts,
76           "Number of statements with at least one forwarded tree");
77 
78 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
79 
80 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
81 STATISTIC(NumValueWritesInLoops,
82           "Number of scalar value writes nested in affine loops after OpTree");
83 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
84 STATISTIC(NumPHIWritesInLoops,
85           "Number of scalar phi writes nested in affine loops after OpTree");
86 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
87 STATISTIC(NumSingletonWritesInLoops,
88           "Number of singleton writes nested in affine loops after OpTree");
89 
90 namespace {
91 
92 /// The state of whether an operand tree was/can be forwarded.
93 ///
94 /// The items apply to an instructions and its operand tree with the instruction
95 /// as the root element. If the value in question is not an instruction in the
96 /// SCoP, it can be a leaf of an instruction's operand tree.
97 enum ForwardingDecision {
98   /// The root instruction or value cannot be forwarded at all.
99   FD_CannotForward,
100 
101   /// The root instruction or value can be forwarded as a leaf of a larger
102   /// operand tree.
103   /// It does not make sense to move the value itself, it would just replace it
104   /// by a use of itself. For instance, a constant "5" used in a statement can
105   /// be forwarded, but it would just replace it by the same constant "5".
106   /// However, it makes sense to move as an operand of
107   ///
108   ///   %add = add 5, 5
109   ///
110   /// where "5" is moved as part of a larger operand tree. "5" would be placed
111   /// (disregarding for a moment that literal constants don't have a location
112   /// and can be used anywhere) into the same statement as %add would.
113   FD_CanForwardLeaf,
114 
115   /// The root instruction can be forwarded and doing so avoids a scalar
116   /// dependency.
117   ///
118   /// This can be either because the operand tree can be moved to the target
119   /// statement, or a memory access is redirected to read from a different
120   /// location.
121   FD_CanForwardProfitably,
122 
123   /// Used to indicate that a forwarding has be carried out successfully, and
124   /// the forwarded memory access can be deleted.
125   FD_DidForwardTree,
126 
127   /// Used to indicate that a forwarding has be carried out successfully, and
128   /// the forwarded memory access is being reused.
129   FD_DidForwardLeaf,
130 
131   /// A forwarding method cannot be applied to the operand tree.
132   /// The difference to FD_CannotForward is that there might be other methods
133   /// that can handle it.
134   /// The conditions that make an operand tree applicable must be checked even
135   /// with DoIt==true because a method following the one that returned
136   /// FD_NotApplicable might have returned FD_CanForwardTree.
137   FD_NotApplicable
138 };
139 
140 /// Implementation of operand tree forwarding for a specific SCoP.
141 ///
142 /// For a statement that requires a scalar value (through a value read
143 /// MemoryAccess), see if its operand can be moved into the statement. If so,
144 /// the MemoryAccess is removed and the all the operand tree instructions are
145 /// moved into the statement. All original instructions are left in the source
146 /// statements. The simplification pass can clean these up.
147 class ForwardOpTreeImpl : ZoneAlgorithm {
148 private:
149   /// Scope guard to limit the number of isl operations for this pass.
150   IslMaxOperationsGuard &MaxOpGuard;
151 
152   /// How many instructions have been copied to other statements.
153   int NumInstructionsCopied = 0;
154 
155   /// Number of loads forwarded because their value was known.
156   int NumKnownLoadsForwarded = 0;
157 
158   /// Number of values reloaded from known array elements.
159   int NumReloads = 0;
160 
161   /// How many read-only accesses have been copied.
162   int NumReadOnlyCopied = 0;
163 
164   /// How many operand trees have been forwarded.
165   int NumForwardedTrees = 0;
166 
167   /// Number of statements with at least one forwarded operand tree.
168   int NumModifiedStmts = 0;
169 
170   /// Whether we carried out at least one change to the SCoP.
171   bool Modified = false;
172 
173   /// Contains the zones where array elements are known to contain a specific
174   /// value.
175   /// { [Element[] -> Zone[]] -> ValInst[] }
176   /// @see computeKnown()
177   isl::union_map Known;
178 
179   /// Translator for newly introduced ValInsts to already existing ValInsts such
180   /// that new introduced load instructions can reuse the Known analysis of its
181   /// original load. { ValInst[] -> ValInst[] }
182   isl::union_map Translator;
183 
184   /// Get list of array elements that do contain the same ValInst[] at Domain[].
185   ///
186   /// @param ValInst { Domain[] -> ValInst[] }
187   ///                The values for which we search for alternative locations,
188   ///                per statement instance.
189   ///
190   /// @return { Domain[] -> Element[] }
191   ///         For each statement instance, the array elements that contain the
192   ///         same ValInst.
193   isl::union_map findSameContentElements(isl::union_map ValInst) {
194     assert(!ValInst.is_single_valued().is_false());
195 
196     // { Domain[] }
197     isl::union_set Domain = ValInst.domain();
198 
199     // { Domain[] -> Scatter[] }
200     isl::union_map Schedule = getScatterFor(Domain);
201 
202     // { Element[] -> [Scatter[] -> ValInst[]] }
203     isl::union_map MustKnownCurried =
204         convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
205 
206     // { [Domain[] -> ValInst[]] -> Scatter[] }
207     isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
208 
209     // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
210     isl::union_map SchedValDomVal =
211         DomValSched.range_product(ValInst.range_map()).reverse();
212 
213     // { Element[] -> [Domain[] -> ValInst[]] }
214     isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
215 
216     // { Domain[] -> Element[] }
217     isl::union_map MustKnownMap =
218         MustKnownInst.uncurry().domain().unwrap().reverse();
219     simplify(MustKnownMap);
220 
221     return MustKnownMap;
222   }
223 
224   /// Find a single array element for each statement instance, within a single
225   /// array.
226   ///
227   /// @param MustKnown { Domain[] -> Element[] }
228   ///                  Set of candidate array elements.
229   /// @param Domain    { Domain[] }
230   ///                  The statement instance for which we need elements for.
231   ///
232   /// @return { Domain[] -> Element[] }
233   ///         For each statement instance, an array element out of @p MustKnown.
234   ///         All array elements must be in the same array (Polly does not yet
235   ///         support reading from different accesses using the same
236   ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
237   ///         null.
238   isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
239     // { Domain[] -> Element[] }
240     isl::map Result;
241 
242     // MemoryAccesses can read only elements from a single array
243     // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
244     // Look through all spaces until we find one that contains at least the
245     // wanted statement instance.s
246     MustKnown.foreach_map([&](isl::map Map) -> isl::stat {
247       // Get the array this is accessing.
248       isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
249       ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
250 
251       // No support for generation of indirect array accesses.
252       if (SAI->getBasePtrOriginSAI())
253         return isl::stat::ok; // continue
254 
255       // Determine whether this map contains all wanted values.
256       isl::set MapDom = Map.domain();
257       if (!Domain.is_subset(MapDom).is_true())
258         return isl::stat::ok; // continue
259 
260       // There might be multiple array elements that contain the same value, but
261       // choose only one of them. lexmin is used because it returns a one-value
262       // mapping, we do not care about which one.
263       // TODO: Get the simplest access function.
264       Result = Map.lexmin();
265       return isl::stat::error; // break
266     });
267 
268     return Result;
269   }
270 
271 public:
272   ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
273       : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
274 
275   /// Compute the zones of known array element contents.
276   ///
277   /// @return True if the computed #Known is usable.
278   bool computeKnownValues() {
279     isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
280 
281     // Check that nothing strange occurs.
282     collectCompatibleElts();
283 
284     {
285       IslQuotaScope QuotaScope = MaxOpGuard.enter();
286 
287       computeCommon();
288       if (NormalizePHIs)
289         computeNormalizedPHIs();
290       Known = computeKnown(true, true);
291 
292       // Preexisting ValInsts use the known content analysis of themselves.
293       Translator = makeIdentityMap(Known.range(), false);
294     }
295 
296     if (!Known || !Translator || !NormalizeMap) {
297       assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
298       Known = nullptr;
299       Translator = nullptr;
300       NormalizeMap = nullptr;
301       DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
302       return false;
303     }
304 
305     KnownAnalyzed++;
306     DEBUG(dbgs() << "All known: " << Known << "\n");
307 
308     return true;
309   }
310 
311   void printStatistics(raw_ostream &OS, int Indent = 0) {
312     OS.indent(Indent) << "Statistics {\n";
313     OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
314                           << '\n';
315     OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
316                           << '\n';
317     OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
318     OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
319                           << '\n';
320     OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
321                           << '\n';
322     OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
323                           << NumModifiedStmts << '\n';
324     OS.indent(Indent) << "}\n";
325   }
326 
327   void printStatements(raw_ostream &OS, int Indent = 0) const {
328     OS.indent(Indent) << "After statements {\n";
329     for (auto &Stmt : *S) {
330       OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
331       for (auto *MA : Stmt)
332         MA->print(OS);
333 
334       OS.indent(Indent + 12);
335       Stmt.printInstructions(OS);
336     }
337     OS.indent(Indent) << "}\n";
338   }
339 
340   /// Create a new MemoryAccess of type read and MemoryKind::Array.
341   ///
342   /// @param Stmt           The statement in which the access occurs.
343   /// @param LI             The instruction that does the access.
344   /// @param AccessRelation The array element that each statement instance
345   ///                       accesses.
346   ///
347   /// @param The newly created access.
348   MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
349                                     isl::map AccessRelation) {
350     isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
351     ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
352 
353     // Create a dummy SCEV access, to be replaced anyway.
354     SmallVector<const SCEV *, 4> Sizes;
355     Sizes.reserve(SAI->getNumberOfDimensions());
356     SmallVector<const SCEV *, 4> Subscripts;
357     Subscripts.reserve(SAI->getNumberOfDimensions());
358     for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
359       Sizes.push_back(SAI->getDimensionSize(i));
360       Subscripts.push_back(nullptr);
361     }
362 
363     MemoryAccess *Access =
364         new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
365                          LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
366     S->addAccessFunction(Access);
367     Stmt->addAccess(Access, true);
368 
369     Access->setNewAccessRelation(AccessRelation);
370 
371     return Access;
372   }
373 
374   /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a
375   /// use in every instance of @p UseStmt.
376   ///
377   /// @param UseStmt Statement a scalar is used in.
378   /// @param DefStmt Statement a scalar is defined in.
379   ///
380   /// @return { DomainUse[] -> DomainDef[] }
381   isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) {
382     // { DomainUse[] -> Scatter[] }
383     isl::map UseScatter = getScatterFor(UseStmt);
384 
385     // { Zone[] -> DomainDef[] }
386     isl::map ReachDefZone = getScalarReachingDefinition(DefStmt);
387 
388     // { Scatter[] -> DomainDef[] }
389     isl::map ReachDefTimepoints =
390         convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true);
391 
392     // { DomainUse[] -> DomainDef[] }
393     return UseScatter.apply_range(ReachDefTimepoints);
394   }
395 
396   /// Forward a load by reading from an array element that contains the same
397   /// value. Typically the location it was loaded from.
398   ///
399   /// @param TargetStmt  The statement the operand tree will be copied to.
400   /// @param Inst        The (possibly speculatable) instruction to forward.
401   /// @param UseStmt     The statement that uses @p Inst.
402   /// @param UseLoop     The loop @p Inst is used in.
403   /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
404   ///                    A mapping from the statement instance @p Inst is used
405   ///                    to the statement instance it is forwarded to.
406   /// @param DefStmt     The statement @p Inst is defined in.
407   /// @param DefLoop     The loop which contains @p Inst.
408   /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
409   ///                    A mapping from the statement instance @p Inst is
410   ///                    defined to the statement instance it is forwarded to.
411   /// @param DoIt        If false, only determine whether an operand tree can be
412   ///                    forwarded. If true, carry out the forwarding. Do not
413   ///                    use DoIt==true if an operand tree is not known to be
414   ///                    forwardable.
415   ///
416   /// @return FD_NotApplicable  if @p Inst cannot be forwarded by creating a new
417   ///                           load.
418   ///         FD_CannotForward  if the pointer operand cannot be forwarded.
419   ///         FD_CanForwardProfitably if @p Inst is forwardable.
420   ///         FD_DidForwardTree if @p DoIt was true.
421   ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
422                                       ScopStmt *UseStmt, Loop *UseLoop,
423                                       isl::map UseToTarget, ScopStmt *DefStmt,
424                                       Loop *DefLoop, isl::map DefToTarget,
425                                       bool DoIt) {
426     // Cannot do anything without successful known analysis.
427     if (Known.is_null() || Translator.is_null() || UseToTarget.is_null() ||
428         DefToTarget.is_null() || MaxOpGuard.hasQuotaExceeded())
429       return FD_NotApplicable;
430 
431     LoadInst *LI = dyn_cast<LoadInst>(Inst);
432     if (!LI)
433       return FD_NotApplicable;
434 
435     // If the load is already in the statement, no forwarding is necessary.
436     // However, it might happen that the LoadInst is already present in the
437     // statement's instruction list. In that case we do as follows:
438     // - For the evaluation (DoIt==false), we can trivially forward it as it is
439     //   benefit of forwarding an already present instruction.
440     // - For the execution (DoIt==true), prepend the instruction (to make it
441     //   available to all instructions following in the instruction list), but
442     //   do not add another MemoryAccess.
443     MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
444     if (Access && !DoIt)
445       return FD_CanForwardProfitably;
446 
447     ForwardingDecision OpDecision =
448         forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop,
449                     DefToTarget, DoIt);
450     switch (OpDecision) {
451     case FD_CannotForward:
452       assert(!DoIt);
453       return OpDecision;
454 
455     case FD_CanForwardLeaf:
456     case FD_CanForwardProfitably:
457       assert(!DoIt);
458       break;
459 
460     case FD_DidForwardLeaf:
461     case FD_DidForwardTree:
462       assert(DoIt);
463       break;
464 
465     default:
466       llvm_unreachable("Shouldn't return this");
467     }
468 
469     IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
470 
471     // { DomainDef[] -> ValInst[] }
472     isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
473     assert(isNormalized(ExpectedVal) && "LoadInsts are always normalized");
474 
475     // { DomainTarget[] -> ValInst[] }
476     isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
477     isl::union_map TranslatedExpectedVal =
478         isl::union_map(TargetExpectedVal).apply_range(Translator);
479 
480     // { DomainTarget[] -> Element[] }
481     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
482 
483     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
484     if (!SameVal)
485       return FD_NotApplicable;
486 
487     if (DoIt)
488       TargetStmt->prependInstruction(LI);
489 
490     if (!DoIt)
491       return FD_CanForwardProfitably;
492 
493     if (Access) {
494       DEBUG(dbgs() << "    forwarded known load with preexisting MemoryAccess"
495                    << Access << "\n");
496     } else {
497       Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
498       DEBUG(dbgs() << "    forwarded known load with new MemoryAccess" << Access
499                    << "\n");
500 
501       // { ValInst[] }
502       isl::space ValInstSpace = ExpectedVal.get_space().range();
503 
504       // After adding a new load to the SCoP, also update the Known content
505       // about it. The new load will have a known ValInst of
506       // { [DomainTarget[] -> Value[]] }
507       // but which -- because it is a copy of it -- has same value as the
508       // { [DomainDef[] -> Value[]] }
509       // that it replicates. Instead of  cloning the known content of
510       // [DomainDef[] -> Value[]]
511       // for DomainTarget[], we add a 'translator' that maps
512       // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
513       // before comparing to the known content.
514       // TODO: 'Translator' could also be used to map PHINodes to their incoming
515       // ValInsts.
516       if (ValInstSpace.is_wrapping()) {
517         // { DefDomain[] -> Value[] }
518         isl::map ValInsts = ExpectedVal.range().unwrap();
519 
520         // { DefDomain[] }
521         isl::set DefDomain = ValInsts.domain();
522 
523         // { Value[] }
524         isl::space ValSpace = ValInstSpace.unwrap().range();
525 
526         // { Value[] -> Value[] }
527         isl::map ValToVal =
528             isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
529 
530         // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
531         isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
532 
533         Translator = Translator.add_map(LocalTranslator);
534         DEBUG(dbgs() << "      local translator is " << LocalTranslator
535                      << "\n");
536       }
537     }
538     DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
539                  << "\n");
540     DEBUG(dbgs() << "      candidate elements where " << Candidates << "\n");
541     assert(Access);
542 
543     NumKnownLoadsForwarded++;
544     TotalKnownLoadsForwarded++;
545     return FD_DidForwardTree;
546   }
547 
548   /// Forward a scalar by redirecting the access to an array element that stores
549   /// the same value.
550   ///
551   /// @param TargetStmt  The statement the operand tree will be copied to.
552   /// @param Inst        The scalar to forward.
553   /// @param UseStmt     The statement that uses @p Inst.
554   /// @param UseLoop     The loop @p Inst is used in.
555   /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
556   ///                    A mapping from the statement instance @p Inst is used
557   ///                    in, to the statement instance it is forwarded to.
558   /// @param DefStmt     The statement @p Inst is defined in.
559   /// @param DefLoop     The loop which contains @p Inst.
560   /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
561   ///                    A mapping from the statement instance @p Inst is
562   ///                    defined in, to the statement instance it is forwarded
563   ///                    to.
564   /// @param DoIt        If false, only determine whether an operand tree can be
565   ///                    forwarded. If true, carry out the forwarding. Do not
566   ///                    use DoIt==true if an operand tree is not known to be
567   ///                    forwardable.
568   ///
569   /// @return FD_NotApplicable        if @p Inst cannot be reloaded.
570   ///         FD_CanForwardLeaf       if @p Inst can be reloaded.
571   ///         FD_CanForwardProfitably if @p Inst has been reloaded.
572   ///         FD_DidForwardLeaf       if @p DoIt was true.
573   ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
574                                         ScopStmt *UseStmt, Loop *UseLoop,
575                                         isl::map UseToTarget, ScopStmt *DefStmt,
576                                         Loop *DefLoop, isl::map DefToTarget,
577                                         bool DoIt) {
578     // Cannot do anything without successful known analysis.
579     if (Known.is_null() || Translator.is_null() || UseToTarget.is_null() ||
580         DefToTarget.is_null() || MaxOpGuard.hasQuotaExceeded())
581       return FD_NotApplicable;
582 
583     MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
584     if (Access && Access->isLatestArrayKind()) {
585       if (DoIt)
586         return FD_DidForwardLeaf;
587       return FD_CanForwardLeaf;
588     }
589 
590     // Don't spend too much time analyzing whether it can be reloaded. When
591     // carrying-out the forwarding, we cannot bail-out in the middle of the
592     // transformation. It also shouldn't take as long because some results are
593     // cached.
594     IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
595 
596     // { DomainDef[] -> ValInst[] }
597     isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
598 
599     // { DomainTarget[] -> ValInst[] }
600     isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
601     isl::union_map TranslatedExpectedVal =
602         TargetExpectedVal.apply_range(Translator);
603 
604     // { DomainTarget[] -> Element[] }
605     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
606 
607     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
608     if (!SameVal)
609       return FD_NotApplicable;
610 
611     if (!DoIt)
612       return FD_CanForwardProfitably;
613 
614     if (!Access)
615       Access = TargetStmt->ensureValueRead(Inst);
616 
617     simplify(SameVal);
618     Access->setNewAccessRelation(SameVal);
619 
620     TotalReloads++;
621     NumReloads++;
622     return FD_DidForwardLeaf;
623   }
624 
625   /// Forwards a speculatively executable instruction.
626   ///
627   /// @param TargetStmt  The statement the operand tree will be copied to.
628   /// @param UseInst     The (possibly speculatable) instruction to forward.
629   /// @param DefStmt     The statement @p UseInst is defined in.
630   /// @param DefLoop     The loop which contains @p UseInst.
631   /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
632   ///                    A mapping from the statement instance @p UseInst is
633   ///                    defined to the statement instance it is forwarded to.
634   /// @param DoIt        If false, only determine whether an operand tree can be
635   ///                    forwarded. If true, carry out the forwarding. Do not
636   ///                    use DoIt==true if an operand tree is not known to be
637   ///                    forwardable.
638   ///
639   /// @return FD_NotApplicable  if @p UseInst is not speculatable.
640   ///         FD_CannotForward  if one of @p UseInst's operands is not
641   ///                           forwardable.
642   ///         FD_CanForwardTree if @p UseInst is forwardable.
643   ///         FD_DidForward     if @p DoIt was true.
644   ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
645                                          Instruction *UseInst,
646                                          ScopStmt *DefStmt, Loop *DefLoop,
647                                          isl::map DefToTarget, bool DoIt) {
648     // PHIs, unless synthesizable, are not yet supported.
649     if (isa<PHINode>(UseInst))
650       return FD_NotApplicable;
651 
652     // Compatible instructions must satisfy the following conditions:
653     // 1. Idempotent (instruction will be copied, not moved; although its
654     //    original instance might be removed by simplification)
655     // 2. Not access memory (There might be memory writes between)
656     // 3. Not cause undefined behaviour (we might copy to a location when the
657     //    original instruction was no executed; this is currently not possible
658     //    because we do not forward PHINodes)
659     // 4. Not leak memory if executed multiple times (i.e. malloc)
660     //
661     // Instruction::mayHaveSideEffects is not sufficient because it considers
662     // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
663     // not sufficient because it allows memory accesses.
664     if (mayBeMemoryDependent(*UseInst))
665       return FD_NotApplicable;
666 
667     if (DoIt) {
668       // To ensure the right order, prepend this instruction before its
669       // operands. This ensures that its operands are inserted before the
670       // instruction using them.
671       // TODO: The operand tree is not really a tree, but a DAG. We should be
672       // able to handle DAGs without duplication.
673       TargetStmt->prependInstruction(UseInst);
674       NumInstructionsCopied++;
675       TotalInstructionsCopied++;
676     }
677 
678     for (Value *OpVal : UseInst->operand_values()) {
679       ForwardingDecision OpDecision =
680           forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DefToTarget, DoIt);
681       switch (OpDecision) {
682       case FD_CannotForward:
683         assert(!DoIt);
684         return FD_CannotForward;
685 
686       case FD_CanForwardLeaf:
687       case FD_CanForwardProfitably:
688         assert(!DoIt);
689         break;
690 
691       case FD_DidForwardLeaf:
692       case FD_DidForwardTree:
693         assert(DoIt);
694         break;
695 
696       case FD_NotApplicable:
697         llvm_unreachable("forwardTree should never return FD_NotApplicable");
698       }
699     }
700 
701     if (DoIt)
702       return FD_DidForwardTree;
703     return FD_CanForwardProfitably;
704   }
705 
706   /// Determines whether an operand tree can be forwarded or carries out a
707   /// forwarding, depending on the @p DoIt flag.
708   ///
709   /// @param TargetStmt  The statement the operand tree will be copied to.
710   /// @param UseVal      The value (usually an instruction) which is root of an
711   ///                    operand tree.
712   /// @param UseStmt     The statement that uses @p UseVal.
713   /// @param UseLoop     The loop @p UseVal is used in.
714   /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
715   ///                    A mapping from the statement instance @p UseVal is used
716   ///                    to the statement instance it is forwarded to.
717   /// @param DoIt        If false, only determine whether an operand tree can be
718   ///                    forwarded. If true, carry out the forwarding. Do not
719   ///                    use DoIt==true if an operand tree is not known to be
720   ///                    forwardable.
721   ///
722   /// @return If DoIt==false, return whether the operand tree can be forwarded.
723   ///         If DoIt==true, return FD_DidForward.
724   ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
725                                  ScopStmt *UseStmt, Loop *UseLoop,
726                                  isl::map UseToTarget, bool DoIt) {
727     ScopStmt *DefStmt = nullptr;
728     Loop *DefLoop = nullptr;
729 
730     // { DefDomain[] -> TargetDomain[] }
731     isl::map DefToTarget;
732 
733     VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
734     switch (VUse.getKind()) {
735     case VirtualUse::Constant:
736     case VirtualUse::Block:
737     case VirtualUse::Hoisted:
738       // These can be used anywhere without special considerations.
739       if (DoIt)
740         return FD_DidForwardTree;
741       return FD_CanForwardLeaf;
742 
743     case VirtualUse::Synthesizable: {
744       // ScopExpander will take care for of generating the code at the new
745       // location.
746       if (DoIt)
747         return FD_DidForwardTree;
748 
749       // Check if the value is synthesizable at the new location as well. This
750       // might be possible when leaving a loop for which ScalarEvolution is
751       // unable to derive the exit value for.
752       // TODO: If there is a LCSSA PHI at the loop exit, use that one.
753       // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
754       // do not forward past its loop header. This would require us to use a
755       // previous loop induction variable instead the current one. We currently
756       // do not allow forwarding PHI nodes, thus this should never occur (the
757       // only exception where no phi is necessary being an unreachable loop
758       // without edge from the outside).
759       VirtualUse TargetUse = VirtualUse::create(
760           S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
761       if (TargetUse.getKind() == VirtualUse::Synthesizable)
762         return FD_CanForwardLeaf;
763 
764       DEBUG(dbgs() << "    Synthesizable would not be synthesizable anymore: "
765                    << *UseVal << "\n");
766       return FD_CannotForward;
767     }
768 
769     case VirtualUse::ReadOnly:
770       // Note that we cannot return FD_CanForwardTree here. With a operand tree
771       // depth of 0, UseVal is the use in TargetStmt that we try to replace.
772       // With -polly-analyze-read-only-scalars=true we would ensure the
773       // existence of a MemoryAccess (which already exists for a leaf) and be
774       // removed again by tryForwardTree because it's goal is to remove this
775       // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
776       // to do so.
777       if (!DoIt)
778         return FD_CanForwardLeaf;
779 
780       // If we model read-only scalars, we need to create a MemoryAccess for it.
781       if (ModelReadOnlyScalars)
782         TargetStmt->ensureValueRead(UseVal);
783 
784       NumReadOnlyCopied++;
785       TotalReadOnlyCopied++;
786       return FD_DidForwardLeaf;
787 
788     case VirtualUse::Intra:
789       // Knowing that UseStmt and DefStmt are the same statement instance, just
790       // reuse the information about UseStmt for DefStmt
791       DefStmt = UseStmt;
792       DefToTarget = UseToTarget;
793 
794       LLVM_FALLTHROUGH;
795     case VirtualUse::Inter:
796       Instruction *Inst = cast<Instruction>(UseVal);
797 
798       if (!DefStmt) {
799         DefStmt = S->getStmtFor(Inst);
800         if (!DefStmt)
801           return FD_CannotForward;
802       }
803 
804       DefLoop = LI->getLoopFor(Inst->getParent());
805 
806       if (DefToTarget.is_null() && !Known.is_null()) {
807         IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
808 
809         // { UseDomain[] -> DefDomain[] }
810         isl::map UseToDef = computeUseToDefFlowDependency(UseStmt, DefStmt);
811 
812         // { DefDomain[] -> UseDomain[] -> TargetDomain[] } shortened to
813         // { DefDomain[] -> TargetDomain[] }
814         DefToTarget = UseToTarget.apply_domain(UseToDef);
815         simplify(DefToTarget);
816       }
817 
818       ForwardingDecision SpeculativeResult = forwardSpeculatable(
819           TargetStmt, Inst, DefStmt, DefLoop, DefToTarget, DoIt);
820       if (SpeculativeResult != FD_NotApplicable)
821         return SpeculativeResult;
822 
823       ForwardingDecision KnownResult =
824           forwardKnownLoad(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget,
825                            DefStmt, DefLoop, DefToTarget, DoIt);
826       if (KnownResult != FD_NotApplicable)
827         return KnownResult;
828 
829       ForwardingDecision ReloadResult =
830           reloadKnownContent(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget,
831                              DefStmt, DefLoop, DefToTarget, DoIt);
832       if (ReloadResult != FD_NotApplicable)
833         return ReloadResult;
834 
835       // When no method is found to forward the operand tree, we effectively
836       // cannot handle it.
837       DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
838       return FD_CannotForward;
839     }
840 
841     llvm_unreachable("Case unhandled");
842   }
843 
844   /// Try to forward an operand tree rooted in @p RA.
845   bool tryForwardTree(MemoryAccess *RA) {
846     assert(RA->isLatestScalarKind());
847     DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
848 
849     ScopStmt *Stmt = RA->getStatement();
850     Loop *InLoop = Stmt->getSurroundingLoop();
851 
852     isl::map TargetToUse;
853     if (!Known.is_null()) {
854       isl::space DomSpace = Stmt->getDomainSpace();
855       TargetToUse =
856           isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
857     }
858 
859     ForwardingDecision Assessment = forwardTree(
860         Stmt, RA->getAccessValue(), Stmt, InLoop, TargetToUse, false);
861     assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf);
862     if (Assessment != FD_CanForwardProfitably)
863       return false;
864 
865     ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt,
866                                                InLoop, TargetToUse, true);
867     assert(((Execution == FD_DidForwardTree) ||
868             (Execution == FD_DidForwardLeaf)) &&
869            "A previous positive assessment must also be executable");
870 
871     if (Execution == FD_DidForwardTree)
872       Stmt->removeSingleMemoryAccess(RA);
873     return true;
874   }
875 
876   /// Return which SCoP this instance is processing.
877   Scop *getScop() const { return S; }
878 
879   /// Run the algorithm: Use value read accesses as operand tree roots and try
880   /// to forward them into the statement.
881   bool forwardOperandTrees() {
882     for (ScopStmt &Stmt : *S) {
883       bool StmtModified = false;
884 
885       // Because we are modifying the MemoryAccess list, collect them first to
886       // avoid iterator invalidation.
887       SmallVector<MemoryAccess *, 16> Accs;
888       for (MemoryAccess *RA : Stmt) {
889         if (!RA->isRead())
890           continue;
891         if (!RA->isLatestScalarKind())
892           continue;
893 
894         Accs.push_back(RA);
895       }
896 
897       for (MemoryAccess *RA : Accs) {
898         if (tryForwardTree(RA)) {
899           Modified = true;
900           StmtModified = true;
901           NumForwardedTrees++;
902           TotalForwardedTrees++;
903         }
904       }
905 
906       if (StmtModified) {
907         NumModifiedStmts++;
908         TotalModifiedStmts++;
909       }
910     }
911 
912     if (Modified)
913       ScopsModified++;
914     return Modified;
915   }
916 
917   /// Print the pass result, performed transformations and the SCoP after the
918   /// transformation.
919   void print(raw_ostream &OS, int Indent = 0) {
920     printStatistics(OS, Indent);
921 
922     if (!Modified) {
923       // This line can easily be checked in regression tests.
924       OS << "ForwardOpTree executed, but did not modify anything\n";
925       return;
926     }
927 
928     printStatements(OS, Indent);
929   }
930 };
931 
932 /// Pass that redirects scalar reads to array elements that are known to contain
933 /// the same value.
934 ///
935 /// This reduces the number of scalar accesses and therefore potentially
936 /// increases the freedom of the scheduler. In the ideal case, all reads of a
937 /// scalar definition are redirected (We currently do not care about removing
938 /// the write in this case).  This is also useful for the main DeLICM pass as
939 /// there are less scalars to be mapped.
940 class ForwardOpTree : public ScopPass {
941 private:
942   /// The pass implementation, also holding per-scop data.
943   std::unique_ptr<ForwardOpTreeImpl> Impl;
944 
945 public:
946   static char ID;
947 
948   explicit ForwardOpTree() : ScopPass(ID) {}
949   ForwardOpTree(const ForwardOpTree &) = delete;
950   ForwardOpTree &operator=(const ForwardOpTree &) = delete;
951 
952   void getAnalysisUsage(AnalysisUsage &AU) const override {
953     AU.addRequiredTransitive<ScopInfoRegionPass>();
954     AU.addRequired<LoopInfoWrapperPass>();
955     AU.setPreservesAll();
956   }
957 
958   bool runOnScop(Scop &S) override {
959     // Free resources for previous SCoP's computation, if not yet done.
960     releaseMemory();
961 
962     LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
963 
964     {
965       IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
966       Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
967 
968       if (AnalyzeKnown) {
969         DEBUG(dbgs() << "Prepare forwarders...\n");
970         Impl->computeKnownValues();
971       }
972 
973       DEBUG(dbgs() << "Forwarding operand trees...\n");
974       Impl->forwardOperandTrees();
975 
976       if (MaxOpGuard.hasQuotaExceeded()) {
977         DEBUG(dbgs() << "Not all operations completed because of "
978                         "max_operations exceeded\n");
979         KnownOutOfQuota++;
980       }
981     }
982 
983     DEBUG(dbgs() << "\nFinal Scop:\n");
984     DEBUG(dbgs() << S);
985 
986     // Update statistics
987     auto ScopStats = S.getStatistics();
988     NumValueWrites += ScopStats.NumValueWrites;
989     NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
990     NumPHIWrites += ScopStats.NumPHIWrites;
991     NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
992     NumSingletonWrites += ScopStats.NumSingletonWrites;
993     NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
994 
995     return false;
996   }
997 
998   void printScop(raw_ostream &OS, Scop &S) const override {
999     if (!Impl)
1000       return;
1001 
1002     assert(Impl->getScop() == &S);
1003     Impl->print(OS);
1004   }
1005 
1006   void releaseMemory() override { Impl.reset(); }
1007 }; // class ForwardOpTree
1008 
1009 char ForwardOpTree::ID;
1010 
1011 } // namespace
1012 
1013 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
1014 
1015 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
1016                       "Polly - Forward operand tree", false, false)
1017 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1018 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
1019                     "Polly - Forward operand tree", false, false)
1020