1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Move instructions between statements.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "polly/ForwardOpTree.h"
15 #include "polly/Options.h"
16 #include "polly/ScopBuilder.h"
17 #include "polly/ScopInfo.h"
18 #include "polly/ScopPass.h"
19 #include "polly/Support/GICHelper.h"
20 #include "polly/Support/ISLOStream.h"
21 #include "polly/Support/ISLTools.h"
22 #include "polly/Support/VirtualInstruction.h"
23 #include "polly/ZoneAlgo.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/Analysis/LoopInfo.h"
28 #include "llvm/Analysis/ValueTracking.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Value.h"
32 #include "llvm/Pass.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Compiler.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/ErrorHandling.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "isl/ctx.h"
40 #include "isl/isl-noexceptions.h"
41 #include <cassert>
42 #include <memory>
43 
44 #define DEBUG_TYPE "polly-optree"
45 
46 using namespace llvm;
47 using namespace polly;
48 
49 static cl::opt<bool>
50     AnalyzeKnown("polly-optree-analyze-known",
51                  cl::desc("Analyze array contents for load forwarding"),
52                  cl::cat(PollyCategory), cl::init(true), cl::Hidden);
53 
54 static cl::opt<bool>
55     NormalizePHIs("polly-optree-normalize-phi",
56                   cl::desc("Replace PHIs by their incoming values"),
57                   cl::cat(PollyCategory), cl::init(false), cl::Hidden);
58 
59 static cl::opt<unsigned>
60     MaxOps("polly-optree-max-ops",
61            cl::desc("Maximum number of ISL operations to invest for known "
62                     "analysis; 0=no limit"),
63            cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
64 
65 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
66 STATISTIC(KnownOutOfQuota,
67           "Analyses aborted because max_operations was reached");
68 
69 STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
70 STATISTIC(TotalKnownLoadsForwarded,
71           "Number of forwarded loads because their value was known");
72 STATISTIC(TotalReloads, "Number of reloaded values");
73 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
74 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
75 STATISTIC(TotalModifiedStmts,
76           "Number of statements with at least one forwarded tree");
77 
78 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
79 
80 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
81 STATISTIC(NumValueWritesInLoops,
82           "Number of scalar value writes nested in affine loops after OpTree");
83 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
84 STATISTIC(NumPHIWritesInLoops,
85           "Number of scalar phi writes nested in affine loops after OpTree");
86 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
87 STATISTIC(NumSingletonWritesInLoops,
88           "Number of singleton writes nested in affine loops after OpTree");
89 
90 namespace {
91 
92 /// The state of whether an operand tree was/can be forwarded.
93 ///
94 /// The items apply to an instructions and its operand tree with the instruction
95 /// as the root element. If the value in question is not an instruction in the
96 /// SCoP, it can be a leaf of an instruction's operand tree.
97 enum ForwardingDecision {
98   /// The root instruction or value cannot be forwarded at all.
99   FD_CannotForward,
100 
101   /// The root instruction or value can be forwarded as a leaf of a larger
102   /// operand tree.
103   /// It does not make sense to move the value itself, it would just replace it
104   /// by a use of itself. For instance, a constant "5" used in a statement can
105   /// be forwarded, but it would just replace it by the same constant "5".
106   /// However, it makes sense to move as an operand of
107   ///
108   ///   %add = add 5, 5
109   ///
110   /// where "5" is moved as part of a larger operand tree. "5" would be placed
111   /// (disregarding for a moment that literal constants don't have a location
112   /// and can be used anywhere) into the same statement as %add would.
113   FD_CanForwardLeaf,
114 
115   /// The root instruction can be forwarded and doing so avoids a scalar
116   /// dependency.
117   ///
118   /// This can be either because the operand tree can be moved to the target
119   /// statement, or a memory access is redirected to read from a different
120   /// location.
121   FD_CanForwardProfitably,
122 
123   /// Used to indicate that a forwarding has be carried out successfully, and
124   /// the forwarded memory access can be deleted.
125   FD_DidForwardTree,
126 
127   /// Used to indicate that a forwarding has be carried out successfully, and
128   /// the forwarded memory access is being reused.
129   FD_DidForwardLeaf,
130 
131   /// A forwarding method cannot be applied to the operand tree.
132   /// The difference to FD_CannotForward is that there might be other methods
133   /// that can handle it.
134   /// The conditions that make an operand tree applicable must be checked even
135   /// with DoIt==true because a method following the one that returned
136   /// FD_NotApplicable might have returned FD_CanForwardTree.
137   FD_NotApplicable
138 };
139 
140 /// Implementation of operand tree forwarding for a specific SCoP.
141 ///
142 /// For a statement that requires a scalar value (through a value read
143 /// MemoryAccess), see if its operand can be moved into the statement. If so,
144 /// the MemoryAccess is removed and the all the operand tree instructions are
145 /// moved into the statement. All original instructions are left in the source
146 /// statements. The simplification pass can clean these up.
147 class ForwardOpTreeImpl : ZoneAlgorithm {
148 private:
149   /// Scope guard to limit the number of isl operations for this pass.
150   IslMaxOperationsGuard &MaxOpGuard;
151 
152   /// How many instructions have been copied to other statements.
153   int NumInstructionsCopied = 0;
154 
155   /// Number of loads forwarded because their value was known.
156   int NumKnownLoadsForwarded = 0;
157 
158   /// Number of values reloaded from known array elements.
159   int NumReloads = 0;
160 
161   /// How many read-only accesses have been copied.
162   int NumReadOnlyCopied = 0;
163 
164   /// How many operand trees have been forwarded.
165   int NumForwardedTrees = 0;
166 
167   /// Number of statements with at least one forwarded operand tree.
168   int NumModifiedStmts = 0;
169 
170   /// Whether we carried out at least one change to the SCoP.
171   bool Modified = false;
172 
173   /// Contains the zones where array elements are known to contain a specific
174   /// value.
175   /// { [Element[] -> Zone[]] -> ValInst[] }
176   /// @see computeKnown()
177   isl::union_map Known;
178 
179   /// Translator for newly introduced ValInsts to already existing ValInsts such
180   /// that new introduced load instructions can reuse the Known analysis of its
181   /// original load. { ValInst[] -> ValInst[] }
182   isl::union_map Translator;
183 
184   /// A cache for getDefToTarget().
185   DenseMap<std::pair<ScopStmt *, ScopStmt *>, isl::map> DefToTargetCache;
186 
187   /// Get list of array elements that do contain the same ValInst[] at Domain[].
188   ///
189   /// @param ValInst { Domain[] -> ValInst[] }
190   ///                The values for which we search for alternative locations,
191   ///                per statement instance.
192   ///
193   /// @return { Domain[] -> Element[] }
194   ///         For each statement instance, the array elements that contain the
195   ///         same ValInst.
196   isl::union_map findSameContentElements(isl::union_map ValInst) {
197     assert(!ValInst.is_single_valued().is_false());
198 
199     // { Domain[] }
200     isl::union_set Domain = ValInst.domain();
201 
202     // { Domain[] -> Scatter[] }
203     isl::union_map Schedule = getScatterFor(Domain);
204 
205     // { Element[] -> [Scatter[] -> ValInst[]] }
206     isl::union_map MustKnownCurried =
207         convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
208 
209     // { [Domain[] -> ValInst[]] -> Scatter[] }
210     isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
211 
212     // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
213     isl::union_map SchedValDomVal =
214         DomValSched.range_product(ValInst.range_map()).reverse();
215 
216     // { Element[] -> [Domain[] -> ValInst[]] }
217     isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
218 
219     // { Domain[] -> Element[] }
220     isl::union_map MustKnownMap =
221         MustKnownInst.uncurry().domain().unwrap().reverse();
222     simplify(MustKnownMap);
223 
224     return MustKnownMap;
225   }
226 
227   /// Find a single array element for each statement instance, within a single
228   /// array.
229   ///
230   /// @param MustKnown { Domain[] -> Element[] }
231   ///                  Set of candidate array elements.
232   /// @param Domain    { Domain[] }
233   ///                  The statement instance for which we need elements for.
234   ///
235   /// @return { Domain[] -> Element[] }
236   ///         For each statement instance, an array element out of @p MustKnown.
237   ///         All array elements must be in the same array (Polly does not yet
238   ///         support reading from different accesses using the same
239   ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
240   ///         null.
241   isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
242     // { Domain[] -> Element[] }
243     isl::map Result;
244 
245     // MemoryAccesses can read only elements from a single array
246     // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
247     // Look through all spaces until we find one that contains at least the
248     // wanted statement instance.s
249     MustKnown.foreach_map([&](isl::map Map) -> isl::stat {
250       // Get the array this is accessing.
251       isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
252       ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
253 
254       // No support for generation of indirect array accesses.
255       if (SAI->getBasePtrOriginSAI())
256         return isl::stat::ok; // continue
257 
258       // Determine whether this map contains all wanted values.
259       isl::set MapDom = Map.domain();
260       if (!Domain.is_subset(MapDom).is_true())
261         return isl::stat::ok; // continue
262 
263       // There might be multiple array elements that contain the same value, but
264       // choose only one of them. lexmin is used because it returns a one-value
265       // mapping, we do not care about which one.
266       // TODO: Get the simplest access function.
267       Result = Map.lexmin();
268       return isl::stat::error; // break
269     });
270 
271     return Result;
272   }
273 
274 public:
275   ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
276       : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
277 
278   /// Compute the zones of known array element contents.
279   ///
280   /// @return True if the computed #Known is usable.
281   bool computeKnownValues() {
282     isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
283 
284     // Check that nothing strange occurs.
285     collectCompatibleElts();
286 
287     {
288       IslQuotaScope QuotaScope = MaxOpGuard.enter();
289 
290       computeCommon();
291       if (NormalizePHIs)
292         computeNormalizedPHIs();
293       Known = computeKnown(true, true);
294 
295       // Preexisting ValInsts use the known content analysis of themselves.
296       Translator = makeIdentityMap(Known.range(), false);
297     }
298 
299     if (!Known || !Translator || !NormalizeMap) {
300       assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
301       Known = nullptr;
302       Translator = nullptr;
303       NormalizeMap = nullptr;
304       LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
305       return false;
306     }
307 
308     KnownAnalyzed++;
309     LLVM_DEBUG(dbgs() << "All known: " << Known << "\n");
310 
311     return true;
312   }
313 
314   void printStatistics(raw_ostream &OS, int Indent = 0) {
315     OS.indent(Indent) << "Statistics {\n";
316     OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
317                           << '\n';
318     OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
319                           << '\n';
320     OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
321     OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
322                           << '\n';
323     OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
324                           << '\n';
325     OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
326                           << NumModifiedStmts << '\n';
327     OS.indent(Indent) << "}\n";
328   }
329 
330   void printStatements(raw_ostream &OS, int Indent = 0) const {
331     OS.indent(Indent) << "After statements {\n";
332     for (auto &Stmt : *S) {
333       OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
334       for (auto *MA : Stmt)
335         MA->print(OS);
336 
337       OS.indent(Indent + 12);
338       Stmt.printInstructions(OS);
339     }
340     OS.indent(Indent) << "}\n";
341   }
342 
343   /// Create a new MemoryAccess of type read and MemoryKind::Array.
344   ///
345   /// @param Stmt           The statement in which the access occurs.
346   /// @param LI             The instruction that does the access.
347   /// @param AccessRelation The array element that each statement instance
348   ///                       accesses.
349   ///
350   /// @param The newly created access.
351   MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
352                                     isl::map AccessRelation) {
353     isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
354     ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
355 
356     // Create a dummy SCEV access, to be replaced anyway.
357     SmallVector<const SCEV *, 4> Sizes;
358     Sizes.reserve(SAI->getNumberOfDimensions());
359     SmallVector<const SCEV *, 4> Subscripts;
360     Subscripts.reserve(SAI->getNumberOfDimensions());
361     for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
362       Sizes.push_back(SAI->getDimensionSize(i));
363       Subscripts.push_back(nullptr);
364     }
365 
366     MemoryAccess *Access =
367         new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
368                          LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
369     S->addAccessFunction(Access);
370     Stmt->addAccess(Access, true);
371 
372     Access->setNewAccessRelation(AccessRelation);
373 
374     return Access;
375   }
376 
377   /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a
378   /// use in every instance of @p UseStmt.
379   ///
380   /// @param UseStmt Statement a scalar is used in.
381   /// @param DefStmt Statement a scalar is defined in.
382   ///
383   /// @return { DomainUse[] -> DomainDef[] }
384   isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) {
385     // { DomainUse[] -> Scatter[] }
386     isl::map UseScatter = getScatterFor(UseStmt);
387 
388     // { Zone[] -> DomainDef[] }
389     isl::map ReachDefZone = getScalarReachingDefinition(DefStmt);
390 
391     // { Scatter[] -> DomainDef[] }
392     isl::map ReachDefTimepoints =
393         convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true);
394 
395     // { DomainUse[] -> DomainDef[] }
396     return UseScatter.apply_range(ReachDefTimepoints);
397   }
398 
399   /// Is @p InnerLoop nested inside @p OuterLoop?
400   static bool isInsideLoop(Loop *OuterLoop, Loop *InnerLoop) {
401     // If OuterLoop is nullptr, we cannot call its contains() method. In this
402     // case OuterLoop represents the 'top level' and therefore contains all
403     // loop.
404     return !OuterLoop || OuterLoop->contains(InnerLoop);
405   }
406 
407   /// Get a domain translation map from a (scalar) definition to the statement
408   /// where the definition is being moved to.
409   ///
410   /// @p TargetStmt can also be seen an llvm::Use of an llvm::Value in
411   /// @p DefStmt. In addition, we allow transitive uses:
412   ///
413   /// DefStmt -> MiddleStmt -> TargetStmt
414   ///
415   /// where an operand tree of instructions in DefStmt and MiddleStmt are to be
416   /// moved to TargetStmt. To be generally correct, we also need to know all the
417   /// intermediate statements. However, we make use of the fact that we
418   /// currently do not support a move from a loop body across its header such
419   /// that only the first definition and the target statement are relevant.
420   ///
421   /// @param DefStmt    Statement from where a definition might be moved from.
422   /// @param TargetStmt Statement where the definition is potentially being
423   ///                   moved to (should contain a use of that definition).
424   ///
425   /// @return  { DomainDef[] -> DomainTarget[] }
426   isl::map getDefToTarget(ScopStmt *DefStmt, ScopStmt *TargetStmt) {
427     // No translation required if the definition is already at the target.
428     if (TargetStmt == DefStmt)
429       return isl::map::identity(
430           getDomainFor(TargetStmt).get_space().map_from_set());
431 
432     isl::map &Result = DefToTargetCache[std::make_pair(TargetStmt, DefStmt)];
433 
434     // This is a shortcut in case the schedule is still the original and
435     // TargetStmt is in the same or nested inside DefStmt's loop. With the
436     // additional assumption that operand trees do not cross DefStmt's loop
437     // header, then TargetStmt's instance shared coordinated are the same as
438     // DefStmt's coordinated. All TargetStmt instances with this prefix share
439     // the same DefStmt instance.
440     // Model:
441     //
442     //   for (int i < 0; i < N; i+=1) {
443     // DefStmt:
444     //    D = ...;
445     //    for (int j < 0; j < N; j+=1) {
446     // TargetStmt:
447     //      use(D);
448     //    }
449     //  }
450     //
451     // Here, the value used in TargetStmt is defined in the corresponding
452     // DefStmt, i.e.
453     //
454     //   { DefStmt[i] -> TargetStmt[i,j] }
455     //
456     // In practice, this should cover the majority of cases.
457     if (S->isOriginalSchedule() &&
458         isInsideLoop(DefStmt->getSurroundingLoop(),
459                      TargetStmt->getSurroundingLoop())) {
460       isl::set DefDomain = getDomainFor(DefStmt);
461       isl::set TargetDomain = getDomainFor(TargetStmt);
462       assert(DefDomain.dim(isl::dim::set) <= TargetDomain.dim(isl::dim::set));
463 
464       Result = isl::map::from_domain_and_range(DefDomain, TargetDomain);
465       for (unsigned i = 0, DefDims = DefDomain.dim(isl::dim::set); i < DefDims;
466            i += 1)
467         Result = Result.equate(isl::dim::in, i, isl::dim::out, i);
468     }
469 
470     if (!Result) {
471       // { DomainDef[] -> DomainTarget[] }
472       Result = computeUseToDefFlowDependency(TargetStmt, DefStmt).reverse();
473       simplify(Result);
474     }
475 
476     return Result;
477   }
478 
479   /// Forward a load by reading from an array element that contains the same
480   /// value. Typically the location it was loaded from.
481   ///
482   /// @param TargetStmt  The statement the operand tree will be copied to.
483   /// @param Inst        The (possibly speculatable) instruction to forward.
484   /// @param UseStmt     The statement that uses @p Inst.
485   /// @param UseLoop     The loop @p Inst is used in.
486   /// @param DefStmt     The statement @p Inst is defined in.
487   /// @param DefLoop     The loop which contains @p Inst.
488   /// @param DoIt        If false, only determine whether an operand tree can be
489   ///                    forwarded. If true, carry out the forwarding. Do not
490   ///                    use DoIt==true if an operand tree is not known to be
491   ///                    forwardable.
492   ///
493   /// @return FD_NotApplicable  if @p Inst cannot be forwarded by creating a new
494   ///                           load.
495   ///         FD_CannotForward  if the pointer operand cannot be forwarded.
496   ///         FD_CanForwardProfitably if @p Inst is forwardable.
497   ///         FD_DidForwardTree if @p DoIt was true.
498   ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
499                                       ScopStmt *UseStmt, Loop *UseLoop,
500                                       ScopStmt *DefStmt, Loop *DefLoop,
501                                       bool DoIt) {
502     // Cannot do anything without successful known analysis.
503     if (Known.is_null() || Translator.is_null() ||
504         MaxOpGuard.hasQuotaExceeded())
505       return FD_NotApplicable;
506 
507     LoadInst *LI = dyn_cast<LoadInst>(Inst);
508     if (!LI)
509       return FD_NotApplicable;
510 
511     // If the load is already in the statement, no forwarding is necessary.
512     // However, it might happen that the LoadInst is already present in the
513     // statement's instruction list. In that case we do as follows:
514     // - For the evaluation (DoIt==false), we can trivially forward it as it is
515     //   benefit of forwarding an already present instruction.
516     // - For the execution (DoIt==true), prepend the instruction (to make it
517     //   available to all instructions following in the instruction list), but
518     //   do not add another MemoryAccess.
519     MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
520     if (Access && !DoIt)
521       return FD_CanForwardProfitably;
522 
523     ForwardingDecision OpDecision = forwardTree(
524         TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, DoIt);
525     switch (OpDecision) {
526     case FD_CannotForward:
527       assert(!DoIt);
528       return OpDecision;
529 
530     case FD_CanForwardLeaf:
531     case FD_CanForwardProfitably:
532       assert(!DoIt);
533       break;
534 
535     case FD_DidForwardLeaf:
536     case FD_DidForwardTree:
537       assert(DoIt);
538       break;
539 
540     default:
541       llvm_unreachable("Shouldn't return this");
542     }
543 
544     IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
545 
546     // { DomainDef[] -> ValInst[] }
547     isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
548     assert(!isNormalized(ExpectedVal).is_false() &&
549            "LoadInsts are always normalized");
550 
551     // { DomainUse[] -> DomainTarget[] }
552     isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
553 
554     // { DomainTarget[] -> ValInst[] }
555     isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
556     isl::union_map TranslatedExpectedVal =
557         isl::union_map(TargetExpectedVal).apply_range(Translator);
558 
559     // { DomainTarget[] -> Element[] }
560     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
561 
562     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
563     if (!SameVal)
564       return FD_NotApplicable;
565 
566     if (DoIt)
567       TargetStmt->prependInstruction(LI);
568 
569     if (!DoIt)
570       return FD_CanForwardProfitably;
571 
572     if (Access) {
573       LLVM_DEBUG(
574           dbgs() << "    forwarded known load with preexisting MemoryAccess"
575                  << Access << "\n");
576     } else {
577       Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
578       LLVM_DEBUG(dbgs() << "    forwarded known load with new MemoryAccess"
579                         << Access << "\n");
580 
581       // { ValInst[] }
582       isl::space ValInstSpace = ExpectedVal.get_space().range();
583 
584       // After adding a new load to the SCoP, also update the Known content
585       // about it. The new load will have a known ValInst of
586       // { [DomainTarget[] -> Value[]] }
587       // but which -- because it is a copy of it -- has same value as the
588       // { [DomainDef[] -> Value[]] }
589       // that it replicates. Instead of  cloning the known content of
590       // [DomainDef[] -> Value[]]
591       // for DomainTarget[], we add a 'translator' that maps
592       // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
593       // before comparing to the known content.
594       // TODO: 'Translator' could also be used to map PHINodes to their incoming
595       // ValInsts.
596       if (ValInstSpace.is_wrapping()) {
597         // { DefDomain[] -> Value[] }
598         isl::map ValInsts = ExpectedVal.range().unwrap();
599 
600         // { DefDomain[] }
601         isl::set DefDomain = ValInsts.domain();
602 
603         // { Value[] }
604         isl::space ValSpace = ValInstSpace.unwrap().range();
605 
606         // { Value[] -> Value[] }
607         isl::map ValToVal =
608             isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
609 
610         // { DomainDef[] -> DomainTarget[] }
611         isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt);
612 
613         // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
614         isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
615 
616         Translator = Translator.add_map(LocalTranslator);
617         LLVM_DEBUG(dbgs() << "      local translator is " << LocalTranslator
618                           << "\n");
619       }
620     }
621     LLVM_DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
622                       << "\n");
623     LLVM_DEBUG(dbgs() << "      candidate elements where " << Candidates
624                       << "\n");
625     assert(Access);
626 
627     NumKnownLoadsForwarded++;
628     TotalKnownLoadsForwarded++;
629     return FD_DidForwardTree;
630   }
631 
632   /// Forward a scalar by redirecting the access to an array element that stores
633   /// the same value.
634   ///
635   /// @param TargetStmt  The statement the operand tree will be copied to.
636   /// @param Inst        The scalar to forward.
637   /// @param UseStmt     The statement that uses @p Inst.
638   /// @param UseLoop     The loop @p Inst is used in.
639   /// @param DefStmt     The statement @p Inst is defined in.
640   /// @param DefLoop     The loop which contains @p Inst.
641   /// @param DoIt        If false, only determine whether an operand tree can be
642   ///                    forwarded. If true, carry out the forwarding. Do not
643   ///                    use DoIt==true if an operand tree is not known to be
644   ///                    forwardable.
645   ///
646   /// @return FD_NotApplicable        if @p Inst cannot be reloaded.
647   ///         FD_CanForwardLeaf       if @p Inst can be reloaded.
648   ///         FD_CanForwardProfitably if @p Inst has been reloaded.
649   ///         FD_DidForwardLeaf       if @p DoIt was true.
650   ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
651                                         ScopStmt *UseStmt, Loop *UseLoop,
652                                         ScopStmt *DefStmt, Loop *DefLoop,
653                                         bool DoIt) {
654     // Cannot do anything without successful known analysis.
655     if (Known.is_null() || Translator.is_null() ||
656         MaxOpGuard.hasQuotaExceeded())
657       return FD_NotApplicable;
658 
659     MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
660     if (Access && Access->isLatestArrayKind()) {
661       if (DoIt)
662         return FD_DidForwardLeaf;
663       return FD_CanForwardLeaf;
664     }
665 
666     // Don't spend too much time analyzing whether it can be reloaded. When
667     // carrying-out the forwarding, we cannot bail-out in the middle of the
668     // transformation. It also shouldn't take as long because some results are
669     // cached.
670     IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
671 
672     // { DomainDef[] -> ValInst[] }
673     isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
674 
675     // { DomainUse[] -> DomainTarget[] }
676     isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
677 
678     // { DomainTarget[] -> ValInst[] }
679     isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
680     isl::union_map TranslatedExpectedVal =
681         TargetExpectedVal.apply_range(Translator);
682 
683     // { DomainTarget[] -> Element[] }
684     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
685 
686     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
687     if (!SameVal)
688       return FD_NotApplicable;
689 
690     if (!DoIt)
691       return FD_CanForwardProfitably;
692 
693     if (!Access)
694       Access = TargetStmt->ensureValueRead(Inst);
695 
696     simplify(SameVal);
697     Access->setNewAccessRelation(SameVal);
698 
699     TotalReloads++;
700     NumReloads++;
701     return FD_DidForwardLeaf;
702   }
703 
704   /// Forwards a speculatively executable instruction.
705   ///
706   /// @param TargetStmt  The statement the operand tree will be copied to.
707   /// @param UseInst     The (possibly speculatable) instruction to forward.
708   /// @param DefStmt     The statement @p UseInst is defined in.
709   /// @param DefLoop     The loop which contains @p UseInst.
710   /// @param DoIt        If false, only determine whether an operand tree can be
711   ///                    forwarded. If true, carry out the forwarding. Do not
712   ///                    use DoIt==true if an operand tree is not known to be
713   ///                    forwardable.
714   ///
715   /// @return FD_NotApplicable  if @p UseInst is not speculatable.
716   ///         FD_CannotForward  if one of @p UseInst's operands is not
717   ///                           forwardable.
718   ///         FD_CanForwardTree if @p UseInst is forwardable.
719   ///         FD_DidForward     if @p DoIt was true.
720   ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
721                                          Instruction *UseInst,
722                                          ScopStmt *DefStmt, Loop *DefLoop,
723                                          bool DoIt) {
724     // PHIs, unless synthesizable, are not yet supported.
725     if (isa<PHINode>(UseInst))
726       return FD_NotApplicable;
727 
728     // Compatible instructions must satisfy the following conditions:
729     // 1. Idempotent (instruction will be copied, not moved; although its
730     //    original instance might be removed by simplification)
731     // 2. Not access memory (There might be memory writes between)
732     // 3. Not cause undefined behaviour (we might copy to a location when the
733     //    original instruction was no executed; this is currently not possible
734     //    because we do not forward PHINodes)
735     // 4. Not leak memory if executed multiple times (i.e. malloc)
736     //
737     // Instruction::mayHaveSideEffects is not sufficient because it considers
738     // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
739     // not sufficient because it allows memory accesses.
740     if (mayBeMemoryDependent(*UseInst))
741       return FD_NotApplicable;
742 
743     if (DoIt) {
744       // To ensure the right order, prepend this instruction before its
745       // operands. This ensures that its operands are inserted before the
746       // instruction using them.
747       // TODO: The operand tree is not really a tree, but a DAG. We should be
748       // able to handle DAGs without duplication.
749       TargetStmt->prependInstruction(UseInst);
750       NumInstructionsCopied++;
751       TotalInstructionsCopied++;
752     }
753 
754     for (Value *OpVal : UseInst->operand_values()) {
755       ForwardingDecision OpDecision =
756           forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DoIt);
757       switch (OpDecision) {
758       case FD_CannotForward:
759         assert(!DoIt);
760         return FD_CannotForward;
761 
762       case FD_CanForwardLeaf:
763       case FD_CanForwardProfitably:
764         assert(!DoIt);
765         break;
766 
767       case FD_DidForwardLeaf:
768       case FD_DidForwardTree:
769         assert(DoIt);
770         break;
771 
772       case FD_NotApplicable:
773         llvm_unreachable("forwardTree should never return FD_NotApplicable");
774       }
775     }
776 
777     if (DoIt)
778       return FD_DidForwardTree;
779     return FD_CanForwardProfitably;
780   }
781 
782   /// Determines whether an operand tree can be forwarded or carries out a
783   /// forwarding, depending on the @p DoIt flag.
784   ///
785   /// @param TargetStmt  The statement the operand tree will be copied to.
786   /// @param UseVal      The value (usually an instruction) which is root of an
787   ///                    operand tree.
788   /// @param UseStmt     The statement that uses @p UseVal.
789   /// @param UseLoop     The loop @p UseVal is used in.
790   /// @param DoIt        If false, only determine whether an operand tree can be
791   ///                    forwarded. If true, carry out the forwarding. Do not
792   ///                    use DoIt==true if an operand tree is not known to be
793   ///                    forwardable.
794   ///
795   /// @return If DoIt==false, return whether the operand tree can be forwarded.
796   ///         If DoIt==true, return FD_DidForward.
797   ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
798                                  ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) {
799     ScopStmt *DefStmt = nullptr;
800     Loop *DefLoop = nullptr;
801 
802     // { DefDomain[] -> TargetDomain[] }
803     isl::map DefToTarget;
804 
805     VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
806     switch (VUse.getKind()) {
807     case VirtualUse::Constant:
808     case VirtualUse::Block:
809     case VirtualUse::Hoisted:
810       // These can be used anywhere without special considerations.
811       if (DoIt)
812         return FD_DidForwardTree;
813       return FD_CanForwardLeaf;
814 
815     case VirtualUse::Synthesizable: {
816       // ScopExpander will take care for of generating the code at the new
817       // location.
818       if (DoIt)
819         return FD_DidForwardTree;
820 
821       // Check if the value is synthesizable at the new location as well. This
822       // might be possible when leaving a loop for which ScalarEvolution is
823       // unable to derive the exit value for.
824       // TODO: If there is a LCSSA PHI at the loop exit, use that one.
825       // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
826       // do not forward past its loop header. This would require us to use a
827       // previous loop induction variable instead the current one. We currently
828       // do not allow forwarding PHI nodes, thus this should never occur (the
829       // only exception where no phi is necessary being an unreachable loop
830       // without edge from the outside).
831       VirtualUse TargetUse = VirtualUse::create(
832           S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
833       if (TargetUse.getKind() == VirtualUse::Synthesizable)
834         return FD_CanForwardLeaf;
835 
836       LLVM_DEBUG(
837           dbgs() << "    Synthesizable would not be synthesizable anymore: "
838                  << *UseVal << "\n");
839       return FD_CannotForward;
840     }
841 
842     case VirtualUse::ReadOnly:
843       // Note that we cannot return FD_CanForwardTree here. With a operand tree
844       // depth of 0, UseVal is the use in TargetStmt that we try to replace.
845       // With -polly-analyze-read-only-scalars=true we would ensure the
846       // existence of a MemoryAccess (which already exists for a leaf) and be
847       // removed again by tryForwardTree because it's goal is to remove this
848       // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
849       // to do so.
850       if (!DoIt)
851         return FD_CanForwardLeaf;
852 
853       // If we model read-only scalars, we need to create a MemoryAccess for it.
854       if (ModelReadOnlyScalars)
855         TargetStmt->ensureValueRead(UseVal);
856 
857       NumReadOnlyCopied++;
858       TotalReadOnlyCopied++;
859       return FD_DidForwardLeaf;
860 
861     case VirtualUse::Intra:
862       // Knowing that UseStmt and DefStmt are the same statement instance, just
863       // reuse the information about UseStmt for DefStmt
864       DefStmt = UseStmt;
865 
866       LLVM_FALLTHROUGH;
867     case VirtualUse::Inter:
868       Instruction *Inst = cast<Instruction>(UseVal);
869 
870       if (!DefStmt) {
871         DefStmt = S->getStmtFor(Inst);
872         if (!DefStmt)
873           return FD_CannotForward;
874       }
875 
876       DefLoop = LI->getLoopFor(Inst->getParent());
877 
878       ForwardingDecision SpeculativeResult =
879           forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop, DoIt);
880       if (SpeculativeResult != FD_NotApplicable)
881         return SpeculativeResult;
882 
883       ForwardingDecision KnownResult = forwardKnownLoad(
884           TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
885       if (KnownResult != FD_NotApplicable)
886         return KnownResult;
887 
888       ForwardingDecision ReloadResult = reloadKnownContent(
889           TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
890       if (ReloadResult != FD_NotApplicable)
891         return ReloadResult;
892 
893       // When no method is found to forward the operand tree, we effectively
894       // cannot handle it.
895       LLVM_DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
896       return FD_CannotForward;
897     }
898 
899     llvm_unreachable("Case unhandled");
900   }
901 
902   /// Try to forward an operand tree rooted in @p RA.
903   bool tryForwardTree(MemoryAccess *RA) {
904     assert(RA->isLatestScalarKind());
905     LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
906 
907     ScopStmt *Stmt = RA->getStatement();
908     Loop *InLoop = Stmt->getSurroundingLoop();
909 
910     isl::map TargetToUse;
911     if (!Known.is_null()) {
912       isl::space DomSpace = Stmt->getDomainSpace();
913       TargetToUse =
914           isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
915     }
916 
917     ForwardingDecision Assessment =
918         forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, false);
919     assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf);
920     if (Assessment != FD_CanForwardProfitably)
921       return false;
922 
923     ForwardingDecision Execution =
924         forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, true);
925     assert(((Execution == FD_DidForwardTree) ||
926             (Execution == FD_DidForwardLeaf)) &&
927            "A previous positive assessment must also be executable");
928 
929     if (Execution == FD_DidForwardTree)
930       Stmt->removeSingleMemoryAccess(RA);
931     return true;
932   }
933 
934   /// Return which SCoP this instance is processing.
935   Scop *getScop() const { return S; }
936 
937   /// Run the algorithm: Use value read accesses as operand tree roots and try
938   /// to forward them into the statement.
939   bool forwardOperandTrees() {
940     for (ScopStmt &Stmt : *S) {
941       bool StmtModified = false;
942 
943       // Because we are modifying the MemoryAccess list, collect them first to
944       // avoid iterator invalidation.
945       SmallVector<MemoryAccess *, 16> Accs;
946       for (MemoryAccess *RA : Stmt) {
947         if (!RA->isRead())
948           continue;
949         if (!RA->isLatestScalarKind())
950           continue;
951 
952         Accs.push_back(RA);
953       }
954 
955       for (MemoryAccess *RA : Accs) {
956         if (tryForwardTree(RA)) {
957           Modified = true;
958           StmtModified = true;
959           NumForwardedTrees++;
960           TotalForwardedTrees++;
961         }
962       }
963 
964       if (StmtModified) {
965         NumModifiedStmts++;
966         TotalModifiedStmts++;
967       }
968     }
969 
970     if (Modified)
971       ScopsModified++;
972     return Modified;
973   }
974 
975   /// Print the pass result, performed transformations and the SCoP after the
976   /// transformation.
977   void print(raw_ostream &OS, int Indent = 0) {
978     printStatistics(OS, Indent);
979 
980     if (!Modified) {
981       // This line can easily be checked in regression tests.
982       OS << "ForwardOpTree executed, but did not modify anything\n";
983       return;
984     }
985 
986     printStatements(OS, Indent);
987   }
988 };
989 
990 /// Pass that redirects scalar reads to array elements that are known to contain
991 /// the same value.
992 ///
993 /// This reduces the number of scalar accesses and therefore potentially
994 /// increases the freedom of the scheduler. In the ideal case, all reads of a
995 /// scalar definition are redirected (We currently do not care about removing
996 /// the write in this case).  This is also useful for the main DeLICM pass as
997 /// there are less scalars to be mapped.
998 class ForwardOpTree : public ScopPass {
999 private:
1000   /// The pass implementation, also holding per-scop data.
1001   std::unique_ptr<ForwardOpTreeImpl> Impl;
1002 
1003 public:
1004   static char ID;
1005 
1006   explicit ForwardOpTree() : ScopPass(ID) {}
1007   ForwardOpTree(const ForwardOpTree &) = delete;
1008   ForwardOpTree &operator=(const ForwardOpTree &) = delete;
1009 
1010   void getAnalysisUsage(AnalysisUsage &AU) const override {
1011     AU.addRequiredTransitive<ScopInfoRegionPass>();
1012     AU.addRequired<LoopInfoWrapperPass>();
1013     AU.setPreservesAll();
1014   }
1015 
1016   bool runOnScop(Scop &S) override {
1017     // Free resources for previous SCoP's computation, if not yet done.
1018     releaseMemory();
1019 
1020     LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1021 
1022     {
1023       IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
1024       Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
1025 
1026       if (AnalyzeKnown) {
1027         LLVM_DEBUG(dbgs() << "Prepare forwarders...\n");
1028         Impl->computeKnownValues();
1029       }
1030 
1031       LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n");
1032       Impl->forwardOperandTrees();
1033 
1034       if (MaxOpGuard.hasQuotaExceeded()) {
1035         LLVM_DEBUG(dbgs() << "Not all operations completed because of "
1036                              "max_operations exceeded\n");
1037         KnownOutOfQuota++;
1038       }
1039     }
1040 
1041     LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
1042     LLVM_DEBUG(dbgs() << S);
1043 
1044     // Update statistics
1045     auto ScopStats = S.getStatistics();
1046     NumValueWrites += ScopStats.NumValueWrites;
1047     NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
1048     NumPHIWrites += ScopStats.NumPHIWrites;
1049     NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
1050     NumSingletonWrites += ScopStats.NumSingletonWrites;
1051     NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
1052 
1053     return false;
1054   }
1055 
1056   void printScop(raw_ostream &OS, Scop &S) const override {
1057     if (!Impl)
1058       return;
1059 
1060     assert(Impl->getScop() == &S);
1061     Impl->print(OS);
1062   }
1063 
1064   void releaseMemory() override { Impl.reset(); }
1065 }; // class ForwardOpTree
1066 
1067 char ForwardOpTree::ID;
1068 } // namespace
1069 
1070 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
1071 
1072 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
1073                       "Polly - Forward operand tree", false, false)
1074 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1075 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
1076                     "Polly - Forward operand tree", false, false)
1077