1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Move instructions between statements.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "polly/ForwardOpTree.h"
14 #include "polly/Options.h"
15 #include "polly/ScopBuilder.h"
16 #include "polly/ScopInfo.h"
17 #include "polly/ScopPass.h"
18 #include "polly/Support/GICHelper.h"
19 #include "polly/Support/ISLOStream.h"
20 #include "polly/Support/ISLTools.h"
21 #include "polly/Support/VirtualInstruction.h"
22 #include "polly/ZoneAlgo.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/Analysis/LoopInfo.h"
27 #include "llvm/Analysis/ValueTracking.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Value.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Compiler.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include "isl/ctx.h"
38 #include "isl/isl-noexceptions.h"
39 #include <cassert>
40 #include <memory>
41 
42 #define DEBUG_TYPE "polly-optree"
43 
44 using namespace llvm;
45 using namespace polly;
46 
47 static cl::opt<bool>
48     AnalyzeKnown("polly-optree-analyze-known",
49                  cl::desc("Analyze array contents for load forwarding"),
50                  cl::cat(PollyCategory), cl::init(true), cl::Hidden);
51 
52 static cl::opt<bool>
53     NormalizePHIs("polly-optree-normalize-phi",
54                   cl::desc("Replace PHIs by their incoming values"),
55                   cl::cat(PollyCategory), cl::init(false), cl::Hidden);
56 
57 static cl::opt<unsigned>
58     MaxOps("polly-optree-max-ops",
59            cl::desc("Maximum number of ISL operations to invest for known "
60                     "analysis; 0=no limit"),
61            cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
62 
63 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
64 STATISTIC(KnownOutOfQuota,
65           "Analyses aborted because max_operations was reached");
66 
67 STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
68 STATISTIC(TotalKnownLoadsForwarded,
69           "Number of forwarded loads because their value was known");
70 STATISTIC(TotalReloads, "Number of reloaded values");
71 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
72 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
73 STATISTIC(TotalModifiedStmts,
74           "Number of statements with at least one forwarded tree");
75 
76 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
77 
78 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
79 STATISTIC(NumValueWritesInLoops,
80           "Number of scalar value writes nested in affine loops after OpTree");
81 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
82 STATISTIC(NumPHIWritesInLoops,
83           "Number of scalar phi writes nested in affine loops after OpTree");
84 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
85 STATISTIC(NumSingletonWritesInLoops,
86           "Number of singleton writes nested in affine loops after OpTree");
87 
88 namespace {
89 
90 /// The state of whether an operand tree was/can be forwarded.
91 ///
92 /// The items apply to an instructions and its operand tree with the instruction
93 /// as the root element. If the value in question is not an instruction in the
94 /// SCoP, it can be a leaf of an instruction's operand tree.
95 enum ForwardingDecision {
96   /// The root instruction or value cannot be forwarded at all.
97   FD_CannotForward,
98 
99   /// The root instruction or value can be forwarded as a leaf of a larger
100   /// operand tree.
101   /// It does not make sense to move the value itself, it would just replace it
102   /// by a use of itself. For instance, a constant "5" used in a statement can
103   /// be forwarded, but it would just replace it by the same constant "5".
104   /// However, it makes sense to move as an operand of
105   ///
106   ///   %add = add 5, 5
107   ///
108   /// where "5" is moved as part of a larger operand tree. "5" would be placed
109   /// (disregarding for a moment that literal constants don't have a location
110   /// and can be used anywhere) into the same statement as %add would.
111   FD_CanForwardLeaf,
112 
113   /// The root instruction can be forwarded and doing so avoids a scalar
114   /// dependency.
115   ///
116   /// This can be either because the operand tree can be moved to the target
117   /// statement, or a memory access is redirected to read from a different
118   /// location.
119   FD_CanForwardProfitably,
120 
121   /// Used to indicate that a forwarding has be carried out successfully, and
122   /// the forwarded memory access can be deleted.
123   FD_DidForwardTree,
124 
125   /// Used to indicate that a forwarding has be carried out successfully, and
126   /// the forwarded memory access is being reused.
127   FD_DidForwardLeaf,
128 
129   /// A forwarding method cannot be applied to the operand tree.
130   /// The difference to FD_CannotForward is that there might be other methods
131   /// that can handle it.
132   /// The conditions that make an operand tree applicable must be checked even
133   /// with DoIt==true because a method following the one that returned
134   /// FD_NotApplicable might have returned FD_CanForwardTree.
135   FD_NotApplicable
136 };
137 
138 /// Implementation of operand tree forwarding for a specific SCoP.
139 ///
140 /// For a statement that requires a scalar value (through a value read
141 /// MemoryAccess), see if its operand can be moved into the statement. If so,
142 /// the MemoryAccess is removed and the all the operand tree instructions are
143 /// moved into the statement. All original instructions are left in the source
144 /// statements. The simplification pass can clean these up.
145 class ForwardOpTreeImpl : ZoneAlgorithm {
146 private:
147   /// Scope guard to limit the number of isl operations for this pass.
148   IslMaxOperationsGuard &MaxOpGuard;
149 
150   /// How many instructions have been copied to other statements.
151   int NumInstructionsCopied = 0;
152 
153   /// Number of loads forwarded because their value was known.
154   int NumKnownLoadsForwarded = 0;
155 
156   /// Number of values reloaded from known array elements.
157   int NumReloads = 0;
158 
159   /// How many read-only accesses have been copied.
160   int NumReadOnlyCopied = 0;
161 
162   /// How many operand trees have been forwarded.
163   int NumForwardedTrees = 0;
164 
165   /// Number of statements with at least one forwarded operand tree.
166   int NumModifiedStmts = 0;
167 
168   /// Whether we carried out at least one change to the SCoP.
169   bool Modified = false;
170 
171   /// Contains the zones where array elements are known to contain a specific
172   /// value.
173   /// { [Element[] -> Zone[]] -> ValInst[] }
174   /// @see computeKnown()
175   isl::union_map Known;
176 
177   /// Translator for newly introduced ValInsts to already existing ValInsts such
178   /// that new introduced load instructions can reuse the Known analysis of its
179   /// original load. { ValInst[] -> ValInst[] }
180   isl::union_map Translator;
181 
182   /// Get list of array elements that do contain the same ValInst[] at Domain[].
183   ///
184   /// @param ValInst { Domain[] -> ValInst[] }
185   ///                The values for which we search for alternative locations,
186   ///                per statement instance.
187   ///
188   /// @return { Domain[] -> Element[] }
189   ///         For each statement instance, the array elements that contain the
190   ///         same ValInst.
191   isl::union_map findSameContentElements(isl::union_map ValInst) {
192     assert(!ValInst.is_single_valued().is_false());
193 
194     // { Domain[] }
195     isl::union_set Domain = ValInst.domain();
196 
197     // { Domain[] -> Scatter[] }
198     isl::union_map Schedule = getScatterFor(Domain);
199 
200     // { Element[] -> [Scatter[] -> ValInst[]] }
201     isl::union_map MustKnownCurried =
202         convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
203 
204     // { [Domain[] -> ValInst[]] -> Scatter[] }
205     isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
206 
207     // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
208     isl::union_map SchedValDomVal =
209         DomValSched.range_product(ValInst.range_map()).reverse();
210 
211     // { Element[] -> [Domain[] -> ValInst[]] }
212     isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
213 
214     // { Domain[] -> Element[] }
215     isl::union_map MustKnownMap =
216         MustKnownInst.uncurry().domain().unwrap().reverse();
217     simplify(MustKnownMap);
218 
219     return MustKnownMap;
220   }
221 
222   /// Find a single array element for each statement instance, within a single
223   /// array.
224   ///
225   /// @param MustKnown { Domain[] -> Element[] }
226   ///                  Set of candidate array elements.
227   /// @param Domain    { Domain[] }
228   ///                  The statement instance for which we need elements for.
229   ///
230   /// @return { Domain[] -> Element[] }
231   ///         For each statement instance, an array element out of @p MustKnown.
232   ///         All array elements must be in the same array (Polly does not yet
233   ///         support reading from different accesses using the same
234   ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
235   ///         null.
236   isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
237     // { Domain[] -> Element[] }
238     isl::map Result;
239 
240     // MemoryAccesses can read only elements from a single array
241     // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
242     // Look through all spaces until we find one that contains at least the
243     // wanted statement instance.s
244     for (isl::map Map : MustKnown.get_map_list()) {
245       // Get the array this is accessing.
246       isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
247       ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
248 
249       // No support for generation of indirect array accesses.
250       if (SAI->getBasePtrOriginSAI())
251         continue;
252 
253       // Determine whether this map contains all wanted values.
254       isl::set MapDom = Map.domain();
255       if (!Domain.is_subset(MapDom).is_true())
256         continue;
257 
258       // There might be multiple array elements that contain the same value, but
259       // choose only one of them. lexmin is used because it returns a one-value
260       // mapping, we do not care about which one.
261       // TODO: Get the simplest access function.
262       Result = Map.lexmin();
263       break;
264     }
265 
266     return Result;
267   }
268 
269 public:
270   ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
271       : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
272 
273   /// Compute the zones of known array element contents.
274   ///
275   /// @return True if the computed #Known is usable.
276   bool computeKnownValues() {
277     isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
278 
279     // Check that nothing strange occurs.
280     collectCompatibleElts();
281 
282     {
283       IslQuotaScope QuotaScope = MaxOpGuard.enter();
284 
285       computeCommon();
286       if (NormalizePHIs)
287         computeNormalizedPHIs();
288       Known = computeKnown(true, true);
289 
290       // Preexisting ValInsts use the known content analysis of themselves.
291       Translator = makeIdentityMap(Known.range(), false);
292     }
293 
294     if (!Known || !Translator || !NormalizeMap) {
295       assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
296       Known = nullptr;
297       Translator = nullptr;
298       NormalizeMap = nullptr;
299       LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
300       return false;
301     }
302 
303     KnownAnalyzed++;
304     LLVM_DEBUG(dbgs() << "All known: " << Known << "\n");
305 
306     return true;
307   }
308 
309   void printStatistics(raw_ostream &OS, int Indent = 0) {
310     OS.indent(Indent) << "Statistics {\n";
311     OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
312                           << '\n';
313     OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
314                           << '\n';
315     OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
316     OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
317                           << '\n';
318     OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
319                           << '\n';
320     OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
321                           << NumModifiedStmts << '\n';
322     OS.indent(Indent) << "}\n";
323   }
324 
325   void printStatements(raw_ostream &OS, int Indent = 0) const {
326     OS.indent(Indent) << "After statements {\n";
327     for (auto &Stmt : *S) {
328       OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
329       for (auto *MA : Stmt)
330         MA->print(OS);
331 
332       OS.indent(Indent + 12);
333       Stmt.printInstructions(OS);
334     }
335     OS.indent(Indent) << "}\n";
336   }
337 
338   /// Create a new MemoryAccess of type read and MemoryKind::Array.
339   ///
340   /// @param Stmt           The statement in which the access occurs.
341   /// @param LI             The instruction that does the access.
342   /// @param AccessRelation The array element that each statement instance
343   ///                       accesses.
344   ///
345   /// @param The newly created access.
346   MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
347                                     isl::map AccessRelation) {
348     isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
349     ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
350 
351     // Create a dummy SCEV access, to be replaced anyway.
352     SmallVector<const SCEV *, 4> Sizes;
353     Sizes.reserve(SAI->getNumberOfDimensions());
354     SmallVector<const SCEV *, 4> Subscripts;
355     Subscripts.reserve(SAI->getNumberOfDimensions());
356     for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
357       Sizes.push_back(SAI->getDimensionSize(i));
358       Subscripts.push_back(nullptr);
359     }
360 
361     MemoryAccess *Access =
362         new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
363                          LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
364     S->addAccessFunction(Access);
365     Stmt->addAccess(Access, true);
366 
367     Access->setNewAccessRelation(AccessRelation);
368 
369     return Access;
370   }
371 
372   /// Forward a load by reading from an array element that contains the same
373   /// value. Typically the location it was loaded from.
374   ///
375   /// @param TargetStmt  The statement the operand tree will be copied to.
376   /// @param Inst        The (possibly speculatable) instruction to forward.
377   /// @param UseStmt     The statement that uses @p Inst.
378   /// @param UseLoop     The loop @p Inst is used in.
379   /// @param DefStmt     The statement @p Inst is defined in.
380   /// @param DefLoop     The loop which contains @p Inst.
381   /// @param DoIt        If false, only determine whether an operand tree can be
382   ///                    forwarded. If true, carry out the forwarding. Do not
383   ///                    use DoIt==true if an operand tree is not known to be
384   ///                    forwardable.
385   ///
386   /// @return FD_NotApplicable  if @p Inst cannot be forwarded by creating a new
387   ///                           load.
388   ///         FD_CannotForward  if the pointer operand cannot be forwarded.
389   ///         FD_CanForwardProfitably if @p Inst is forwardable.
390   ///         FD_DidForwardTree if @p DoIt was true.
391   ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
392                                       ScopStmt *UseStmt, Loop *UseLoop,
393                                       ScopStmt *DefStmt, Loop *DefLoop,
394                                       bool DoIt) {
395     // Cannot do anything without successful known analysis.
396     if (Known.is_null() || Translator.is_null() ||
397         MaxOpGuard.hasQuotaExceeded())
398       return FD_NotApplicable;
399 
400     LoadInst *LI = dyn_cast<LoadInst>(Inst);
401     if (!LI)
402       return FD_NotApplicable;
403 
404     // If the load is already in the statement, no forwarding is necessary.
405     // However, it might happen that the LoadInst is already present in the
406     // statement's instruction list. In that case we do as follows:
407     // - For the evaluation (DoIt==false), we can trivially forward it as it is
408     //   benefit of forwarding an already present instruction.
409     // - For the execution (DoIt==true), prepend the instruction (to make it
410     //   available to all instructions following in the instruction list), but
411     //   do not add another MemoryAccess.
412     MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
413     if (Access && !DoIt)
414       return FD_CanForwardProfitably;
415 
416     ForwardingDecision OpDecision = forwardTree(
417         TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, DoIt);
418     switch (OpDecision) {
419     case FD_CannotForward:
420       assert(!DoIt);
421       return OpDecision;
422 
423     case FD_CanForwardLeaf:
424     case FD_CanForwardProfitably:
425       assert(!DoIt);
426       break;
427 
428     case FD_DidForwardLeaf:
429     case FD_DidForwardTree:
430       assert(DoIt);
431       break;
432 
433     default:
434       llvm_unreachable("Shouldn't return this");
435     }
436 
437     IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
438 
439     // { DomainDef[] -> ValInst[] }
440     isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
441     assert(!isNormalized(ExpectedVal).is_false() &&
442            "LoadInsts are always normalized");
443 
444     // { DomainUse[] -> DomainTarget[] }
445     isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
446 
447     // { DomainTarget[] -> ValInst[] }
448     isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
449     isl::union_map TranslatedExpectedVal =
450         isl::union_map(TargetExpectedVal).apply_range(Translator);
451 
452     // { DomainTarget[] -> Element[] }
453     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
454 
455     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
456     if (!SameVal)
457       return FD_NotApplicable;
458 
459     if (DoIt)
460       TargetStmt->prependInstruction(LI);
461 
462     if (!DoIt)
463       return FD_CanForwardProfitably;
464 
465     if (Access) {
466       LLVM_DEBUG(
467           dbgs() << "    forwarded known load with preexisting MemoryAccess"
468                  << Access << "\n");
469     } else {
470       Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
471       LLVM_DEBUG(dbgs() << "    forwarded known load with new MemoryAccess"
472                         << Access << "\n");
473 
474       // { ValInst[] }
475       isl::space ValInstSpace = ExpectedVal.get_space().range();
476 
477       // After adding a new load to the SCoP, also update the Known content
478       // about it. The new load will have a known ValInst of
479       // { [DomainTarget[] -> Value[]] }
480       // but which -- because it is a copy of it -- has same value as the
481       // { [DomainDef[] -> Value[]] }
482       // that it replicates. Instead of  cloning the known content of
483       // [DomainDef[] -> Value[]]
484       // for DomainTarget[], we add a 'translator' that maps
485       // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
486       // before comparing to the known content.
487       // TODO: 'Translator' could also be used to map PHINodes to their incoming
488       // ValInsts.
489       if (ValInstSpace.is_wrapping()) {
490         // { DefDomain[] -> Value[] }
491         isl::map ValInsts = ExpectedVal.range().unwrap();
492 
493         // { DefDomain[] }
494         isl::set DefDomain = ValInsts.domain();
495 
496         // { Value[] }
497         isl::space ValSpace = ValInstSpace.unwrap().range();
498 
499         // { Value[] -> Value[] }
500         isl::map ValToVal =
501             isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
502 
503         // { DomainDef[] -> DomainTarget[] }
504         isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt);
505 
506         // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
507         isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
508 
509         Translator = Translator.add_map(LocalTranslator);
510         LLVM_DEBUG(dbgs() << "      local translator is " << LocalTranslator
511                           << "\n");
512       }
513     }
514     LLVM_DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
515                       << "\n");
516     LLVM_DEBUG(dbgs() << "      candidate elements where " << Candidates
517                       << "\n");
518     assert(Access);
519 
520     NumKnownLoadsForwarded++;
521     TotalKnownLoadsForwarded++;
522     return FD_DidForwardTree;
523   }
524 
525   /// Forward a scalar by redirecting the access to an array element that stores
526   /// the same value.
527   ///
528   /// @param TargetStmt  The statement the operand tree will be copied to.
529   /// @param Inst        The scalar to forward.
530   /// @param UseStmt     The statement that uses @p Inst.
531   /// @param UseLoop     The loop @p Inst is used in.
532   /// @param DefStmt     The statement @p Inst is defined in.
533   /// @param DefLoop     The loop which contains @p Inst.
534   /// @param DoIt        If false, only determine whether an operand tree can be
535   ///                    forwarded. If true, carry out the forwarding. Do not
536   ///                    use DoIt==true if an operand tree is not known to be
537   ///                    forwardable.
538   ///
539   /// @return FD_NotApplicable        if @p Inst cannot be reloaded.
540   ///         FD_CanForwardLeaf       if @p Inst can be reloaded.
541   ///         FD_CanForwardProfitably if @p Inst has been reloaded.
542   ///         FD_DidForwardLeaf       if @p DoIt was true.
543   ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
544                                         ScopStmt *UseStmt, Loop *UseLoop,
545                                         ScopStmt *DefStmt, Loop *DefLoop,
546                                         bool DoIt) {
547     // Cannot do anything without successful known analysis.
548     if (Known.is_null() || Translator.is_null() ||
549         MaxOpGuard.hasQuotaExceeded())
550       return FD_NotApplicable;
551 
552     MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
553     if (Access && Access->isLatestArrayKind()) {
554       if (DoIt)
555         return FD_DidForwardLeaf;
556       return FD_CanForwardLeaf;
557     }
558 
559     // Don't spend too much time analyzing whether it can be reloaded. When
560     // carrying-out the forwarding, we cannot bail-out in the middle of the
561     // transformation. It also shouldn't take as long because some results are
562     // cached.
563     IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
564 
565     // { DomainDef[] -> ValInst[] }
566     isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
567 
568     // { DomainUse[] -> DomainTarget[] }
569     isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
570 
571     // { DomainTarget[] -> ValInst[] }
572     isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
573     isl::union_map TranslatedExpectedVal =
574         TargetExpectedVal.apply_range(Translator);
575 
576     // { DomainTarget[] -> Element[] }
577     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
578 
579     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
580     if (!SameVal)
581       return FD_NotApplicable;
582 
583     if (!DoIt)
584       return FD_CanForwardProfitably;
585 
586     if (!Access)
587       Access = TargetStmt->ensureValueRead(Inst);
588 
589     simplify(SameVal);
590     Access->setNewAccessRelation(SameVal);
591 
592     TotalReloads++;
593     NumReloads++;
594     return FD_DidForwardLeaf;
595   }
596 
597   /// Forwards a speculatively executable instruction.
598   ///
599   /// @param TargetStmt  The statement the operand tree will be copied to.
600   /// @param UseInst     The (possibly speculatable) instruction to forward.
601   /// @param DefStmt     The statement @p UseInst is defined in.
602   /// @param DefLoop     The loop which contains @p UseInst.
603   /// @param DoIt        If false, only determine whether an operand tree can be
604   ///                    forwarded. If true, carry out the forwarding. Do not
605   ///                    use DoIt==true if an operand tree is not known to be
606   ///                    forwardable.
607   ///
608   /// @return FD_NotApplicable  if @p UseInst is not speculatable.
609   ///         FD_CannotForward  if one of @p UseInst's operands is not
610   ///                           forwardable.
611   ///         FD_CanForwardTree if @p UseInst is forwardable.
612   ///         FD_DidForward     if @p DoIt was true.
613   ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
614                                          Instruction *UseInst,
615                                          ScopStmt *DefStmt, Loop *DefLoop,
616                                          bool DoIt) {
617     // PHIs, unless synthesizable, are not yet supported.
618     if (isa<PHINode>(UseInst))
619       return FD_NotApplicable;
620 
621     // Compatible instructions must satisfy the following conditions:
622     // 1. Idempotent (instruction will be copied, not moved; although its
623     //    original instance might be removed by simplification)
624     // 2. Not access memory (There might be memory writes between)
625     // 3. Not cause undefined behaviour (we might copy to a location when the
626     //    original instruction was no executed; this is currently not possible
627     //    because we do not forward PHINodes)
628     // 4. Not leak memory if executed multiple times (i.e. malloc)
629     //
630     // Instruction::mayHaveSideEffects is not sufficient because it considers
631     // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
632     // not sufficient because it allows memory accesses.
633     if (mayBeMemoryDependent(*UseInst))
634       return FD_NotApplicable;
635 
636     if (DoIt) {
637       // To ensure the right order, prepend this instruction before its
638       // operands. This ensures that its operands are inserted before the
639       // instruction using them.
640       // TODO: The operand tree is not really a tree, but a DAG. We should be
641       // able to handle DAGs without duplication.
642       TargetStmt->prependInstruction(UseInst);
643       NumInstructionsCopied++;
644       TotalInstructionsCopied++;
645     }
646 
647     for (Value *OpVal : UseInst->operand_values()) {
648       ForwardingDecision OpDecision =
649           forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DoIt);
650       switch (OpDecision) {
651       case FD_CannotForward:
652         assert(!DoIt);
653         return FD_CannotForward;
654 
655       case FD_CanForwardLeaf:
656       case FD_CanForwardProfitably:
657         assert(!DoIt);
658         break;
659 
660       case FD_DidForwardLeaf:
661       case FD_DidForwardTree:
662         assert(DoIt);
663         break;
664 
665       case FD_NotApplicable:
666         llvm_unreachable("forwardTree should never return FD_NotApplicable");
667       }
668     }
669 
670     if (DoIt)
671       return FD_DidForwardTree;
672     return FD_CanForwardProfitably;
673   }
674 
675   /// Determines whether an operand tree can be forwarded or carries out a
676   /// forwarding, depending on the @p DoIt flag.
677   ///
678   /// @param TargetStmt  The statement the operand tree will be copied to.
679   /// @param UseVal      The value (usually an instruction) which is root of an
680   ///                    operand tree.
681   /// @param UseStmt     The statement that uses @p UseVal.
682   /// @param UseLoop     The loop @p UseVal is used in.
683   /// @param DoIt        If false, only determine whether an operand tree can be
684   ///                    forwarded. If true, carry out the forwarding. Do not
685   ///                    use DoIt==true if an operand tree is not known to be
686   ///                    forwardable.
687   ///
688   /// @return If DoIt==false, return whether the operand tree can be forwarded.
689   ///         If DoIt==true, return FD_DidForward.
690   ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
691                                  ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) {
692     ScopStmt *DefStmt = nullptr;
693     Loop *DefLoop = nullptr;
694 
695     // { DefDomain[] -> TargetDomain[] }
696     isl::map DefToTarget;
697 
698     VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
699     switch (VUse.getKind()) {
700     case VirtualUse::Constant:
701     case VirtualUse::Block:
702     case VirtualUse::Hoisted:
703       // These can be used anywhere without special considerations.
704       if (DoIt)
705         return FD_DidForwardTree;
706       return FD_CanForwardLeaf;
707 
708     case VirtualUse::Synthesizable: {
709       // ScopExpander will take care for of generating the code at the new
710       // location.
711       if (DoIt)
712         return FD_DidForwardTree;
713 
714       // Check if the value is synthesizable at the new location as well. This
715       // might be possible when leaving a loop for which ScalarEvolution is
716       // unable to derive the exit value for.
717       // TODO: If there is a LCSSA PHI at the loop exit, use that one.
718       // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
719       // do not forward past its loop header. This would require us to use a
720       // previous loop induction variable instead the current one. We currently
721       // do not allow forwarding PHI nodes, thus this should never occur (the
722       // only exception where no phi is necessary being an unreachable loop
723       // without edge from the outside).
724       VirtualUse TargetUse = VirtualUse::create(
725           S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
726       if (TargetUse.getKind() == VirtualUse::Synthesizable)
727         return FD_CanForwardLeaf;
728 
729       LLVM_DEBUG(
730           dbgs() << "    Synthesizable would not be synthesizable anymore: "
731                  << *UseVal << "\n");
732       return FD_CannotForward;
733     }
734 
735     case VirtualUse::ReadOnly:
736       // Note that we cannot return FD_CanForwardTree here. With a operand tree
737       // depth of 0, UseVal is the use in TargetStmt that we try to replace.
738       // With -polly-analyze-read-only-scalars=true we would ensure the
739       // existence of a MemoryAccess (which already exists for a leaf) and be
740       // removed again by tryForwardTree because it's goal is to remove this
741       // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
742       // to do so.
743       if (!DoIt)
744         return FD_CanForwardLeaf;
745 
746       // If we model read-only scalars, we need to create a MemoryAccess for it.
747       if (ModelReadOnlyScalars)
748         TargetStmt->ensureValueRead(UseVal);
749 
750       NumReadOnlyCopied++;
751       TotalReadOnlyCopied++;
752       return FD_DidForwardLeaf;
753 
754     case VirtualUse::Intra:
755       // Knowing that UseStmt and DefStmt are the same statement instance, just
756       // reuse the information about UseStmt for DefStmt
757       DefStmt = UseStmt;
758 
759       LLVM_FALLTHROUGH;
760     case VirtualUse::Inter:
761       Instruction *Inst = cast<Instruction>(UseVal);
762 
763       if (!DefStmt) {
764         DefStmt = S->getStmtFor(Inst);
765         if (!DefStmt)
766           return FD_CannotForward;
767       }
768 
769       DefLoop = LI->getLoopFor(Inst->getParent());
770 
771       ForwardingDecision SpeculativeResult =
772           forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop, DoIt);
773       if (SpeculativeResult != FD_NotApplicable)
774         return SpeculativeResult;
775 
776       ForwardingDecision KnownResult = forwardKnownLoad(
777           TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
778       if (KnownResult != FD_NotApplicable)
779         return KnownResult;
780 
781       ForwardingDecision ReloadResult = reloadKnownContent(
782           TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
783       if (ReloadResult != FD_NotApplicable)
784         return ReloadResult;
785 
786       // When no method is found to forward the operand tree, we effectively
787       // cannot handle it.
788       LLVM_DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
789       return FD_CannotForward;
790     }
791 
792     llvm_unreachable("Case unhandled");
793   }
794 
795   /// Try to forward an operand tree rooted in @p RA.
796   bool tryForwardTree(MemoryAccess *RA) {
797     assert(RA->isLatestScalarKind());
798     LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
799 
800     ScopStmt *Stmt = RA->getStatement();
801     Loop *InLoop = Stmt->getSurroundingLoop();
802 
803     isl::map TargetToUse;
804     if (!Known.is_null()) {
805       isl::space DomSpace = Stmt->getDomainSpace();
806       TargetToUse =
807           isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
808     }
809 
810     ForwardingDecision Assessment =
811         forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, false);
812     assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf);
813     if (Assessment != FD_CanForwardProfitably)
814       return false;
815 
816     ForwardingDecision Execution =
817         forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, true);
818     assert(((Execution == FD_DidForwardTree) ||
819             (Execution == FD_DidForwardLeaf)) &&
820            "A previous positive assessment must also be executable");
821 
822     if (Execution == FD_DidForwardTree)
823       Stmt->removeSingleMemoryAccess(RA);
824     return true;
825   }
826 
827   /// Return which SCoP this instance is processing.
828   Scop *getScop() const { return S; }
829 
830   /// Run the algorithm: Use value read accesses as operand tree roots and try
831   /// to forward them into the statement.
832   bool forwardOperandTrees() {
833     for (ScopStmt &Stmt : *S) {
834       bool StmtModified = false;
835 
836       // Because we are modifying the MemoryAccess list, collect them first to
837       // avoid iterator invalidation.
838       SmallVector<MemoryAccess *, 16> Accs;
839       for (MemoryAccess *RA : Stmt) {
840         if (!RA->isRead())
841           continue;
842         if (!RA->isLatestScalarKind())
843           continue;
844 
845         Accs.push_back(RA);
846       }
847 
848       for (MemoryAccess *RA : Accs) {
849         if (tryForwardTree(RA)) {
850           Modified = true;
851           StmtModified = true;
852           NumForwardedTrees++;
853           TotalForwardedTrees++;
854         }
855       }
856 
857       if (StmtModified) {
858         NumModifiedStmts++;
859         TotalModifiedStmts++;
860       }
861     }
862 
863     if (Modified)
864       ScopsModified++;
865     return Modified;
866   }
867 
868   /// Print the pass result, performed transformations and the SCoP after the
869   /// transformation.
870   void print(raw_ostream &OS, int Indent = 0) {
871     printStatistics(OS, Indent);
872 
873     if (!Modified) {
874       // This line can easily be checked in regression tests.
875       OS << "ForwardOpTree executed, but did not modify anything\n";
876       return;
877     }
878 
879     printStatements(OS, Indent);
880   }
881 };
882 
883 /// Pass that redirects scalar reads to array elements that are known to contain
884 /// the same value.
885 ///
886 /// This reduces the number of scalar accesses and therefore potentially
887 /// increases the freedom of the scheduler. In the ideal case, all reads of a
888 /// scalar definition are redirected (We currently do not care about removing
889 /// the write in this case).  This is also useful for the main DeLICM pass as
890 /// there are less scalars to be mapped.
891 class ForwardOpTree : public ScopPass {
892 private:
893   /// The pass implementation, also holding per-scop data.
894   std::unique_ptr<ForwardOpTreeImpl> Impl;
895 
896 public:
897   static char ID;
898 
899   explicit ForwardOpTree() : ScopPass(ID) {}
900   ForwardOpTree(const ForwardOpTree &) = delete;
901   ForwardOpTree &operator=(const ForwardOpTree &) = delete;
902 
903   void getAnalysisUsage(AnalysisUsage &AU) const override {
904     AU.addRequiredTransitive<ScopInfoRegionPass>();
905     AU.addRequired<LoopInfoWrapperPass>();
906     AU.setPreservesAll();
907   }
908 
909   bool runOnScop(Scop &S) override {
910     // Free resources for previous SCoP's computation, if not yet done.
911     releaseMemory();
912 
913     LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
914 
915     {
916       IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
917       Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
918 
919       if (AnalyzeKnown) {
920         LLVM_DEBUG(dbgs() << "Prepare forwarders...\n");
921         Impl->computeKnownValues();
922       }
923 
924       LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n");
925       Impl->forwardOperandTrees();
926 
927       if (MaxOpGuard.hasQuotaExceeded()) {
928         LLVM_DEBUG(dbgs() << "Not all operations completed because of "
929                              "max_operations exceeded\n");
930         KnownOutOfQuota++;
931       }
932     }
933 
934     LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
935     LLVM_DEBUG(dbgs() << S);
936 
937     // Update statistics
938     auto ScopStats = S.getStatistics();
939     NumValueWrites += ScopStats.NumValueWrites;
940     NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
941     NumPHIWrites += ScopStats.NumPHIWrites;
942     NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
943     NumSingletonWrites += ScopStats.NumSingletonWrites;
944     NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
945 
946     return false;
947   }
948 
949   void printScop(raw_ostream &OS, Scop &S) const override {
950     if (!Impl)
951       return;
952 
953     assert(Impl->getScop() == &S);
954     Impl->print(OS);
955   }
956 
957   void releaseMemory() override { Impl.reset(); }
958 }; // class ForwardOpTree
959 
960 char ForwardOpTree::ID;
961 } // namespace
962 
963 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
964 
965 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
966                       "Polly - Forward operand tree", false, false)
967 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
968 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
969                     "Polly - Forward operand tree", false, false)
970